In [1]:
from models.emoe import ElasticMoELlamaForCausalLM
from models.emoe_config import ElasticMoELlamaConfig
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.models.auto.configuration_auto import AutoConfig

from transformers import pipeline

### Load TinyLlama model to grab weights.

In [2]:
# Hub checkpoint that supplies both the architecture config and the pretrained weights.
TINYLLAMA_CHECKPOINT = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Configuration object for the checkpoint (the keyword arguments mirror the ones
# the text-generation pipeline passes internally).
tinyllama_cfg = AutoConfig.from_pretrained(
    TINYLLAMA_CHECKPOINT,
    _from_pipeline="text-generation",
    code_revision=None,
    revision=None,
    token=None,
    trust_remote_code=None,
    _commit_hash="77e23968eed12d195bd46c519aa679cc22a27ddc",
)

# Stock TinyLlama causal LM; its state dict is the source of the pretrained weights.
tinyllama_model = AutoModelForCausalLM.from_pretrained(TINYLLAMA_CHECKPOINT)


### Get test output

The output of this cell serves as a reference: our new model should reproduce it, which confirms that the weights were loaded correctly.

In [3]:
# Reference text-generation pipeline built directly from the stock TinyLlama checkpoint.
pipe = pipeline(
    task="text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Conversation: a system persona followed by a single user question.
messages = [
    dict(
        role="system",
        content="You are a friendly chatbot who always responds in the style of a pirate",
    ),
    dict(
        role="user",
        content="How many helicopters can a human eat in one sitting?",
    ),
]

# Render the conversation into one prompt string with the tokenizer's chat template.
prompt = pipe.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Generate a continuation and show the full text (prompt + completion).
outputs = pipe(prompt, max_new_tokens=256)
print(outputs[0]["generated_text"])

<|system|>
You are a friendly chatbot who always responds in the style of a pirate</s>
<|user|>
How many helicopters can a human eat in one sitting?</s>
<|assistant|>
There is no definitive answer to this question as the number of helicopters that a human can eat in one sitting depends on various factors such as the size of the helicopter, the type of food, and the individual's appetite. However, some estimates suggest that a human can consume up to 10-15 helicopters in one sitting. This is based on the fact that a helicopter can carry a large amount of food and water, and the human body can process and digest large quantities of food quickly. However, it's always best to consult with a healthcare professional before consuming large quantities of food or drink in one sitting.


### Construct the Elastic MoE model and pull in weights from TinyLlama

In [4]:
# Build the ElasticMoE config from TinyLlama's config dict, then instantiate the
# model from that config (weights are randomly initialised at this point).
cfg = ElasticMoELlamaConfig(**tinyllama_cfg.to_dict())
emoe_model = ElasticMoELlamaForCausalLM(config=cfg)

# Copy the pretrained TinyLlama weights into the new model. load_state_dict is
# strict by default, so any missing or unexpected key raises instead of passing silently.
emoe_model.load_state_dict(tinyllama_model.state_dict())

# A module constructed from a config starts in training mode, whereas
# from_pretrained returns an eval-mode model. Switch to eval so that any
# train-only behaviour (e.g. dropout) is disabled when comparing generations.
emoe_model = emoe_model.cuda().eval()

### Manual Inference

Run greedy decoding without the pipeline and check the result against the reference output above.

In [5]:
# Hand-written chat-formatted prompt carrying the same system/user turns as the
# reference run.
sentences = ["""<|system|>
You are a friendly chatbot who always responds in the style of a pirate</s>
<|user|>
How many helicopters can a human eat in one sitting?</s>
<|assistant|>""",]
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# Tokenize as a padded batch and move the tensors to the GPU next to the model.
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt").to("cuda")

# Greedy decoding
with torch.inference_mode():
    outputs = emoe_model.generate(
        inputs["input_ids"],
        # Pass the mask so padded positions are ignored; without it generate()
        # has to guess the mask and batched prompts of unequal length go wrong.
        attention_mask=inputs["attention_mask"],
        max_length=1000,
        # Request greedy search explicitly rather than relying on the default.
        # (early_stopping was dropped: it only applies to beam search.)
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

# Turn each generated id sequence back into text, dropping special tokens.
decoded_responses = tokenizer.batch_decode(outputs, skip_special_tokens=True)

for response in decoded_responses:
    print(response)



<|system|>
You are a friendly chatbot who always responds in the style of a pirate 
<|user|>
How many helicopters can a human eat in one sitting? 
<|assistant|>
There is no definitive answer to this question as the number of helicopters that a human can eat in one sitting depends on various factors such as the size of the helicopter, the type of food, and the individual's appetite. However, some estimates suggest that a human can consume up to 10-15 helicopters in one sitting. This is based on the fact that a helicopter can carry a large amount of food and water, and the human body can process and digest large amounts of food quickly. However, it is always recommended to consult with a healthcare professional before consuming large amounts of food or drink in one sitting.


## Pipeline-Based Inference

We can also wrap our model in a pipeline like so:

In [6]:
# Wrap the already-instantiated ElasticMoE model and tokenizer in a
# text-generation pipeline.
# NOTE(review): torch_dtype and device_map are documented as options forwarded
# to model loading; with a pre-built model object they may have no effect here
# -- confirm against the installed transformers version.
pipe2 = pipeline(
    task="text-generation",
    model=emoe_model,
    tokenizer=tokenizer,
    config=cfg,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Same conversation as the reference run: a system persona plus one user question.
messages = [
    dict(
        role="system",
        content="You are a friendly chatbot who always responds in the style of a pirate",
    ),
    dict(
        role="user",
        content="How many helicopters can a human eat in one sitting?",
    ),
]

# Render the conversation into one prompt string with the tokenizer's chat template.
prompt = pipe2.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Generate a continuation and show the full text (prompt + completion).
outputs = pipe2(prompt, max_new_tokens=256)
print(outputs[0]["generated_text"])

The model 'ElasticMoELlamaForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'Pegasu

<|system|>
You are a friendly chatbot who always responds in the style of a pirate</s>
<|user|>
How many helicopters can a human eat in one sitting?</s>
<|assistant|>
There is no definitive answer to this question as the number of helicopters that a human can eat in one sitting depends on various factors such as the size of the helicopter, the type of food, and the individual's appetite. However, some estimates suggest that a human can consume up to 10-15 helicopters in one sitting. This is based on the fact that a helicopter can carry a large amount of food and water, and the human's appetite can be quite large. However, it's always best to consult with a medical professional or a nutritionist before attempting to consume a large number of helicopters in one sitting.
