```
accelerate==0.29.3
bitsandbytes==0.43.1
datasets==2.19.0
sentence-transformers==2.3.1
tokenizers==0.19.1
torch==2.2.1
transformers==4.40.1
trl==0.8.5
wandb==0.16.6
```

# Train

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Base model: read from the local "models/OpenELM-450M" directory in bfloat16,
# with device placement delegated to device_map="auto". trust_remote_code lets
# transformers execute custom modeling code bundled with the checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    "models/OpenELM-450M",
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

# The tokenizer is taken from the Llama-2 repository (slow implementation),
# not from the model directory.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", use_fast=False)

In [None]:
from transformers import TrainingArguments, set_seed, get_constant_schedule
from trl import SFTTrainer, setup_chat_format, DataCollatorForCompletionOnlyLM
from datasets import load_dataset
import uuid, wandb

# Seed first so everything below is reproducible.
set_seed(42)

lr = 5e-5
# The random UUID suffix gives every run its own output directory / run name.
run_id = f"OpenELM-450M_LR-{lr}_OA_{str(uuid.uuid4())}"

# Apply TRL's chat format to both model and tokenizer, then make sure the
# padding token is set and is not the same token as EOS.
model, tokenizer = setup_chat_format(model, tokenizer)
current_pad = tokenizer.pad_token
if current_pad is None or current_pad == tokenizer.eos_token:
    tokenizer.pad_token = tokenizer.unk_token

dataset = load_dataset("g-ronimo/oasst2_top4k_en")

training_arguments = TrainingArguments(
    output_dir=f"out_{run_id}",
    # Optimisation
    learning_rate=lr,
    lr_scheduler_type="constant",
    optim="paged_adamw_8bit",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    # Precision / memory
    bf16=True,
    gradient_checkpointing=True,
    group_by_length=True,
    # Evaluation, checkpointing and logging cadence (evaluate and save on the
    # same 250-step interval, log every step)
    evaluation_strategy="steps",
    eval_steps=250,
    save_steps=250,
    logging_steps=1,
    label_names=["labels"],
)

# Collator configured with the chat markers that open a user turn
# (instruction) and an assistant turn (response).
completion_only_collator = DataCollatorForCompletionOnlyLM(
    response_template="<|im_start|>assistant",
    instruction_template="<|im_start|>user",
    tokenizer=tokenizer,
    mlm=False,
)

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    data_collator=completion_only_collator,
    max_seq_length=2048,
    dataset_kwargs={"add_special_tokens": False},
)

In [None]:
# Start a W&B run named after run_id (the same id used in the output
# directory name) and log the code files whose path ends in .py or .ipynb.
wandb_run = wandb.init(project="OpenELM", name=run_id)
wandb_run.log_code(include_fn=lambda path: path.endswith((".py", ".ipynb")))

trainer.train()

# Inference

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# change me! Directory of the checkpoint to load; both the model and the
# tokenizer are read from this one path.
model_path = "out/checkpoint-750"

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    trust_remote_code=True,
)

# Slow tokenizer, matching the use_fast=False setting used for training.
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)

def prompt(question, debug=False, max_new_tokens=500):
    """Generate a reply to a single user message and print the decoded text.

    Uses the notebook-level ``model`` and ``tokenizer``.

    Args:
        question: Text used as the content of the one user turn.
        debug: If True, print each token id of the generated sequence next to
            its decoded text before printing the full output.
        max_new_tokens: Forwarded unchanged to ``model.generate``.

    Returns:
        None. The decoded sequence is printed with special tokens kept.
    """
    messages = [
        {"role": "user", "content": question},
    ]

    # Put the inputs on whatever device the model actually lives on. The model
    # is loaded with device_map="auto", so a hard-coded "cuda" fails on hosts
    # without a GPU and can mismatch the model's placement.
    input_tokens = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    output_tokens = model.generate(input_tokens, max_new_tokens=max_new_tokens)

    if debug:
        # Token-by-token dump: id followed by its decoded string.
        for tok in output_tokens[0]:
            print(tok, tokenizer.decode(tok))

    # skip_special_tokens=False keeps the chat markers visible in the output.
    output = tokenizer.decode(output_tokens[0], skip_special_tokens=False)

    print(output)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
# Smoke test: one user question through the fine-tuned checkpoint.
prompt(question="hello, who are you?")

<|im_start|> user
hello, who are you? <|im_end|> 
 <|im_start|> assistant
I am Open Assistant, an open source chatbot developed by the Open Assistant Project. I am designed to be friendly, helpful, and informative. <|im_end|>
