In [None]:
import numpy as np
import torch
import os
from dotenv import load_dotenv

from datasets import Dataset, load_metric

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline, 
    logging,
)

from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)

from trl import SFTTrainer, SFTConfig

In [None]:
# Hub id of the base checkpoint that gets fine-tuned
base_model_path = "microsoft/phi-2"

# Name / local directory of the merged, fine-tuned model
new_model_name = "phi-2-function-calling"
new_model_path = f"./{new_model_name}"

# Name / local directory used for training outputs
train_model_name = f"{new_model_name}-train"
train_model_path = f"./{train_model_name}"

# Name / local directory of the LoRA adapter
adapter_name = f"{new_model_name}-adapter"
adapter_path = f"./{adapter_name}"

In [None]:
load_dotenv()

hf_token = os.getenv("HUGGINGFACE_TOKEN")

!huggingface-cli login --token $hf_token

In [None]:
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects while loading;
# only use this with a messages.npy file from a trusted source.
messages = np.load("./data/messages.npy", allow_pickle=True)

# Single "text" column; slice `messages` (e.g. messages[:100]) for a quick trial run
data = dict(text=messages)

dataset = Dataset.from_dict(data)


In [None]:
# Split into training (90%) and test (10%) sets.
# A fixed seed keeps the split identical across kernel restarts, so evaluation
# numbers from different runs are computed on the same held-out examples.
dataset = dataset.train_test_split(test_size=0.1, seed=42)

dataset

In [None]:


# Load tokenizer (slow implementation requested via use_fast=False)
tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True, use_fast=False)

# Extend the vocabulary with the chat-start marker and a dedicated padding token,
# then register "<|pad|>" as pad token and "<|im_end|>" as the end-of-sequence token.
# NOTE(review): the model's embedding matrix is not resized after adding tokens here;
# confirm the base model's embedding table already has room for the new ids.
new_tokens = ["<|im_start|>", "<|pad|>"]
tokenizer.add_tokens(new_tokens)
tokenizer.pad_token = "<|pad|>"
tokenizer.add_special_tokens({"eos_token": "<|im_end|>"})

# Load base model (Phi-2): 4-bit NF4 weights, bfloat16 compute, no double quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)

# The KV cache is switched off for training
model.config.use_cache = False
model.config.pretraining_tp = 1

# Make the model stop on the same end-of-sequence token the tokenizer now uses
model.config.eos_token_id = tokenizer.eos_token_id



In [None]:
# Show the tokenizer's special-token mapping to check the pad/eos tokens set in the previous cell
tokenizer.special_tokens_map

In [None]:
# Label value for positions that must not contribute to the loss
# (-100 is the default ignore_index of torch.nn.CrossEntropyLoss).
IGNORE_INDEX = -100


def tokenize(input, max_length=1024):
    """Tokenize one chat example and build labels that cover only the assistant turn.

    The system, user and assistant texts are tokenized separately (without
    special tokens and without tokenizer-side truncation) and concatenated in
    that order. Labels equal the token ids for the assistant turn and
    IGNORE_INDEX for the system and user turns.

    Args:
        input: A dataset row whose "text" entry maps the keys "system", "user"
            and "assistant" to the text of each turn.
        max_length: Maximum number of tokens kept per example. Longer sequences
            are cut at the end, so a very long system/user prefix can leave few
            or no assistant labels. Defaults to 1024.

    Returns:
        dict with the equally long lists "input_ids", "attention_mask" and "labels".
    """
    turns = input["text"]
    input_ids, attention_mask, labels = [], [], []

    for role in ("system", "user", "assistant"):
        encoded = tokenizer(
            turns[role],
            truncation=False,
            add_special_tokens=False,
        )
        ids = encoded["input_ids"]

        # Tokens and attention mask are copied unchanged
        input_ids.extend(ids)
        attention_mask.extend(encoded["attention_mask"])

        # Loss is computed on the assistant turn only
        if role == "assistant":
            labels.extend(ids)
        else:
            labels.extend([IGNORE_INDEX] * len(ids))

    # Truncate to the maximum length
    return {
        "input_ids": input_ids[:max_length],
        "attention_mask": attention_mask[:max_length],
        "labels": labels[:max_length],
    }

        
# Tokenize every example, one example per call; the raw columns are dropped
# afterwards because only the tokenized fields are needed from here on.
original_columns = dataset["train"].column_names
dataset_tokenized = dataset.map(
    tokenize,
    batched=False,
    num_proc=os.cpu_count(),  # number of worker processes (one per CPU reported by the OS)
    remove_columns=original_columns,
)


In [None]:
# Prepare the quantized model for k-bit training with gradient checkpointing enabled
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

# LoRA adapters (rank 32, alpha 32, dropout 0.1) on the attention projections and
# the "dense" layers. "lm_head" and "embed_tokens" are listed in modules_to_save
# (per PEFT these modules are kept trainable and stored with the adapter).
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "dense"],
    modules_to_save=["lm_head", "embed_tokens"],
)

model = get_peft_model(model, lora_config)

# Keep the KV cache disabled on the wrapped model as well
model.config.use_cache = False

In [None]:

def collate(elements):
    """Pad a list of tokenized samples to a common length and stack them into tensors.

    Every sample is right-padded to the length of the longest "input_ids" in
    the batch: input ids with the tokenizer's pad token id, labels with
    IGNORE_INDEX and the attention mask with 0. The samples passed in are left
    untouched (new lists are built), so collating the same objects twice gives
    the same result.

    Args:
        elements: Non-empty list of dicts with the list-valued keys
            "input_ids", "labels" and "attention_mask".

    Returns:
        dict with the tensors "input_ids", "labels" and "attention_mask",
        each with one row per sample.

    Raises:
        ValueError: If `elements` is empty.
    """
    if not elements:
        raise ValueError("collate() received an empty batch")

    longest = max(len(sample["input_ids"]) for sample in elements)
    pad_id = tokenizer.pad_token_id

    input_rows, label_rows, mask_rows = [], [], []
    for sample in elements:
        # The amount of padding is derived from the input ids and applied to all three fields
        pad_len = longest - len(sample["input_ids"])
        input_rows.append(list(sample["input_ids"]) + [pad_id] * pad_len)
        label_rows.append(list(sample["labels"]) + [IGNORE_INDEX] * pad_len)
        mask_rows.append(list(sample["attention_mask"]) + [0] * pad_len)

    return {
        "input_ids": torch.tensor(input_rows),
        "labels": torch.tensor(label_rows),
        "attention_mask": torch.tensor(mask_rows),
    }

In [None]:
train_batch_size = 2
eval_batch_size = 16
ga_steps = 16  # gradient accumulation steps
# Optimizer steps per epoch: every step consumes train_batch_size * ga_steps examples
# (this count assumes a single device).
steps_per_epoch = len(dataset_tokenized["train"]) // (train_batch_size * ga_steps)
epochs = 1
lr = 2e-4  # the original cell also noted 0.00002 (2e-5) next to this value

training_arguments = SFTConfig(
    output_dir=train_model_path,
    num_train_epochs=epochs,
    per_device_train_batch_size=train_batch_size,
    per_device_eval_batch_size=eval_batch_size,
    gradient_accumulation_steps=ga_steps,
    optim="paged_adamw_32bit",
    learning_rate=lr,
    lr_scheduler_type="constant",
    fp16=False,
    bf16=False,
    group_by_length=True,
    disable_tqdm=False,
    max_seq_length=2048,
    dataset_text_field="text",
    packing=False,
    # Logging
    logging_strategy="steps",
    logging_steps=50,
    report_to="wandb",
    run_name=train_model_name,
    # Evaluation: the strategy has to be set explicitly, otherwise it defaults to
    # "no" and eval_steps is never used. On transformers < 4.41 this argument is
    # named `evaluation_strategy`.
    eval_strategy="steps",
    # Evaluate 5 times per epoch; max(1, ...) keeps the interval valid for small datasets
    eval_steps=max(1, steps_per_epoch // 5),
    # Checkpointing: saved once per epoch (save_steps only applies with save_strategy="steps")
    save_strategy="epoch",
    save_steps=steps_per_epoch,
)

In [None]:
# Build the supervised fine-tuning trainer from the pre-tokenized train/test splits,
# the custom padding collator and the training arguments defined in the earlier cells.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset_tokenized["train"],  
    eval_dataset=dataset_tokenized["test"],
    # NOTE(review): `model` was already wrapped with get_peft_model(model, lora_config)
    # in an earlier cell, so passing peft_config again looks redundant — confirm how the
    # installed trl version treats a PEFT-wrapped model combined with peft_config.
    peft_config=lora_config,
    tokenizer=tokenizer,
    args=training_arguments,
    data_collator=collate,  
)

In [None]:
# Log to a Weights & Biases project.
# The run is started explicitly here, before trainer.train() is called.
# NOTE(review): the run name below differs from `run_name` in the training
# arguments — confirm which name is expected to show up in the dashboard.
import wandb

run = wandb.init(
    project="kensei-phi2",
    name="testrun-4.8",
)

In [None]:
# Run the fine-tuning loop with the trainer built in the earlier cell
trainer.train()

In [None]:
# The trainer keeps everything it logged during training in state.log_history
metrics = trainer.state.log_history

# Keep only the entries that carry an evaluation loss
eval_metrics = list(filter(lambda entry: "eval_loss" in entry, metrics))

# Print the evaluation entries, numbered from 1
for position, entry in enumerate(eval_metrics, start=1):
    print(f"Evaluation {position}:")
    print(entry)

In [None]:
# Perform evaluation after training.
# NOTE: this rebinds `eval_metrics` (a list of log entries in the previous cell)
# to whatever trainer.evaluate() returns.
eval_metrics = trainer.evaluate()

# Print out the evaluation metrics
print(eval_metrics)

In [None]:
# Close the wandb run that was started with wandb.init() before training
wandb.finish()

In [None]:
# Save the adapter locally to `adapter_path`, then push it to the Hub repository
# DataKensei/<adapter_name> using safetensors serialization.
# token=True is passed to both calls — presumably so the stored Hugging Face login is used; TODO confirm.
trainer.model.save_pretrained(adapter_path, token=True)
trainer.model.push_to_hub(f"DataKensei/{adapter_name}", token=True, safe_serialization=True)

In [None]:
from transformers import GenerationConfig

# Merge the model with adapter
# NOTE(review): the base model was loaded 4-bit quantized (see the BitsAndBytesConfig
# cell), so the merge happens on quantized weights — confirm this is intended, or
# whether the adapter should be merged into a non-quantized copy of the base model.
new_model = trainer.model.merge_and_unload()

# Save merged model and push to hub

# Model: written to the local directory `new_model_path`
new_model.save_pretrained(new_model_path, token=True)
# Tokenizer: saved next to the model so the added tokens travel with it
tokenizer.save_pretrained(new_model_path)
# Generation configuration: sampling settings stored alongside the model; pad/eos ids
# are taken from the tokenizer
generation_config = GenerationConfig(
    max_new_tokens=100, 
    temperature=0.7,
    top_p=0.1,
    top_k=40,
    repetition_penalty=1.18,
    do_sample=True,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
)
generation_config.save_pretrained(new_model_path)

# Upload the merged model and the tokenizer to the same Hub repository
new_model.push_to_hub(f"DataKensei/{new_model_name}", token=True, safe_serialization=True)
tokenizer.push_to_hub(f"DataKensei/{new_model_name}")

In [None]:
# Only show critical messages from transformers while generating
logging.set_verbosity(logging.CRITICAL)

# Smoke test 1: a system turn that offers one function (calculate_retirement_savings)
# and a user request that should trigger it.
# The turn markers must be spelled exactly "<|im_start|>" (with the closing ">"),
# otherwise they do not match the token that was added to the tokenizer.
prompt = '''<|im_start|>system
You are a helpful assistant with access to the following functions. Use these functions when they are relevant to assist with a user's request
[
	{
		"name": "calculate_retirement_savings",
		"description": "Project the savings at retirement based on current contributions.",
		"parameters": {
			"type": "object",
			"properties": {
				"current_age": {
					"type": "integer",
					"description": "The current age of the individual."
				},
				"retirement_age": {
					"type": "integer",
					"description": "The desired retirement age."
				},
				"current_savings": {
					"type": "number",
					"description": "The current amount of savings."
				},
				"monthly_contribution": {
					"type": "number",
					"description": "The monthly contribution towards retirement savings."
				}
			},
			"required": ["current_age", "retirement_age", "current_savings", "monthly_contribution"]
		}
	}
]<|im_end|>
<|im_start|>user
I am currently 40 years old and plan to retire at 65. I have no savings at the moment, but I intend to save $500 every month. Could you project the savings at retirement based on current contributions?<|im_end|>
'''
# Generate with the merged model; max_length=500 is passed to the pipeline as the generation limit
pipe = pipeline(task="text-generation", model=new_model, tokenizer=tokenizer, max_length=500)
result = pipe(prompt)
print(result[0]['generated_text'])

In [None]:
# Only show critical messages from transformers while generating
logging.set_verbosity(logging.CRITICAL)

# Smoke test 2: a system turn that offers one function (schedule_meeting) and a
# user request that should trigger it.
# NOTE(review): the function schema inside this prompt is not strict JSON (trailing
# comma after the "duration" entry, single-quoted strings in "required") — confirm
# this matches the format used in the training data.
prompt = '''<|im_start|>system
You are a helpful assistant with access to the following functions. Use these functions when they are relevant to assist with a user's request
[{
    "name": "schedule_meeting",
            "description": "Schedule a meeting on the user's calendar.",
    "parameters": {
            "type": "object",
        "properties": {
            "date": {
                "type": "string",
                "description": "The date of the meeting."
            },
            "time": {
                "type": "string",
                "description": "The time of the meeting."
            },
            "participants": {
                "type": "array",
                "description": "The email addresses of the participants."
            },
            "duration": {
                "type": "string",
                "description": "The duration of the meeting."
            },
        },
        "required": ['date', 'time', 'participants']
    }
}]<|im_end|>
<|im_start|>user
Can you arrange a meeting on 2024/10/01 at 13:00 with ['Erin Mendez', 'Craig Shields', 'Jennifer Mclaughlin', 'Jason Curry', 'Heidi Coleman', 'Patricia Booth', 'Laura Willis', 'Sarah Cruz', 'Matthew Hale', 'Kirk Reynolds']?<|im_end|>
'''
# Generate with the merged model; a new text-generation pipeline is created for this prompt
pipe = pipeline(task="text-generation", model=new_model, tokenizer=tokenizer, max_length=500)
result = pipe(prompt)
print(result[0]['generated_text'])