In [1]:
# %%capture
# !pip install -U datasets
# uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118



In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from dotenv import load_dotenv
import os
from peft import LoraConfig
from peft import TaskType
from trl import SFTConfig, SFTTrainer

# Pull variables from a local .env file, then authenticate against the
# Hugging Face Hub with the token read from the environment.
load_dotenv()

from huggingface_hub import login
login(token=os.environ["HUGGINGFACE_TOKEN"])

# Quick environment report: torch build and whether CUDA is usable.
print("torch version: ", torch.__version__)
print('is gpu enbled: ', torch.cuda.is_available())

In [None]:
# Checkpoint to fine-tune (another checkpoint id kept for reference:
# meta-llama/Llama-3.2-3B-Instruct).
model_id = "google/gemma-2b-it"

# Tokenizer and float16 weights for that checkpoint; the model is then moved to the GPU.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
)
model = model.to('cuda')

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# for name, weights in model.named_parameters():
#     print(name)
#     print(weights)
#     break

In [None]:
# Load the dataset
from datasets import load_dataset
ds = load_dataset("Jofthomas/hermes-function-calling-thinking-V1")

In [None]:
ds['train'][0]

{'conversations': [{'content': "You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'get_stock_price', 'description': 'Get the current stock price of a company', 'parameters': {'type': 'object', 'properties': {'company': {'type': 'string', 'description': 'The name of the company'}}, 'required': ['company']}}}, {'type': 'function', 'function': {'name': 'get_movie_details', 'description': 'Get details about a movie', 'parameters': {'type': 'object', 'properties': {'title': {'type': 'string', 'description': 'The title of the movie'}}, 'required': ['title']}}}] </tools>Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title'

In [None]:
# Turn off the KV cache
model.config.use_cache = False

In [None]:
model

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 2048, padding_idx=0)
    (layers): ModuleList(
      (0-17): 18 x GemmaDecoderLayer(
        (self_attn): GemmaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (up_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (down_proj): Linear(in_features=16384, out_features=2048, bias=False)
          (act_fn): GELUActivation()
        )
        (input_layernorm): GemmaRMSNorm((2048,), eps=1e-06)
        (post_attention_layernorm): GemmaRMSNorm((2048,), eps=1e-06)
      )
    )
    (norm): GemmaRMSNorm((2048,), 

In [None]:
# ds['train'][12] # sample data

In [None]:
sample_data = ds['train'][0]['conversations']

In [None]:
# Two Jinja chat templates that differ only by a leading BOS token:
#   * chat_template_for_preprocessing - no BOS; used to render the dataset to text
#     (per the author's note, BOS is added later when SFTTrainer tokenizes the data).
#   * chat_template - the same template with "{{ bos_token }}" prepended.
# NOTE(review): `system` in the first guard is unquoted, so Jinja treats it as a
# template variable rather than the string 'system'; the sample render in this
# notebook shows a system turn passing through without the exception. Confirm
# whether the guard is meant to fire.
chat_template_for_preprocessing = (
    "{% if  messages[0]['role']==system%}"
    "{{ raise_exception('System message is not supported in gemma, it would be good to merget the system prompt with first user message')}}"
    "{% endif %}"
    "{% for message in messages %}"
    "{{'<start_of_turn>' + message['role'] + '\n' + message['content'] | trim + '<eos_turn><eos>\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}"
)
chat_template = "{{ bos_token }}" + chat_template_for_preprocessing


In [None]:
tokenizer.chat_template = chat_template_for_preprocessing

In [None]:
print(tokenizer.apply_chat_template(sample_data, tokenize=False))

<start_of_turn>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'get_stock_price', 'description': 'Get the current stock price of a company', 'parameters': {'type': 'object', 'properties': {'company': {'type': 'string', 'description': 'The name of the company'}}, 'required': ['company']}}}, {'type': 'function', 'function': {'name': 'get_movie_details', 'description': 'Get details about a movie', 'parameters': {'type': 'object', 'properties': {'title': {'type': 'string', 'description': 'The title of the movie'}}, 'required': ['title']}}}] </tools>Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Argumen

In [None]:
ds = ds.rename_column('conversations', 'messages')

In [None]:
# preprocess dataset {apply chat template}

def preprocess_dataset(row):
    """Render one dataset row into a single training string.

    Per the author's note, Gemma was trained without a system role, so a
    leading system message is folded into the message that follows it,
    together with an extra instruction asking the model to plan inside
    <think>...</think> before calling a function.

    The input row is not modified: a new message list is built, so the
    function gives the same result when called repeatedly on the same object.

    Args:
        row: mapping with a 'messages' key holding a list of
            {'role': ..., 'content': ...} dicts.

    Returns:
        dict with a single 'text' key containing the conversation rendered by
        ``tokenizer.apply_chat_template(..., tokenize=False)``.

    Raises:
        IndexError: if 'messages' is empty, or if a system message is not
            followed by another message.
    """
    thinking_instruction = (
        "Also, before making a call to a function take the time to plan the function to take. "
        "Make that thinking process between <think>{your thought}</think>\n\n"
    )
    messages = row['messages']

    if messages[0]['role'] == 'system':
        system_message = messages[0]['content']
        # Copy the following turn before editing it so the caller's data stays intact.
        merged_turn = dict(messages[1])
        merged_turn['content'] = system_message + thinking_instruction + merged_turn['content']
        # Drop the system message and replace the second turn with the merged one.
        messages = [merged_turn, *messages[2:]]

    return {'text': tokenizer.apply_chat_template(messages, tokenize=False)}
       

In [None]:
ds = ds.map(preprocess_dataset, remove_columns=['messages'])

In [None]:
ds = ds['train'].train_test_split(.1, seed=0)

In [None]:
ds

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 3213
    })
    test: Dataset({
        features: ['text'],
        num_rows: 357
    })
})

In [None]:
from dataclasses import dataclass
from enum import Enum

class ChatToolLMSpecialTokens(Enum):
    """Marker strings that this notebook registers as tokenizer special tokens."""

    # Opening / closing tags for the tool list, the reasoning span and tool output.
    tools = '<tools>'
    eos_tools = '</tools>'
    think = '<think>'
    eos_think = '</think>'
    tool_response = '<tool_response>'
    eos_tool_response = '</tool_response>'
    # Padding and end-of-sequence markers.
    pad_token = '<pad>'
    eos_token = '<eos>'

    @classmethod
    def special_token_list(cls):
        """Return every member's string value, in definition order."""
        values = []
        for member in cls:
            values.append(member.value)
        return values
    
ChatToolLMSpecialTokens.special_token_list()
    

['<tools>',
 '</tools>',
 '<think>',
 '</think>',
 '<tool_response>',
 '</tool_response>',
 '<pad>',
 '<eos>']

In [None]:
list(ChatToolLMSpecialTokens)

[<ChatToolLMSpecialTokens.tools: '<tools>'>,
 <ChatToolLMSpecialTokens.eos_tools: '</tools>'>,
 <ChatToolLMSpecialTokens.think: '<think>'>,
 <ChatToolLMSpecialTokens.eos_think: '</think>'>,
 <ChatToolLMSpecialTokens.tool_response: '<tool_response>'>,
 <ChatToolLMSpecialTokens.eos_tool_response: '</tool_response>'>,
 <ChatToolLMSpecialTokens.pad_token: '<pad>'>,
 <ChatToolLMSpecialTokens.eos_token: '<eos>'>]

In [None]:
ChatToolLMSpecialTokens.pad_token.value

'<pad>'

In [None]:
# Re-create the tokenizer from the base checkpoint so the pad/eos tokens and the
# extra tool/think markers can be registered as special tokens at load time.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    pad_token=ChatToolLMSpecialTokens.pad_token.value,
    eos_token=ChatToolLMSpecialTokens.eos_token.value,
    additional_special_tokens=ChatToolLMSpecialTokens.special_token_list(),
)

In [None]:
# set new chat template with bos token for sft auto data processing
tokenizer.chat_template = chat_template

In [None]:
# LoRA adapter settings for causal-LM fine-tuning: rank 32, alpha 64, 5% dropout.
# Adapters are attached to the q/v/o attention projections, the MLP up/gate
# projections and the token embedding; k_proj and down_proj are not targeted.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    target_modules=['q_proj', 'v_proj', 'o_proj', 'up_proj', 'embed_tokens', 'gate_proj'],
)

In [None]:
training_arugment = SFTConfig(
    # Output locations and reporting backend.
    output_dir='gemma_function_calling_and_thinking',
    logging_dir='runs',
    report_to='tensorboard',
    # Batching: one sequence per device step, gradients accumulated over 4 steps.
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    # Optimisation: single epoch, cosine schedule with 10% warmup, gradient clipping at 1.0.
    num_train_epochs=1,
    learning_rate=1e-4,
    lr_scheduler_type='cosine',
    warmup_ratio=0.1,
    max_grad_norm=1.0,
    # Step-based cadence: evaluate and log every 10 steps, checkpoint every 100.
    eval_strategy='steps',
    eval_steps=10,
    save_strategy='steps',
    save_steps=100,
    logging_steps=10,
    logging_first_step=True,
    # Memory / precision settings.
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    fp16=True,
    # Sequence handling: no packing, sequences capped at 1500 tokens.
    packing=False,
    max_seq_length=1500,
)

In [None]:
len(tokenizer) # Total number of tokens

256006

In [None]:
# Since we added new special tokens to the tokenizer, resize the embedding matrix so each new token gets a freshly initialized embedding row
model.resize_token_embeddings(len(tokenizer))

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Embedding(256006, 2048, padding_idx=0)

In [None]:
# Wire the base model, tokenizer, LoRA config and the train/test splits into the SFT trainer.
trainer = SFTTrainer(
    model=model,
    args=training_arugment,
    peft_config=peft_config,
    processing_class=tokenizer,
    train_dataset=ds['train'],
    eval_dataset=ds['test'],
)



Adding EOS to train dataset:   0%|          | 0/3213 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/3213 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/3213 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/357 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/357 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/357 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
trainer.train()

KeyboardInterrupt: 

## Inference

In [None]:
dataloader = trainer.get_train_dataloader()

In [None]:
for i in dataloader:
    print(i)
    break

{'input_ids': tensor([[    2,   106, 17877,  ...,     1,   108,     1]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1,  ..., 1, 1, 1]], device='cuda:0'), 'labels': tensor([[    2,   106, 17877,  ...,     1,   108,     1]], device='cuda:0')}


In [None]:
tokenizer.decode(tokenizer.all_special_ids)

'<bos><eos><unk><pad><tools></tools><think></think><tool_response></tool_response>'

In [None]:
tokenizer.decode([1,108])

'<eos>\n'

In [None]:
print(tokenizer.batch_decode(i['input_ids'])[0])

<bos><start_of_turn>human
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'get_stock_price', 'description': 'Get the current stock price', 'parameters': {'type': 'object', 'properties': {'ticker': {'type': 'string', 'description': 'The ticker symbol of the stock'}}, 'required': ['ticker']}}}, {'type': 'function', 'function': {'name': 'calculate_bmi', 'description': 'Calculate the Body Mass Index (BMI)', 'parameters': {'type': 'object', 'properties': {'weight': {'type': 'number', 'description': 'The weight of the person in kilograms'}, 'height': {'type': 'number', 'description': 'The height of the person in meters'}}, 'required': ['weight', 'height']}}}] </tools>Use the following pydantic model json schema for each too

In [None]:
inp = tokenizer.batch_decode(i['input_ids'])[0]
labels = tokenizer.batch_decode(i['labels'])[0]
inp==labels

True

In [None]:
# len(i['labels'][0])

In [None]:
# print(labels) 

In [None]:
# tokenizer.decode(i['input_ids'][0])

In [None]:
##inference

In [None]:
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch

In [None]:
# Inference
peft_model_id = "checkpoint-500"

model = AutoPeftModelForCausalLM.from_pretrained(peft_model_id, torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [None]:
from transformers import pipeline
import torch

In [None]:
pipe = pipeline('text-generation',
         model=model,
         tokenizer=tokenizer,
         device='cuda',
         torch_dtype=torch.float16
         )

Device set to use cuda


In [None]:
message = [{"role": "user", "content":'who are you'}]
inp = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=False)
print(inp)

<bos><start_of_turn>user
who are you<eos_turn><eos>
<start_of_turn>model



In [None]:
message = [{"role": "human", "content":'who are you'}]
inp = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=False)
print(inp)

<bos><start_of_turn>human
who are you<eos_turn><eos>
<start_of_turn>model



In [None]:
out = pipe(message)[0]['generated_text'][-1]['content']
print(out)

In [None]:
inp = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=True, return_tensors='pt').to('cuda')

In [None]:
inp

tensor([[     2,    106,   1645,    108,  10569,    708,    692, 235322,  96396,
         235298,  15508, 235313,      1,    108,    106,   2516,    108]],
       device='cuda:0')

In [None]:
out = model.generate(inp.to('cuda'))[0]

In [None]:
print(tokenizer.decode(out))

TypeError: argument 'ids': 'list' object cannot be interpreted as an integer

In [None]:
# NOTE(review): an earlier cell maps the dataset with remove_columns=['messages'] and the
# printed DatasetDict lists only a 'text' feature, so this 'messages' lookup appears to
# rely on state from a different session - confirm which dataset object is expected here.
inp = ds['test'][0]['messages']
# Wrap the value in a tensor on the GPU and add a leading batch dimension.
inp = torch.tensor(inp, device='cuda').unsqueeze(0)

In [None]:
out = model.generate(inp, max_new_tokens=100)

In [None]:
input = """<bos><start_of_turn>human
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'search_restaurants', 'description': 'Search for restaurants in a specific location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to search for restaurants'}, 'cuisine': {'type': 'string', 'description': 'The cuisine type to filter the restaurants'}, 'price_range': {'type': 'integer', 'description': 'The price range of the restaurants (1-4)'}}, 'required': ['location']}}}, {'type': 'function', 'function': {'name': 'generate_random_password', 'description': 'Generate a random password', 'parameters': {'type': 'object', 'properties': {'length': {'type': 'integer', 'description': 'The length of the password'}}, 'required': ['length']}}}] </tools>Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{tool_call}
</tool_call>Also, before making a call to a function take the time to plan the function to take. Make that thinking process between <think>{your thought}</think>

I'm in New York and I'm craving Italian food. Can you find a restaurant for me?<eos_turn><eos>
<start_of_turn>model
<think>Okay, so
"""

In [None]:
inp = tokenizer.encode(input, return_tensors='pt',add_special_tokens=False).to('cuda')

In [None]:
print(tokenizer.decode(inp[0]))

<bos><start_of_turn>human
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'search_restaurants', 'description': 'Search for restaurants in a specific location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to search for restaurants'}, 'cuisine': {'type': 'string', 'description': 'The cuisine type to filter the restaurants'}, 'price_range': {'type': 'integer', 'description': 'The price range of the restaurants (1-4)'}}, 'required': ['location']}}}, {'type': 'function', 'function': {'name': 'generate_random_password', 'description': 'Generate a random password', 'parameters': {'type': 'object', 'properties': {'length': {'type': 'integer', 'description': 'The 

In [None]:
out = model.generate(inp, max_new_tokens=200)

In [None]:
print(tokenizer.decode(out[0]))

<bos><start_of_turn>human
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'search_restaurants', 'description': 'Search for restaurants in a specific location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to search for restaurants'}, 'cuisine': {'type': 'string', 'description': 'The cuisine type to filter the restaurants'}, 'price_range': {'type': 'integer', 'description': 'The price range of the restaurants (1-4)'}}, 'required': ['location']}}}, {'type': 'function', 'function': {'name': 'generate_random_password', 'description': 'Generate a random password', 'parameters': {'type': 'object', 'properties': {'length': {'type': 'integer', 'description': 'The 

In [None]:
print(tokenizer.decode(ds['train'][0]['messages']))

<bos><start_of_turn>human
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'search_restaurants', 'description': 'Search for restaurants in a specific location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to search for restaurants'}, 'cuisine': {'type': 'string', 'description': 'The cuisine type to filter the restaurants'}, 'price_range': {'type': 'integer', 'description': 'The price range of the restaurants (1-4)'}}, 'required': ['location']}}}, {'type': 'function', 'function': {'name': 'generate_random_password', 'description': 'Generate a random password', 'parameters': {'type': 'object', 'properties': {'length': {'type': 'integer', 'description': 'The 