<a href="https://colab.research.google.com/github/etuckerman/SOCOTEC/blob/main/SOCOTEC_FINETUNE2_elliot_tuckerman.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
#install unsloth, xformers (for flash attn) and other pckgs
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes

In [2]:
from unsloth import FastLanguageModel
import pandas as pd
import torch
from datasets import Dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import is_bfloat16_supported

# Constants
MODEL_NAME = "unsloth/mistral-7b-v0.3"
MAX_SEQ_LENGTH = 2048
LOAD_IN_4BIT = True
BATCH_SIZE = 2
GRAD_ACCUMULATION = 4
LEARNING_RATE = 2e-4
TRAIN_STEPS = 100  # Optimized training steps
OUTPUT_DIR = "outputs"
SEED = 3407

# Load model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,
    load_in_4bit=LOAD_IN_4BIT
)

# Apply LoRA Adapters
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0.1,  # Small dropout for regularization
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=SEED,
)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2024.12.4: Fast Mistral patching. Transformers:4.46.3.
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 8.0. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2024.12.4 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [3]:
import itertools
import csv

def generate_multi_function_examples():
    # Numbers to use in combinations
    numbers = list(range(1, 21))

    # Function combinations and their natural language templates
    multi_function_examples = [
        # Square and Add combinations
        {
            'prompt': f"What is the square of {n1} added to {n2}?",
            'function_call': f"add(square({n1}), {n2})"
        } for n1, n2 in itertools.combinations(range(1, 11), 2)
    ] + [
        # Cube and Add combinations
        {
            'prompt': f"Add the cube of {n1} to {n2}",
            'function_call': f"add(cube({n1}), {n2})"
        } for n1, n2 in itertools.combinations(range(1, 11), 2)
    ] + [
        # Add cubed results
        {
            'prompt': f"What do you get when you add the cube of {n1} and the cube of {n2}?",
            'function_call': f"add(cube({n1}), cube({n2}))"
        } for n1, n2 in itertools.combinations(range(1, 11), 2)
    ] + [
        # Add squared results
        {
            'prompt': f"Sum the squares of {n1} and {n2}",
            'function_call': f"add(square({n1}), square({n2}))"
        } for n1, n2 in itertools.combinations(range(1, 11), 2)
    ] + [
        # Complex combinations
        {
            'prompt': f"What is the cube of {n1} plus the square of {n2}?",
            'function_call': f"add(cube({n1}), square({n2}))"
        } for n1, n2 in itertools.combinations(range(1, 11), 2)
    ] + [
        # Greetings with numbers
        {
            'prompt': f"Greet {n1} friends. The first friend is Alice.",
            'function_call': f'greet("Alice")'
        } for n1 in range(1, 6)
    ] + [
        # Name-based examples
        {
            'prompt': f"Multiply {n1} by itself then add {n2}",
            'function_call': f"add(square({n1}), {n2})"
        } for n1, n2 in itertools.combinations(range(1, 11), 2)
    ]

    return multi_function_examples

def append_to_csv(filename, new_examples):
    # Read existing CSV to avoid duplicates
    existing_prompts = set()
    with open(filename, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        existing_prompts = {row['Prompt'] for row in reader}

    # Append new unique examples
    with open(filename, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for example in new_examples:
            if example['prompt'] not in existing_prompts:
                writer.writerow([example['prompt'], example['function_call']])
                existing_prompts.add(example['prompt'])

# Generate and append multi-function examples
new_examples = generate_multi_function_examples()
append_to_csv('SOCOTEC_DATASET.csv', new_examples)

print(f"Added {len(new_examples)} new multi-function examples to the dataset.")
# Load and process dataset
dataset = pd.read_csv("SOCOTEC_DATASET.csv")
EOS_TOKEN = tokenizer.eos_token

# Modify your dataset to ensure proper function calls (not just the prompt)
# Assuming `dataset` is a pandas dataframe that has been pre-loaded with prompts and function calls
def format_data(examples):
    # Ensure function calls are correctly formatted
    function_call = examples["Function Call"]
    return {"text": f"Question: {examples['Prompt']} \nFunction Call: {function_call}\n"}

# Preprocess dataset (if you have a new dataset with proper function calls)
dataset = Dataset.from_pandas(dataset)  # Assuming the dataset is preloaded
dataset = dataset.map(format_data)


Added 275 new multi-function examples to the dataset.


Map:   0%|          | 0/411 [00:00<?, ? examples/s]

In [21]:
# Convert the dataset to a Pandas DataFrame
dataset_df = dataset.to_pandas()

# Display the first few rows of the dataset (default is 5 rows)
print(dataset_df.head(20))


                                       Prompt Function Call  \
0                     What's the square of 6?     square(6)   
1                   Calculate the square of 7     square(7)   
2                 Find the squared value of 5     square(5)   
3          What do you get when you square 4?     square(4)   
4                     Compute the square of 3     square(3)   
5                    Can you square 8 for me?     square(8)   
6            What is the result of 9 squared?     square(9)   
7                                    Square 2     square(2)   
8                    Tell me the square of 10    square(10)   
9             What number is 11 when squared?    square(11)   
10           Give me the squared result of 12    square(12)   
11                 Determine the square of 13    square(13)   
12                    How much is 14 squared?    square(14)   
13                  Square root of 16 squared     square(4)   
14                    I want the square of 15    square

In [4]:

# Training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUMULATION,
    warmup_steps=5,
    max_steps=TRAIN_STEPS,
    learning_rate=LEARNING_RATE,
    fp16=not is_bfloat16_supported(),
    bf16=is_bfloat16_supported(),
    logging_steps=10,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=SEED,
    output_dir=OUTPUT_DIR,
    report_to="none",
)

# Initialize Trainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

# Train model
trainer_stats = trainer.train()

# Print memory usage and training time
gpu_stats = torch.cuda.get_device_properties(0)
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
print(f"GPU: {gpu_stats.name}")
print(f"Peak memory usage: {used_memory} GB")
print(f"Training runtime: {trainer_stats.metrics['train_runtime']} seconds")

# Inference
FastLanguageModel.for_inference(model)
input_text = "What is 2 plus 3?"
inputs = tokenizer([input_text], return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=16, use_cache=True)
result = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(f"Generated Function Call: {result}")


Map (num_proc=2):   0%|          | 0/411 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 411 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 100
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
10,1.9382
20,0.5801
30,0.4238
40,0.3291
50,0.3567
60,0.3368
70,0.3452
80,0.299
90,0.2939
100,0.2852


GPU: NVIDIA A100-SXM4-40GB
Peak memory usage: 5.104 GB
Training runtime: 222.035 seconds
Generated Function Call: What is 2 plus 3? 
Function Call: add(2, 3)

What is


In [5]:
# Inference Adjustments: Make the model generate structured outputs (function calls)
def generate_function_call(input_text):
    FastLanguageModel.for_inference(model)
    inputs = tokenizer([input_text], return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=16, use_cache=True)
    result = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # Post-process the result to make sure it’s a valid function call
    #print(f"Generated Output: {result}")
    return result

# Example inference (adjust input for testing)
input_text = "What is 2 plus 3?"
res = generate_function_call(input_text)
print(f"[{input_text}] \n [{res}] ")

input_text = "What is two squared plus 6?"
res = generate_function_call(input_text)
print(f"[{input_text}] \n [{res}] ")

input_text = "162 squared plus 1902"
res = generate_function_call(input_text)
print(f"[{input_text}] \n [{res}] ")


[What is 2 plus 3?] 
 [What is 2 plus 3? 
Function Call: add(2, 3)

What is] 
[What is two squared plus 6?] 
 [What is two squared plus 6? 
Function Call: add(square(2), 6)

] 
[162 squared plus 1902] 
 [162 squared plus 1902 
Function Call: add(square(162), 19] 


In [6]:
# Inference
FastLanguageModel.for_inference(model)
input_text = "increase 6 by 2"
inputs = tokenizer([input_text], return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=16, use_cache=True)
result = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(f"Generated Function Call: {result}")


Generated Function Call: increase 6 by 2 
Function Call: add(6, 2)

add 


In [7]:
# Inference
FastLanguageModel.for_inference(model)
input_text = "say wassup to pete"
inputs = tokenizer([input_text], return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=16, use_cache=True)
result = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(f"Generated Function Call: {result}")


Generated Function Call: say wassup to pete 
Function Call: greet("Pete")

say wassup


In [8]:
# Inference
FastLanguageModel.for_inference(model)
input_text = "increase 3 by negative 2"
inputs = tokenizer([input_text], return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=16, use_cache=True)
result = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(f"Generated Function Call: {result}")


Generated Function Call: increase 3 by negative 2 
Function Call: add(3, -2)

add 


In [13]:
# Inference
FastLanguageModel.for_inference(model)
input_text = "add 3 to 2 cubed"
inputs = tokenizer([input_text], return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=16, use_cache=True)
result = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(f"Generated Function Call: {result}")


Generated Function Call: add 3 to 2 cubed 
Function Call: cube(5)

What is the c


In [15]:
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
import torch


# Inference Adjustments: Make the model generate structured outputs (function calls)
def generate_function_call(input_text):
    # Add a special instruction to ensure model only returns function call
    instruction = "Return only the function call, no additional text."

    # Prepare the input by adding instruction
    input_with_instruction = f"{instruction} {input_text}"

    # Tokenize the input
    inputs = tokenizer([input_with_instruction], return_tensors="pt").to("cuda")

    # Generate output from the model
    outputs = model.generate(**inputs, max_new_tokens=16, use_cache=True)

    # Decode the output and clean up (strip any unnecessary text after the function call)
    result = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # Ensure that the result only contains the function call (strip extra text if present)
    if "(" in result:  # Ensuring a valid function call is present
        return result
    else:
        return "Invalid function call"

# Example inference (adjust input for testing)
input_texts = [
    "What is 2 plus 3?",
    "What is two squared plus 6?",
    "162 squared plus 1902",
    "increase 6 by 2",
    "say wassup to pete",
    "increase 3 by negative 2",
    "add 3 to 2 cubed"
]

# Test the model's response for each input
for input_text in input_texts:
    res = generate_function_call(input_text)
    print(f"Input: [{input_text}] \nGenerated Function Call: [{res}]")


AttributeError: 'tuple' object has no attribute 'generate'