In [58]:
import os
import json
from pathlib import Path
from pprint import pprint
from datetime import datetime

import torch
import torch.nn as nn

from datasets import load_dataset, load_from_disk

import transformers
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)

from huggingface_hub import notebook_login

In [28]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Set running device


In [151]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

# Load model & Tokenizer


In [152]:
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
weights_dir = "./weights"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map=device,
    trust_remote_code=True,
    quantization_config=bnb_config,
    cache_dir=weights_dir,
)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME, device=device, cache_dir=weights_dir
)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [153]:
def print_trainable_parameters(model) -> None:
    """Prints the number of trainable parameters in the model."""
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()

    print(
        f"Trainable params num: {trainable_params:,} perzentage: {100 * trainable_params / all_param} || All params: {all_param:,}"
    )

In [154]:
print_trainable_parameters(model)

Trainable params num: 262,410,240 perzentage: 6.993743675173274 || All params: 3,752,071,168


In [155]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [156]:
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

Trainable params num: 42,520,576 perzentage: 1.1205573318192514 || All params: 3,794,591,744


# Test original model


In [157]:
prompt_template_large = """
<s>[INST]
# TASK DESCRIPTION
Translate user queries about industrial robotic operations into JSON outputs for specific function calls.

# JSON PARAMETER VALUES
- functions: Array of function calls from the USER QUERY, identified by keywords like "and", "then".
- function_name: The required robot function, one of: move_tcp, move_joint, get_joint_values.
- input_name: Name of the input parameter for the function.
- input_value: Value for the function input parameter.
- inputs: Array of input_name, input_value pairs, completing the function call.

# FUNCTION MEANING
- move_tcp: Moves robot's tool center point. Inputs: x, y, z (position in mm), q1, q2, q3, q4 (quaternion values for orientation).
- move_joint: Rotates/moves a robot joint. Inputs: joint (joint index), angle (rotation in radians).
- get_joint_values: Retrieves current robot joint status. No input parameters.

# RESPONSE FORMAT
- Include only necessary functions from the query.
- Maintain the order of functions as in the query.
- Note any uncertainties in case of ambiguous queries.

# ADDITIONAL GUIDANCE
- Focus on verbs and technical terms to determine the function.
- Indicate if a query falls outside the available functions.
- Consider practical aspects of robotic operations.
- Treat "base" as the first robot "joint".

# JSON FORMAT
{{
    "functions": [
        {{
            "function_name": "<name>",
            "inputs": [
                {{"name": "<name>", "value": "<value>", "unit": "<unit>"}},
                {{"name": "<name>", "value": "<value>", "unit": "<unit>"}}
            ]
        }},
        {{
            "function_name": "<name>",
            "inputs": [{{"name": "<name>", "value": "<value>", "unit": "<unit>"}}]
        }}
    ]
}}

USER QUERY: {user_query}
[/INST]
""".strip()

In [158]:
prompt_template_small = """
<s>[INST]
# TASK DESCRIPTION
Translate user queries about industrial robotic operations into JSON outputs for specific function calls.

# JSON PARAMETER VALUES
- functions: Array of function calls from the USER QUERY, identified by keywords like "and", "then".
- function_name: The required robot function, one of: move_tcp, move_joint, get_joint_values.
- input_name: Name of the input parameter for the function.
- input_value: Value for the function input parameter.
- inputs: Array of input_name, input_value pairs, completing the function call.

# FUNCTION MEANING
- move_tcp: Moves robot's tool center point. Inputs: x, y, z (position in mm), q1, q2, q3, q4 (quaternion values for orientation).
- move_joint: Rotates/moves a robot joint. Inputs: joint (joint index), angle (rotation in radians).
- get_joint_values: Retrieves current robot joint status. No input parameters.

# JSON FORMAT
{{
    "functions": [
        {{
            "function_name": "<name>",
            "inputs": [
                {{"name": "<name>", "value": "<value>", "unit": "<unit>"}},
                {{"name": "<name>", "value": "<value>", "unit": "<unit>"}}
            ]
        }},
        {{
            "function_name": "<name>",
            "inputs": [{{"name": "<name>", "value": "<value>", "unit": "<unit>"}}]
        }}
    ]
}}

USER QUERY: {user_query}
[/INST]
""".strip()

In [159]:
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [160]:
user_query = "Obtain the status of the robot's third joint"

In [77]:
%%time
device = "cuda:0"

encoding = tokenizer(
    prompt_template_small.format(user_query=user_query), return_tensors="pt"
).to(device)
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

[INST]
# TASK DESCRIPTION
Translate user queries about industrial robotic operations into JSON outputs for specific function calls.

# JSON PARAMETER VALUES
- functions: Array of function calls from the USER QUERY, identified by keywords like "and", "then".
- function_name: The required robot function, one of: move_tcp, move_joint, get_joint_values.
- input_name: Name of the input parameter for the function.
- input_value: Value for the function input parameter.
- inputs: Array of input_name, input_value pairs, completing the function call.

# FUNCTION MEANING
- move_tcp: Moves robot's tool center point. Inputs: x, y, z (position in mm), q1, q2, q3, q4 (quaternion values for orientation).
- move_joint: Rotates/moves a robot joint. Inputs: joint (joint index), angle (rotation in radians).
- get_joint_values: Retrieves current robot joint status. No input parameters.

# JSON FORMAT
{
    "functions": [
        {
            "function_name": "<name>",
            "inputs": [
             

# Prepare data


In [161]:
dataset_path = Path("../DATA/dataset_2024-Jan-30_23-29-10/")
data_files = {"train": "train.csv", "test": "test.csv"}

In [170]:
dataset = load_from_disk(dataset_path)

In [173]:
test_dataset = dataset["test"]

In [164]:
dataset["train"][0]

{'data': {'function_calling': '{{"functions": [{{"function_name": "", "inputs": []}}]}}',
  'user_query': 'Here are some keywords about a restaurant:\n\nname = The Cambridge Blue, eatType = pub, food = French, priceRange = £20-25, near = Café Brazil. Write a sentence that describes the following attributes of a restaurant.'}}

In [165]:
def generate_prompt(data_point):
    return f"""<s>[INST] 
        # TASK DESCRIPTION: 
        Translate user queries about industrial robotic operations into JSON outputs for specific function calls. 
        # USER QUERY: 
        {data_point['data']['user_query']} 
        # RESPONSE: [/INST]
        {data_point['data']['function_calling']}
    """.strip()


def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    max_length = 512
    tokenized_full_prompt = tokenizer(
        full_prompt,
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )
    return tokenized_full_prompt

In [166]:
dataset = dataset["train"].shuffle().map(generate_and_tokenize_prompt)

Map:   0%|          | 0/1064 [00:00<?, ? examples/s]

In [167]:
tokenizer.eos_token

'</s>'

# Fine-Tuning


In [168]:
current_time = datetime.now().strftime("%Y-%b-%d_%H-%M-%S")
output_dir = f"./experiments/{current_time}"

training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=3,
    logging_steps=1,
    output_dir=output_dir,
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,3.6751
2,3.3958
3,3.2994
4,2.8188
5,2.2112
6,1.7496
7,1.4124
8,0.9623
9,1.6935
10,1.7115


TrainOutput(global_step=266, training_loss=0.46363698525872443, metrics={'train_runtime': 999.1941, 'train_samples_per_second': 1.065, 'train_steps_per_second': 0.266, 'total_flos': 2.3380943569944576e+16, 'train_loss': 0.46363698525872443, 'epoch': 1.0})

In [150]:
model

0

In [174]:
test_dataset[0]

{'data': {'function_calling': "{'functions': [{'function_name': 'get_joint_values', 'inputs': []}]}",
  'user_query': 'Retrieve robot joint statuses'}}

In [177]:
def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    max_length = 512
    tokenized_full_prompt = tokenizer(
        full_prompt,
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    return tokenized_full_prompt

In [178]:
encoding = generate_and_tokenize_prompt(test_dataset[0])

In [179]:
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


[INST] 
        # TASK DESCRIPTION: 
        Translate user queries about industrial robotic operations into JSON outputs for specific function calls. 
        # USER QUERY: 
        Retrieve robot joint statuses 
        # RESPONSE: [/INST]
        {'functions': [{'function_name': 'get_joint_values', 'inputs': []}]}
        {'functions': [{'function_name': 'get_joint_values', 'inputs': []}]}
        {'functions': [{'function_name': 'get_joint_values', 'inputs': []}]}
        {'functions': [{'function_name': 'get_joint_values', 'inputs': []}]}
        {'functions': [{'function_name': 'get_joint_values', 'inputs': []}]}
        {'functions': [{'function_name': 'get_joint_values', 'inputs': []}]}
        {'functions': [{'function_name': 'get_joint_values', 'inputs': []}]}
        {'functions': [{'function_name': 'get_joint_values', 'inputs': []}]}
        {'functions': [{'function_name': 'get_joint_values', 'inputs': []}]}
        {'functions': [{'function


In [180]:
model.save_pretrained(save_directory="./train_results/2024-Feb-03_23-26-16")

