In [19]:
import os
import json
from pathlib import Path
from pprint import pprint

import torch
import torch.nn as nn

from datasets import load_dataset, load_from_disk

import transformers
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)

from huggingface_hub import notebook_login

In [4]:
# Interactive Hugging Face Hub login; renders a widget in the notebook output.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Select the compute device


In [2]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load model & tokenizer


In [5]:
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
weights_dir = "./weights"  # local cache directory for the downloaded checkpoint files

# 4-bit NF4 quantization with double quantization; bfloat16 is the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# The Mistral architecture is implemented inside transformers, so loading it does
# not need `trust_remote_code=True`; leaving that flag off avoids executing
# arbitrary Python shipped with a Hub repository.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map=device,
    quantization_config=bnb_config,
    cache_dir=weights_dir,
)

# Tokenizers run on the CPU and have no `device` loading argument, so it is dropped.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=weights_dir)
# Reuse the end-of-sequence token for padding.
# NOTE(review): if a later data collator masks pad tokens out of the loss, EOS is
# masked too and the model may not learn when to stop — confirm in the training setup.
tokenizer.pad_token = tokenizer.eos_token

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [9]:
def print_trainable_parameters(model) -> None:
    """Print how many of the model's parameters are trainable.

    Iterates over ``model.named_parameters()`` and sums ``numel()`` for all
    parameters and for those with ``requires_grad`` set, then prints both
    counts and the trainable share as a percentage.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and ``requires_grad``.

    Note:
        Counts come straight from ``numel()``. NOTE(review): for 4-bit
        quantized weights this appears to be the packed storage size rather
        than the logical weight count — confirm before quoting absolute numbers.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # Guard against a model with no parameters instead of raising ZeroDivisionError.
    percentage = 100 * trainable_params / all_param if all_param else 0.0

    print(
        f"Trainable params num: {trainable_params:,} percentage: {percentage} || All params: {all_param:,}"
    )

In [10]:
# Baseline parameter counts for the quantized model, before LoRA adapters are attached.
print_trainable_parameters(model)

Trainable params num: 262,410,240 perzentage: 6.993743675173274 || All params: 3,752,071,168


In [11]:
# Turn on gradient checkpointing, then run PEFT's preparation step for training
# a k-bit quantized model. `model` is rebound to the prepared model.
# NOTE(review): prepare_model_for_kbit_training may already enable gradient
# checkpointing by default, which would make the explicit call redundant — confirm.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [13]:
# Names of the sub-modules that receive LoRA adapters: the attention projections
# (q/k/v/o), the MLP projections (gate/up/down) and the output head.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
    "lm_head",
]

# Rank-16 adapters with alpha 32 and 5% dropout; bias terms are not adapted.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# Wrap the model with the adapters and report the new trainable share.
# This rebinds `model`, so re-running the cell wraps an already-wrapped model.
model = get_peft_model(model, config)
print_trainable_parameters(model)

Trainable params num: 42,520,576 perzentage: 1.1205573318192514 || All params: 3,794,591,744


# Test original model


In [34]:
# Long instruction prompt for the robot function-calling task.
# - Contains a single `{user_query}` placeholder for str.format; the literal JSON
#   braces are doubled (`{{` / `}}`) so that str.format emits single braces.
# - `.strip()` removes the leading/trailing newline of the triple-quoted literal.
# - Unlike prompt_template_small, it also carries the RESPONSE FORMAT and
#   ADDITIONAL GUIDANCE sections.
# NOTE(review): the text starts with a literal `<s>`; if the tokenizer call also
#   adds special tokens, the BOS token would be duplicated — confirm.
# NOTE(review): the JSON PARAMETER VALUES section talks about `input_name` /
#   `input_value`, while the JSON FORMAT section uses the keys "name", "value"
#   and "unit" — confirm which naming the model should follow.
# NOTE(review): move_joint is described in radians, but the dataset sample shown
#   later in this notebook uses 'deg' — confirm the intended unit.
prompt_template_large = """
<s>[INST]
# TASK DESCRIPTION
Translate user queries about industrial robotic operations into JSON outputs for specific function calls.

# JSON PARAMETER VALUES
- functions: Array of function calls from the USER QUERY, identified by keywords like "and", "then".
- function_name: The required robot function, one of: move_tcp, move_joint, get_joint_values.
- input_name: Name of the input parameter for the function.
- input_value: Value for the function input parameter.
- inputs: Array of input_name, input_value pairs, completing the function call.

# FUNCTION MEANING
- move_tcp: Moves robot's tool center point. Inputs: x, y, z (position in mm), q1, q2, q3, q4 (quaternion values for orientation).
- move_joint: Rotates/moves a robot joint. Inputs: joint (joint index), angle (rotation in radians).
- get_joint_values: Retrieves current robot joint status. No input parameters.

# RESPONSE FORMAT
- Include only necessary functions from the query.
- Maintain the order of functions as in the query.
- Note any uncertainties in case of ambiguous queries.

# ADDITIONAL GUIDANCE
- Focus on verbs and technical terms to determine the function.
- Indicate if a query falls outside the available functions.
- Consider practical aspects of robotic operations.
- Treat "base" as the first robot "joint".

# JSON FORMAT
{{
    "functions": [
        {{
            "function_name": "<name>",
            "inputs": [
                {{"name": "<name>", "value": "<value>", "unit": "<unit>"}},
                {{"name": "<name>", "value": "<value>", "unit": "<unit>"}}
            ]
        }},
        {{
            "function_name": "<name>",
            "inputs": [{{"name": "<name>", "value": "<value>", "unit": "<unit>"}}]
        }}
    ]
}}

USER QUERY: {user_query}
[/INST]
""".strip()

In [51]:
# Shorter instruction prompt for the robot function-calling task: the same task
# description, parameter list, function meanings and JSON format as
# prompt_template_large, without the RESPONSE FORMAT / ADDITIONAL GUIDANCE sections.
# - Contains a single `{user_query}` placeholder for str.format; the literal JSON
#   braces are doubled (`{{` / `}}`) so that str.format emits single braces.
# - `.strip()` removes the leading/trailing newline of the triple-quoted literal.
# NOTE(review): the text starts with a literal `<s>`; if the tokenizer call also
#   adds special tokens, the BOS token would be duplicated — confirm.
# NOTE(review): the JSON PARAMETER VALUES section talks about `input_name` /
#   `input_value`, while the JSON FORMAT section uses the keys "name", "value"
#   and "unit" — confirm which naming the model should follow.
prompt_template_small = """
<s>[INST]
# TASK DESCRIPTION
Translate user queries about industrial robotic operations into JSON outputs for specific function calls.

# JSON PARAMETER VALUES
- functions: Array of function calls from the USER QUERY, identified by keywords like "and", "then".
- function_name: The required robot function, one of: move_tcp, move_joint, get_joint_values.
- input_name: Name of the input parameter for the function.
- input_value: Value for the function input parameter.
- inputs: Array of input_name, input_value pairs, completing the function call.

# FUNCTION MEANING
- move_tcp: Moves robot's tool center point. Inputs: x, y, z (position in mm), q1, q2, q3, q4 (quaternion values for orientation).
- move_joint: Rotates/moves a robot joint. Inputs: joint (joint index), angle (rotation in radians).
- get_joint_values: Retrieves current robot joint status. No input parameters.

# JSON FORMAT
{{
    "functions": [
        {{
            "function_name": "<name>",
            "inputs": [
                {{"name": "<name>", "value": "<value>", "unit": "<unit>"}},
                {{"name": "<name>", "value": "<value>", "unit": "<unit>"}}
            ]
        }},
        {{
            "function_name": "<name>",
            "inputs": [{{"name": "<name>", "value": "<value>", "unit": "<unit>"}}]
        }}
    ]
}}

USER QUERY: {user_query}
[/INST]
""".strip()

In [75]:
# Generation settings used for the qualitative checks below.
# NOTE: this is the model's own GenerationConfig object (not a copy), so the
# assignments below also change the model's default generation behaviour.
generation_config = model.generation_config
generation_config.max_new_tokens = 200
# `temperature` and `top_p` only take effect when sampling is enabled; without
# `do_sample=True` generation stays greedy and both settings are ignored.
generation_config.do_sample = True
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
# Pad with the EOS token (matches tokenizer.pad_token) and stop on EOS.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [76]:
# Example natural-language request substituted into the prompt template.
user_query = "Obtain the status of the robot's third joint"

In [77]:
%%time
device = "cuda:0"

encoding = tokenizer(
    prompt_template_small.format(user_query=user_query), return_tensors="pt"
).to(device)
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

[INST]
# TASK DESCRIPTION
Translate user queries about industrial robotic operations into JSON outputs for specific function calls.

# JSON PARAMETER VALUES
- functions: Array of function calls from the USER QUERY, identified by keywords like "and", "then".
- function_name: The required robot function, one of: move_tcp, move_joint, get_joint_values.
- input_name: Name of the input parameter for the function.
- input_value: Value for the function input parameter.
- inputs: Array of input_name, input_value pairs, completing the function call.

# FUNCTION MEANING
- move_tcp: Moves robot's tool center point. Inputs: x, y, z (position in mm), q1, q2, q3, q4 (quaternion values for orientation).
- move_joint: Rotates/moves a robot joint. Inputs: joint (joint index), angle (rotation in radians).
- get_joint_values: Retrieves current robot joint status. No input parameters.

# JSON FORMAT
{
    "functions": [
        {
            "function_name": "<name>",
            "inputs": [
             

# Prepare data


In [22]:
# Directory of the saved dataset, relative to the notebook's working directory.
dataset_path = Path("../DATA/dataset_2024-Jan-30_23-29-10/")
# NOTE(review): not referenced by any visible cell (the dataset is read with
# load_from_disk) — confirm whether this mapping is still needed.
data_files = {"train": "train.csv", "test": "test.csv"}

In [23]:
# Load the dataset previously saved under `dataset_path`.
dataset = load_from_disk(dataset_path)

In [24]:
# Display the available splits with their features and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['data'],
        num_rows: 1064
    })
    test: Dataset({
        features: ['data'],
        num_rows: 119
    })
})

In [26]:
# Inspect one training record: a 'data' dict holding 'function_calling' and 'user_query'.
# NOTE(review): in the displayed sample 'function_calling' is a Python-repr string
# (single quotes, None), not strict JSON, although the prompts ask for JSON —
# confirm how it is converted before training.
dataset["train"][6]

{'data': {'function_calling': "{'functions': [{'function_name': 'move_joint', 'inputs': [{'name': 'joint', 'value': [5], 'unit': None}, {'name': 'angle', 'value': [-30.0], 'unit': 'deg'}]}]}",
  'user_query': 'Rotate the sixth robot joint by -30 degrees'}}

# Fine-Tuning
