In [1]:
import sys
import logging

import datasets
from datasets import load_dataset
from peft import LoraConfig
import torch
import transformers
from trl import SFTTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [56]:
# Keyword arguments that are unpacked into `TrainingArguments` further down.
training_config = dict(
    bf16=True,
    do_eval=False,
    learning_rate=2.0e-05,
    log_level="info",
    # Emit a training-loss log line every 20 optimizer steps.
    logging_steps=20,
    logging_strategy="steps",
    lr_scheduler_type="cosine",
    num_train_epochs=3,
    max_steps=-1,
    # Checkpoints go to a directory relative to the notebook's working directory.
    output_dir="./checkpoint_dir",
    overwrite_output_dir=True,
    per_device_eval_batch_size=4,
    per_device_train_batch_size=4,
    remove_unused_columns=True,
    save_steps=100,
    save_total_limit=1,
    seed=0,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    gradient_accumulation_steps=1,
    warmup_ratio=0.2,
)

# Keyword arguments that are unpacked into `LoraConfig` further down.
# The adapter is attached to the four attention projection modules listed in
# `target_modules`.
peft_config = dict(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
)

# Turn the two plain dicts above into the typed config objects that the
# trainer and PEFT consume; each dict key becomes a keyword argument.
train_conf = TrainingArguments(**training_config)
peft_conf = LoraConfig(**peft_config)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [57]:
import os

# Ask CUDA to expose only GPU 0 to this process.
# NOTE(review): in this notebook the cell sits after the one that builds
# TrainingArguments (which logs "PyTorch: setting up devices"), and the
# training log later reports DataParallel with the batch size adjusted to 12,
# which suggests more than one GPU was still in use. Presumably this needs to
# run before anything touches CUDA (ideally at the very top) -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [4]:
# Base checkpoint to fine-tune; the commented-out line is the 128k-context alternative.
checkpoint_path = "microsoft/Phi-3-mini-4k-instruct"
# checkpoint_path = "microsoft/Phi-3-mini-128k-instruct"

# Options forwarded to `from_pretrained` when loading the base model.
model_kwargs = {
    "use_cache": False,
    "trust_remote_code": True,
    "attn_implementation": "flash_attention_2",  # load the model with flash-attention support
    "torch_dtype": torch.bfloat16,
    # No device map: the load log warns that the model is not initialized on GPU.
    "device_map": None,
}
model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)

tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
tokenizer.model_max_length = 2048
# Pad with the unk token rather than the eos token to prevent endless generation.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = "right"

You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.79it/s]
generation_config.json: 100%|██████████| 172/172 [00:00<00:00, 871kB/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [58]:
from peft import PeftModel, get_peft_model

# Wrap the base model with the LoRA adapters described by `peft_conf`.
# The cell rebinds `model` to its own wrapped version, so without a guard a
# second execution would wrap an already-wrapped model. The isinstance check
# makes re-running the cell a no-op instead.
if isinstance(model, PeftModel):
    print("model is already a PeftModel; skipping get_peft_model (reload the base model to apply a new config)")
else:
    model = get_peft_model(model, peft_conf)
model.print_trainable_parameters()

trainable params: 1,572,864 || all params: 3,822,652,416 || trainable%: 0.041145880630335606


In [24]:
import json

prompt_path = "/mnt/datascience1/all_data.json"
# Load the prompt records from the JSON file. The `with` block closes the
# file handle as soon as parsing finishes instead of leaving it open.
with open(prompt_path) as prompt_file:
    prompt = json.load(prompt_file)
# The first 620 records form the training split; everything after is held out.
train = prompt[:620]
test = prompt[620:]

In [72]:
# Ordered (old, new) substitutions for each field of each split. The order is
# significant: every substitution sees the result of the previous one.
_TRAIN_INPUT_SUBS = (
    ('\n\n', '\n'),
    ('<|user|>\n', ''),
    ('\n<|end|>', '<|end|>'),
)
_TRAIN_OUTPUT_SUBS = (
    ('\n\n\n', '\n'),
    ('\n\n', '\n'),
    ('<|assistant|>', ''),
)
# The test split additionally removes the 'Starts<' / '>Ends' markers.
# NOTE(review): the training split keeps those markers -- confirm this
# difference between the two splits is intentional.
_TEST_INPUT_SUBS = (
    ('\n\n', '\n'),
    ('Starts<', ''),
    ('>Ends', ''),
    ('<|user|>\n', ''),
    ('\n<|end|>', '<|end|>'),
)
_TEST_OUTPUT_SUBS = (
    ('Starts<', ''),
    ('>Ends', ''),
    ('\n\n\n', '\n'),
    ('\n\n', '\n'),
    ('<|assistant|>', ''),
)


def _substitute(text, subs):
    """Apply each (old, new) replacement to `text` in order and return the result."""
    for old, new in subs:
        text = text.replace(old, new)
    return text


def _to_chat_records(rows, input_subs, output_subs):
    """Build one {"text": ...} record per row in the user/assistant chat layout.

    Each row must provide string values under the 'input' and 'output' keys.
    """
    records = []
    for row in rows:
        user_part = _substitute(row['input'], input_subs)
        assistant_part = _substitute(row['output'], output_subs)
        records.append(
            {"text": "<s><|user|>\n" + user_part + '\n<|assistant|>\n' + assistant_part + '<|end|>'}
        )
    return records


# Training split -> train.json
dataset_data = _to_chat_records(train, _TRAIN_INPUT_SUBS, _TRAIN_OUTPUT_SUBS)
with open("/mnt/datascience1/train.json", "w") as f:
    json.dump(dataset_data, f)

# Held-out split -> test.json (after this, `dataset_data` holds the test records)
dataset_data = _to_chat_records(test, _TEST_INPUT_SUBS, _TEST_OUTPUT_SUBS)
with open("/mnt/datascience1/test.json", "w") as f:
    json.dump(dataset_data, f)

In [6]:
from datasets import load_dataset

# Read the two JSON files back as datasets. Each file is loaded on its own,
# so both end up under the 'train' key of their respective result --
# `final_dataset1['train']` is the held-out data despite the key name.
final_dataset = load_dataset(path="json", data_files="/mnt/datascience1/train.json")
final_dataset1 = load_dataset(path="json", data_files="/mnt/datascience1/test.json")
(final_dataset["train"], final_dataset1["train"])

(Dataset({
     features: ['text'],
     num_rows: 620
 }),
 Dataset({
     features: ['text'],
     num_rows: 72
 }))

In [18]:
# Sanity check: show the 'text' field of the first training record, i.e. the
# exact string the trainer reads via dataset_text_field="text".
print(final_dataset['train'][0]['text'])

<s><|user|>
Task is move BallG to the center of BoxH
You are a robotic arm with advanced planning capabilities. Your task is to generate Python code using parameterized skills (open_gripper(), close_gripper(), move_to_position(), get_graspable_point(), get_size()) that accomplishes the user's specified task.
Please produce executable Python code that employs these pre-scripted parameterized skills. Remember to import the necessary package before running the code. Carefully think through your plans and code.
When generating plans, consider spatial relationships meticulously. 
For example: If you need to pick up an object, first move to a position above it, then move down to grasp it. Moving directly to the object's position may push it away. Treat it as a two-step process. After this, consider whether the gripper might hit another object while moving to the next position.
Here is an example snippet for your reference, demonstrating how to call the function:
""
python
import numpy as np 

In [60]:
# Assemble the supervised fine-tuning trainer from the model, tokenizer,
# configs and datasets prepared in the earlier cells.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=train_conf,
    peft_config=peft_conf,
    train_dataset=final_dataset["train"],
    eval_dataset=final_dataset1["train"],
    dataset_text_field="text",
    max_seq_length=2048,
    # NOTE(review): with packing on, the log reports 337 training examples
    # rather than the 620 rows loaded -- presumably rows are concatenated into
    # max_seq_length chunks; confirm this is the intended behaviour.
    packing=True,
)

# Run training, keep the reported metrics, then persist the trainer state.
train_result = trainer.train()
metrics = train_result.metrics
trainer.save_state()

Generating train split: 337 examples [00:00, 593.66 examples/s]
Generating train split: 28 examples [00:00, 635.53 examples/s]
Using auto half precision backend
***** Running training *****
  Num examples = 337
  Num Epochs = 3
  Instantaneous batch size per device = 4
  Training with DataParallel so batch size has been adjusted to: 12
  Total train batch size (w. parallel, distributed & accumulation) = 12
  Gradient Accumulation steps = 1
  Total optimization steps = 87
  Number of trainable parameters = 1,572,864


Step,Training Loss
20,1.4661
40,1.3363
60,1.2025
80,1.1355




Training completed. Do not forget to share your model on huggingface.co/models =)




In [61]:
# Save the trained model into the output directory configured in `train_conf`.
# NOTE(review): that directory is the relative "./checkpoint_dir", while the
# reload cell reads "/mnt/datascience1/checkpoint_dir" -- the two only match
# when the notebook's working directory is /mnt/datascience1; confirm.
trainer.save_model(train_conf.output_dir)

Saving model checkpoint to ./checkpoint_dir
tokenizer config file saved in ./checkpoint_dir/tokenizer_config.json
Special tokens file saved in ./checkpoint_dir/special_tokens_map.json


In [3]:
from peft import PeftModel, PeftConfig

output_dir = "/mnt/datascience1/checkpoint_dir"
device = "cuda:0"
model_name = "microsoft/Phi-3-mini-4k-instruct"

# Reload the untouched base model in bfloat16 ...
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
# ... and the tokenizer from the checkpoint directory written by training.
tokenizer = AutoTokenizer.from_pretrained(
    output_dir,
    trust_remote_code=True,
    add_eos_token=True,
    use_fast=True,
)

# Load the LoRA adapter configuration.
# NOTE(review): this rebinds `peft_config`, the name the configuration cell
# uses for the plain LoRA settings dict; re-running that cell's
# `LoraConfig(**peft_config)` line afterwards would see this object instead.
peft_config = PeftConfig.from_pretrained(output_dir)

# Apply the LoRA adapter to the base model, then fold the adapter weights
# into it so the result is a plain model again.
model = PeftModel.from_pretrained(model, output_dir, torch_dtype=torch.bfloat16)
model = model.merge_and_unload()
# The move to `device` is left disabled, so `device` is currently unused:
# model = model.to(device)

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  2.42it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [14]:
from transformers import pipeline

# Text-generation pipeline around the merged model and its tokenizer.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
def test_inference(prompt):
    prompt = pipe.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], tokenize=False, add_generation_prompt=True)
    outputs = pipe(prompt, max_new_tokens = 1024, do_sample=True, num_beams=1, temperature=0.2, top_k=50, top_p=0.95,
                   max_time= 1024)# .to(device) #, eos_token_id=eos_token)
    return outputs[0]['generated_text'][len(prompt):].strip()

# Smoke-test the merged model and print its reply.
# NOTE(review): if `prompt` is the list loaded from all_data.json, then
# `prompt[12:]` is a slice (every record from index 12 onward) and str() turns
# that whole list into one prompt string -- confirm whether a single record's
# 'input' field was intended instead.
print(test_inference(str(prompt[12:])))