## Distributed Fine Tuning


In [12]:
import os 
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel, PeftConfig
import json

# Expose only GPU 0 to this process and seed torch's random number generator.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
torch.random.manual_seed(0)

# Run on CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Checkpoint directory of the fine-tuned LoRA adapter; the tokenizer is read from it too.
output_dir = "phi-3-mini-LoRA/checkpoint-725/"

# Base model weights are loaded in bfloat16.
model_name = "microsoft/Phi-3-mini-4k-instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(
    output_dir,
    trust_remote_code=True,
    add_eos_token=True,
    use_fast=True,
)

# Read the adapter configuration stored alongside the checkpoint.
peft_config = PeftConfig.from_pretrained(output_dir)

# NOTE: the adapter is currently NOT applied. The two lines below are disabled,
# so it is the plain base model that is moved to the device and used afterwards.
# model = PeftModel.from_pretrained(model, output_dir, torch_dtype=torch.bfloat16)
# model = model.merge_and_unload()
model = model.to(device)

# Alternative plain-text prompt: '/home/xilun/ET_robot/prompt_stack_two.txt'
prompt_path = "/home/xilun/ET_robot/dataset/172_dataset.json"

# Parse the dataset inside a context manager so the file handle is closed
# deterministically, then keep the "input" field of the first record.
with open(prompt_path) as prompt_file:
    prompt = json.load(prompt_file)[0]["input"]

# Strip the "Starts<" / ">Ends" wrappers and the chat-role markers; the text is
# sent below as the content of a single user chat message instead.
prompt = prompt.replace("Starts<", "").replace(">Ends", "")
prompt = prompt.replace("<|user|>", "").replace("<|assistant|>", "").replace("<|end|>", "")
print(prompt)

messages = [
    {"role": "user", "content": prompt},
]

# Decoding settings forwarded to the text-generation pipeline call below.
# Decoding is greedy (do_sample=False), so no sampling knobs (temperature,
# top_k, top_p) are set: with sampling disabled they have no effect on the
# output and only make transformers emit a configuration warning.
generation_args = {
    "max_new_tokens": 1500,
    "do_sample": False,
    # Return only the newly generated text, not the prompt.
    "return_full_text": False,
}

# Build the text-generation pipeline around the already-loaded model/tokenizer.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# `messages` is a chat-style list with a single user turn.
output = pipe(messages, **generation_args)
print(output[0]['generated_text'])



# output = model.generate(
#     input_ids=inputs["input_ids"],
#     max_new_tokens=generation_args["max_new_tokens"],
#     temperature=generation_args["temperature"],
#     do_sample=generation_args["do_sample"],
#     # top_k=generation_args["top_k"],
#     # top_p=generation_args["top_p"],
#     # pad_token_id=tokenizer.pad_token_id,
#     # eos_token_id=tokenizer.eos_token_id,
# )
# # Decode the generated output with chat template



# output_text = tokenizer.decode(output[0], skip_special_tokens=True)
# print(output_text)
# Drop every reference to the model before collecting. The pipeline object
# keeps its own reference to the model, so deleting `model` alone would leave
# the weights alive and nothing would actually be released.
del pipe
del model

# Python's garbage collector module.
import gc

# Force full garbage-collection passes so unreachable objects are reclaimed;
# the second pass picks up anything that only became unreachable after the first.
gc.collect()
gc.collect()
# Release the memory blocks that PyTorch's CUDA caching allocator no longer uses.
torch.cuda.empty_cache()


Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.47it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.




Task is place CubeA on top of CubeB

You are a robotic arm with advanced planning capabilities. Your task is to generate Python code using parameterized skills (open_gripper(), close_gripper(), move_to_position(), get_graspable_point(), get_size()) that accomplishes the user's specified task.
Please produce executable Python code that employs these pre-scripted parameterized skills. Remember to import the necessary package before running the code. Carefully think through your plans and code.
When generating plans, consider spatial relationships meticulously. 
For example: If you need to pick up an object, first move to a position above it, then move down to grasp it. Moving directly to the object's position may push it away. Treat it as a two-step process. After this, consider whether the gripper might hit another object while moving to the next position.
Here is an example snippet for your reference, demonstrating how to call the function:
"""
python
import numpy as np  # import num

In [13]:
prompt_path = "/home/xilun/ET_robot/dataset/172_dataset.json"

# Show the raw, unprocessed "input" field of the first dataset record.
# The context manager closes the file handle as soon as the JSON is parsed.
with open(prompt_path) as prompt_file:
    prompt = json.load(prompt_file)[0]["input"]

print(prompt)

<|user|>Starts<

Task is place CubeA on top of CubeB

You are a robotic arm with advanced planning capabilities. Your task is to generate Python code using parameterized skills (open_gripper(), close_gripper(), move_to_position(), get_graspable_point(), get_size()) that accomplishes the user's specified task.
Please produce executable Python code that employs these pre-scripted parameterized skills. Remember to import the necessary package before running the code. Carefully think through your plans and code.
When generating plans, consider spatial relationships meticulously. 
For example: If you need to pick up an object, first move to a position above it, then move down to grasp it. Moving directly to the object's position may push it away. Treat it as a two-step process. After this, consider whether the gripper might hit another object while moving to the next position.
Here is an example snippet for your reference, demonstrating how to call the function:
"""
python
import numpy as n

In [None]:
import os 
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel, PeftConfig, AutoPeftModelForCausalLM
import json

# Expose only GPU 0 to this process and seed torch's random number generator.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
torch.random.manual_seed(0)

# Run on CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Checkpoint directory of the fine-tuned LoRA adapter; the tokenizer is read from it too.
output_dir = "phi-3-mini-LoRA/checkpoint-1674/"
tokenizer = AutoTokenizer.from_pretrained(
    output_dir,
    trust_remote_code=True,
    add_eos_token=True,
    use_fast=True,
)

# Load the PEFT model straight from the adapter checkpoint, in bfloat16.
new_model = AutoPeftModelForCausalLM.from_pretrained(
    output_dir,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    return_dict=True,
)

# Move to the target device first, then merge the adapter and unload the PEFT wrapper.
new_model = new_model.to(device).merge_and_unload()
# Alternative plain-text prompt: '/home/xilun/ET_robot/prompt_stack_two.txt'
prompt_path = "/home/xilun/ET_robot/dataset/172_dataset.json"

# Parse the dataset inside a context manager so the file handle is closed
# deterministically, then keep the "input" field of the first record.
with open(prompt_path) as prompt_file:
    prompt = json.load(prompt_file)[0]["input"]

# Remove the chat-role markers if they are present in the stored prompt.
# NOTE(review): the "Starts<" / ">Ends" wrappers are left in place here, whereas
# the base-model cell strips them — confirm this difference is intended.
prompt = prompt.replace("<|user|>", "").replace("<|assistant|>", "").replace("<|end|>", "")


# Single source of truth for the generation settings used by the call below.
# These are the values the pipeline call actually used; the dict previously
# defined here was never passed to the call and described a different (greedy)
# configuration.
generation_args = {
    "max_new_tokens": 1500,
    "do_sample": True,
    "num_beams": 1,
    "temperature": 0.3,
    "top_k": 50,
    "top_p": 0.95,
    "max_time": 180,
    # Keep the prompt in the returned text: the completion is recovered below
    # by slicing off the first len(prompt) characters.
    "return_full_text": True,
}

# Build the text-generation pipeline around the merged model and its tokenizer.
pipe = pipeline(
    "text-generation",
    model=new_model,
    tokenizer=tokenizer,
)

# Render the raw text as a single user turn with the tokenizer's chat template,
# returned as a string (tokenize=False) with the generation prompt appended.
prompt = pipe.tokenizer.apply_chat_template(
    [{"role": "user", "content": prompt}],
    tokenize=False,
    add_generation_prompt=True,
)

outputs = pipe(prompt, **generation_args)
print(outputs[0]['generated_text'][len(prompt):].strip())



