In [4]:
from transformers import set_seed, AutoModelForCausalLM, AutoTokenizer
import torch
from tqdm import tqdm
from datasets import load_dataset
from peft import PeftModel
from utils.helping_functions import save_results
from utils.solution_generation import  read_matrix_form_jssp, generate_multiple_solutions

# Seed the RNGs through transformers' set_seed so reruns are repeatable.
seed = 42
set_seed(seed)

# Place the model on the first CUDA device ("auto" is the alternative placement).
dev_map = "cuda:0"

# Base checkpoint and the keyword arguments used to load it in half precision.
checkpoint_path = 'microsoft/Phi-3.5-mini-instruct'
model_kwargs = {
    "use_cache": False,
    "trust_remote_code": True,
    "torch_dtype": torch.float16,
    "device_map": dev_map,
}
model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)

# Tokenizer with left padding and a 1000-token length cap.
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
tokenizer.model_max_length = 1000
# Pad with the unk token rather than the eos token to prevent endless generation.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'left'

# Separate tokenizer instance used by the generation helper; also padded with unk.
eval_tokenizer = AutoTokenizer.from_pretrained(checkpoint_path, trust_remote_code=True)
eval_tokenizer.pad_token = eval_tokenizer.unk_token

# Attach the fine-tuned adapter checkpoint to the base model in inference-only mode ...
new_adapter_path = "./testing_final_code_peft-phi3-Gcode_Generation_30k/checkpoint-302"

finetuned_model = PeftModel.from_pretrained(
    model,
    new_adapter_path,
    torch_dtype=torch.float16,
    is_trainable=False,
    device_map=dev_map,
)
# ... then merge the adapter into the base model and drop the PEFT wrapper.
finetuned_model = finetuned_model.merge_and_unload()




Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.29s/it]


In [5]:
# Local JSON file holding the prompt/response pairs.
custom_dataset_name = './gcode_prompt_response_data.json'

# Read it with the "json" builder and print the resulting dataset summary.
dataset = load_dataset(path="json", data_files=custom_dataset_name)
print(dataset)


DatasetDict({
    train: Dataset({
        features: ['index', 'prompt', 'response'],
        num_rows: 30000
    })
})


In [6]:
# Alias for the fine-tuned model that the generation code refers to.
my_model = finetuned_model

def gen(model, p, maxlen=1000, sample=False):
    """
    Generate text for a single prompt using the model passed in.

    The prompt is tokenized with the notebook-level ``eval_tokenizer``, the
    encoded inputs are moved to the model's device, ``model.generate`` is
    called, and the resulting sequences are decoded back to text.

    Args:
        model: Object exposing ``generate(**inputs, ...)``. This is the model
            that actually runs generation. If it has a ``device`` attribute
            the inputs are moved there; otherwise "cuda" is used.
        p (str): The prompt text.
        maxlen (int, optional): Forwarded as ``max_new_tokens``. Defaults to 1000.
        sample (bool, optional): Forwarded as ``do_sample``. Defaults to False.

    Returns:
        list: Decoded sequences with special tokens skipped
        (``num_return_sequences`` is fixed to 1).
    """
    toks = eval_tokenizer(p, return_tensors="pt")
    # Follow the model's own placement instead of assuming a fixed device.
    target_device = getattr(model, "device", "cuda")
    res = model.generate(
        **toks.to(target_device),
        max_new_tokens=maxlen,
        do_sample=sample,
        num_return_sequences=1,
    ).to('cpu')
    return eval_tokenizer.batch_decode(res, skip_special_tokens=True)





In [7]:
# Run the model on one dataset example and print it next to the reference response.
index = 2
example = dataset['train'][index]  # fetch the row once instead of indexing twice
prompt = example['prompt']
summary = example['response']

formatted_prompt = f"Instruct: Provide a Gcode for following problem below.\n{prompt}\nOutput:\n"
res = gen(my_model, formatted_prompt, 1000)

# Keep everything after the FIRST "Output:\n" marker. Splitting only once means a
# generation that repeats the marker is not truncated, and taking the last piece
# falls back to the full text (instead of raising IndexError) if the marker is absent.
output = res[0].split('Output:\n', 1)[-1]

dash_line = '-' * 99  # 99-character separator line
print(dash_line)
print(f'INPUT PROMPT:\n{formatted_prompt}')
print(dash_line)
print(f'SOLUTION FROM GCODE Dataset Generator:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{output}')

The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. Calling `get_max_cache()` will raise error from v4.48
You are not running the flash-attention implementation, expect numerical differences.


---------------------------------------------------------------------------------------------------
INPUT PROMPT:
Instruct: Provide a Gcode for following problem below.
A rectangular plastic block of length 20mm, width 30mm and a height of 11mm is fixtured in a milling machine,   where the stock has to be milled on all 4 sides by 0mm. The top left corner is placed at (50, 2) in reference to the home postion of the machine   The top left corner needs a Chamfer of 8mm. The top right corner needs a Chamfer of 8mm. The bottom left corner needs a Chamfer of 4mm. The bottom right corner needs a Chamfer of 2mm.
  A mill tool of diameter 8mm   having a side tooth engagement of 20mm which is greater than the thickness of stock is used for milling.
  The bottom tip of the tool will be positioned 2.75mm below the part surface. 
The cutter will be operated at a spindle speed of 8000 RPM   and at a feed rate of 3000 mm/min.
The TM1 mill cutter is in the location T6   with tool length stored in H04,