In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Gemma3ForCausalLM
import torch
from peft import PeftModel, PeftConfig
from datasets import load_dataset,Dataset

In [2]:
import sys
sys.path.append("/srv/chawak/planning-with-llms/src")
import shared.llm_utils as llm_utils

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [3]:
import os
from huggingface_hub import login

# Authenticate against the Hugging Face Hub.
# The access token is read from the HF_TOKEN environment variable instead of being
# written into the notebook, so it is never saved or shared with the file.
# If HF_TOKEN is unset, login() receives None and falls back to its interactive prompt.
# NOTE(review): a token was previously committed in this cell — it should be revoked.
login(token=os.environ.get("HF_TOKEN"))

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
# Expose only GPU index 1 to this process.
# NOTE(review): torch and llm_utils were already imported in earlier cells; confirm that
# CUDA has not been initialised yet, otherwise this setting may have no effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# Previously used model name / cache location, kept for reference.
#name='google/gemma-3-12b-it'
#cache_dir='/home/chawak/huggingface'

In [5]:
torch.cuda.ipc_collect()
torch.cuda.empty_cache()

In [6]:
base_model,tokenizer=llm_utils.get_model_tokenizer()

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

### Step 1. Tokenize the dataset ###

In [7]:
split='train'
n=3

#####  SFT: 1-1 Gemma-3 tokenizing #####

In [None]:
#load the dataset
import numpy as np

#CSV holding the SFT examples for the selected split and number of blocks
dataset_path=f'../data/{n}_blocks/SFT_full_{split}_{n}_blocks'
#dataset_path=f'../data/{n}_blocks/SFT_{split}_{n}_blocks_fullPlan'

#read the CSV under the split name and drop the columns that are not used for tokenizing
data=load_dataset("csv",data_files={split:dataset_path}).remove_columns(
    ['Unnamed: 0', 'init', 'goal', 'demo_init', 'demo_goal', 'demo_plan']
)
print(f'Loaded HuggingFace dataset : {data}')


In [None]:
#model,tokenizer=llm_utils.get_model_tokenizer()

In [None]:
print("EOS token:", tokenizer.eos_token)
print("EOS token ID:", tokenizer.eos_token_id)

In [None]:
# import pandas as pd
# train_text={'prompt':['I am cat'],
#             'gold_plan':['I eat fish']}
# val_text={'prompt':['I am dog'],
#           'gold_plan':['I eat chicken']}
# train_df=pd.DataFrame(data=train_text)
# val_df=pd.DataFrame(data=val_text)
# train_df.to_csv('../data/toy_train')
# val_df.to_csv('../data/toy_val')

In [None]:
#load the TOY dataset
# import numpy as np
# split='val'
# #dataset_path=f'../data/{n}_blocks/SFT_full_{split}_{n}_blocks'
# dataset_path=f'../data/toy_{split}'
# data=load_dataset("csv",data_files={split:dataset_path})
# data=data.remove_columns(['Unnamed: 0'])
# print(f'Loaded HuggingFace dataset : {data}')

In [None]:

#helper function for tokenizing
def tokenize_and_mask_function(examples):
    """Tokenize merged prompt+plan texts and build loss labels aligned with them.

    Args:
        examples: batch mapping with 'prompt' and 'gold_plan' lists of strings
            (the form passed by ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output for the merged texts (right-padded / truncated to
        1200 tokens) with 'input_ids' converted to int64 arrays, plus a
        'labels' entry of the same length per example. A label equals the
        input id at the same position, except for the masked prompt prefix and
        the padding positions, which are set to -100.
    """
    #concatenate prompt and gold plan within our template:
    #the last 2 characters of the prompt and the first 6 of the plan are dropped
    merged_inputs=[f"{p[:-2]}{g[6:]}" for p,g in zip(examples['prompt'],examples['gold_plan'])]

    #tokenize concatenated input
    tokenized=tokenizer(
        merged_inputs,
        truncation=True,
        padding='max_length',
        padding_side='right',
        max_length=1200
    )

    #tokenize ONLY the prompts and get their lengths
    tokenized_prompts=tokenizer(
        examples['prompt'],
        truncation=False
    )
    prompt_lens = [len(ptoken) for ptoken in tokenized_prompts['input_ids']]

    #masking prompt tokens for the labels
    labels=[]
    for input_ids, attention, prompt_length in zip(
        tokenized['input_ids'], tokenized['attention_mask'], prompt_lens
    ):
        #number of leading tokens to mask: prompt length minus the 4-token
        #adjustment for the prompt template (presumably the tokens lost by
        #cutting the prompt's last 2 characters - TODO confirm), clamped to the
        #sequence bounds
        mask_len=min(max(prompt_length-4,0),len(input_ids))

        #Replace exactly mask_len positions. The previous slice assignment wrote
        #prompt_length values into a slice of prompt_length-4, which grew the
        #list by 4 and, after trimming the tail, shifted every label 4
        #positions away from its input token.
        label=[-100]*mask_len+list(input_ids[mask_len:])

        #padding positions must not contribute to the loss either
        label=[tok if keep else -100 for tok,keep in zip(label,attention)]
        labels.append(np.array(label, dtype=np.int64))

    tokenized['labels']=labels
    tokenized['input_ids'] = [np.array(ids, dtype=np.int64) for ids in tokenized['input_ids']]
    return tokenized


In [None]:
#map tokenizer function to our dataset
tokenized_data=data.map(tokenize_and_mask_function,batched=True)

In [None]:
tokenized_data

In [None]:
# from datasets import load_dataset
# dataset_path=f'../data/{n}_blocks/SFT_{split}_{n}_blocks_fullPlan'
# data=load_dataset("csv",data_files={split:dataset_path})

In [None]:
# path=f'../data/{n}_blocks/SFT_{split}_{n}_blocks_fullPlan'
# data=pd.read_csv(path)
# data=data.drop(columns=['Unnamed: 0', 'init', 'goal', 'demo_init', 'demo_goal', 'demo_plan',])
# data

In [None]:
# path=f'../data/{n}_blocks/SFT_contaminated_test_{n}_blocks_fullPlan'
# cont=pd.read_csv(path)
# cont=cont.drop(columns=['Unnamed: 0', 'init', 'goal', 'demo_init', 'demo_goal', 'demo_plan',])
# cont

In [None]:
# train=pd.concat([data,cont])

In [None]:
# train.to_csv(f'../data/{n}_blocks/SFT_full_{split}_{n}_blocks')

In [None]:
# cont_data=cont_data.remove_columns(['Unnamed: 0', 'init', 'goal', 'demo_init', 'demo_goal', 'demo_plan',])
# cont_data

In [None]:
encoded_ip=tokenized_data[split][0]['input_ids']
encoded_ip

In [None]:
len(encoded_ip)

In [None]:
decoded_ip=tokenizer.decode(encoded_ip)
print(decoded_ip)

In [None]:
encoded_op=tokenized_data[split][0]['labels']
encoded_op

In [None]:
decoded_op=tokenizer.decode([token for token in encoded_op if token != -100])
print(decoded_op)

In [None]:
len(encoded_op)

In [None]:
#tokenized_data.save_to_disk(f"/srv/chawak/planning-with-llms/data/toy_label_nopad")
tokenized_data.save_to_disk(f"/srv/chawak/planning-with-llms/data/{n}_blocks/tokenized_dataset")

#####  GRPO: 1-1 Gemma-3 tokenizing #####

In [8]:
#load the dataset
import numpy as np

#dataset_path=f'/srv/chawak/planning-with-llms/data/{n}_blocks/GRPO_full_{split}_{n}_blocks'
dataset_path=f'/srv/chawak/planning-with-llms/data/{n}_blocks/SFT_{split}_{n}_blocks_fullPlan'
#dataset_path=f'/srv/chawak/planning-with-llms/data/{n}_blocks/SFT_full_{split}_{n}_blocks'

In [9]:
# Read the CSV at dataset_path into a DatasetDict keyed by the current split name.
data=load_dataset("csv",data_files={split:dataset_path})
# Drop the CSV index column and the demonstration columns; 'init' and 'goal' are kept here.
data=data.remove_columns(['Unnamed: 0', 'demo_init', 'demo_goal', 'demo_plan'])
print(f'Loaded HuggingFace dataset : {data}')

Loaded HuggingFace dataset : DatasetDict({
    train: Dataset({
        features: ['init', 'goal', 'prompt', 'gold_plan'],
        num_rows: 99
    })
})


In [10]:
print("EOS token ID:", tokenizer.eos_token_id)
print("Padding token ID:", tokenizer.pad_token_id)

EOS token ID: 1
Padding token ID: 0


In [11]:
system_prompt='''I am a blocksworld plan generator.
I first think about the reasoning process in the mind and then provide the user with the plan.
The reasoning process and plan are enclosed within <think> </think> and [PLAN] [PLAN END] tags, respectively,
i.e., <think> reasoning process here </think> [PLAN] plan here [PLAN END].
'''

In [12]:

#helper function for tokenizing
def tokenize_function(examples):
    """Render each prompt through the chat template and tokenize the result.

    Args:
        examples: batch mapping with a 'prompt' list of strings (the form
            passed by ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output for the chat-formatted prompts.
    """
    #apply chat template before tokenizing
    chat_prompts=[]
    for prompt in examples["prompt"]:
        chat_prompts.append(
            tokenizer.apply_chat_template(
                [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ],
                tokenize=False,
                add_generation_prompt=True
            )
        )

    print(chat_prompts[:1])

    #tokenize chat-version of prompt
    #The rendered template text already carries its special tokens (it starts
    #with <bos>), so the tokenizer must not add them again - otherwise every
    #sequence begins with two BOS tokens.
    tokenized_prompts=tokenizer(
        chat_prompts,
        add_special_tokens=False
    )

    prompt_lens = [len(ptoken) for ptoken in tokenized_prompts['input_ids']]
    print("Lengths of tokenized prompts", prompt_lens)

    return tokenized_prompts


In [None]:
#tags="Answer within the [PLAN] [PLAN END] tags."
#instruction text appended to every prompt (built from adjacent string literals)
think=(
    "I first think about the reasoning process in the mind and then provide the user with the plan."
    " The reasoning process and plan are enclosed within <think> </think> and [PLAN] [PLAN END] tags, respectively,"
    " i.e., <think> reasoning process here </think> [PLAN] plan here [PLAN END]."
)

def add_additional_string(sample):
    """Cut the last 8 characters off sample['prompt'] and append `think`.

    The prompt is printed once after trimming and once after appending.
    The sample is modified in place and also returned.
    """
    trimmed=sample['prompt'][:-8]
    sample['prompt']=trimmed
    print(f"{trimmed}")

    extended=trimmed+think
    sample['prompt']=extended
    print(f"{extended}")

    return sample

In [None]:
data=data.map(add_additional_string)

In [13]:
#map tokenizer function to our dataset
tokenized_data=data.map(tokenize_function,batched=True)

Map:   0%|          | 0/99 [00:00<?, ? examples/s]

['<bos><start_of_turn>user\nI am a blocksworld plan generator.\nI first think about the reasoning process in the mind and then provide the user with the plan.\nThe reasoning process and plan are enclosed within <think> </think> and [PLAN] [PLAN END] tags, respectively,\ni.e., <think> reasoning process here </think> [PLAN] plan here [PLAN END].\n\n\nI am playing with a set of blocks where I need to arrange the blocks into stacks\n        Here are the actions I can do: Pick up a block, Unstack a block from on top of another block, Put down a block, Stack a block on top of another block.\n        I have the following restrictions on my actions:\n        I can only pick up or unstack one block at a time\n        I can only pick up or unstack a block if my hand is empty\n        I can only pick up a block if the block is on the table and the block is clear\n        A block is clear if the block has no other blocks on top of it and if the block is not picked up\n        I can only unstack a 

In [14]:
tokenized_data

DatasetDict({
    train: Dataset({
        features: ['init', 'goal', 'prompt', 'gold_plan', 'input_ids', 'attention_mask'],
        num_rows: 99
    })
})

In [15]:
tokenized_data.save_to_disk(f"/srv/chawak/planning-with-llms/data/{n}_blocks/GRPO_systhink_tokenized_dataset")

Saving the dataset (0/1 shards):   0%|          | 0/99 [00:00<?, ? examples/s]

#####  1-2 Dummy data tokenizing #####

In [None]:
op=[2, 236777,   1006,   4799, 236777,   9039,  12480,      1,   -100, -100]
ip=[     2, 236777,   1006,   4799, 236777,   9039,  12480,      1,      0,
             0]
decoded_op=tokenizer.decode([token for token in op if token != -100])
print(f'Decoded labels : {decoded_op}')

In [None]:
#verifying encoded file storage
from datasets import Dataset
split='val'
n=3
# /srv/chawak/planning-with-llms/data/3_blocks/tokenized_dataset/test/
# ds= Dataset.from_file(f'/srv/chawak/planning-with-llms/data/{n}_blocks/tokenized_dataset/{split}/data-00000-of-00001.arrow')
ds=Dataset.from_file(f"/srv/chawak/planning-with-llms/data/{n}_blocks/GRPO_tags_tokenized_dataset/{split}/data-00000-of-00001.arrow")
#ds= Dataset.from_file(f'/srv/chawak/planning-with-llms/data/toy_label_nopad/{split}/data-00000-of-00001.arrow')

In [None]:
ds

In [None]:
encoded_input=ds[0]['input_ids']
encoded_input

In [None]:
ds[0]

In [None]:
len(encoded_input)

In [None]:
encoded_labels=ds[0]['labels']
len(encoded_labels)

In [None]:
decoded_ip=tokenizer.decode(encoded_input)
decoded_ip

In [None]:
decoded_op=tokenizer.decode([token for token in encoded_labels if token != -100])
decoded_op

In [None]:
# Print every entry yielded by model.named_modules().
# named_modules() yields (name, module) pairs, so `name` holds the whole pair here.
# NOTE(review): `model` is not assigned in any earlier cell of this notebook
# (only `base_model` is) — confirm which object this is meant to inspect.
for name in model.named_modules():
    print(name)

### Step 2. Define hyperparameters ###

In [None]:
import yaml
from transformers import TrainingArguments, AdamW
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset, DatasetDict, load_from_disk
from peft import LoraConfig, get_peft_model

In [None]:
#load dataset
n=3
split='train'
data_path=f'../data/{n}_blocks/tokenized_dataset/{split}'
train_data=load_from_disk(data_path)
train_data

In [None]:
#load dataset
n=3
split='val'
data_path=f'../data/{n}_blocks/tokenized_dataset/{split}'
eval_data=load_from_disk(data_path)
eval_data

In [None]:
cache_dir='../results/SFT'

In [None]:
#load model and tokenizer
#The model name and device map come from config.yaml. The config is read here
#because this cell runs before the separate "get config file" cell, so a fresh
#top-to-bottom run would otherwise stop with a NameError on cfg.
with open("config.yaml", "r") as f:
    cfg=yaml.safe_load(f)

tokenizer=AutoTokenizer.from_pretrained(cfg['model']['name'],cache_dir=cache_dir)
model=AutoModelForCausalLM.from_pretrained(
    cfg['model']['name'],
    cache_dir=cache_dir,
    device_map=cfg['model']['device_map'],
    torch_dtype=torch.bfloat16  #load the weights in bfloat16
)

In [None]:
#dataset=Dataset.from_file(data_path)

In [None]:
#get config file
with open("config.yaml", "r") as f:
    cfg=yaml.safe_load(f)

In [None]:
#LORA config 
peft_args=LoraConfig(
    r=cfg['peft']['r'],
    lora_alpha=cfg['peft']['lora_alpha'],
    lora_dropout=cfg['peft']['lora_dropout'],
    task_type=cfg['peft']['task_type']
)

In [None]:
#get LORA model
lora_model=get_peft_model(model,peft_args)

In [None]:
lora_layers=lora_model.parameters()

In [None]:
lora_layers

In [None]:
#Training hyperparameters, all taken from the 'training' section of config.yaml.
#Checkpoints go to a per-block-count sub-directory of the configured output_dir.
training_args=SFTConfig(
    output_dir=cfg['training']['output_dir']+f'/{n}_blocks',
    num_train_epochs=cfg['training']['num_train_epochs'],
    per_device_train_batch_size= cfg['training']['per_device_train_batch_size'],
    gradient_accumulation_steps=cfg['training']['gradient_accumulation_steps'],
    learning_rate=cfg['training']['learning_rate'],
    weight_decay=cfg['training']['weight_decay'],
    warmup_ratio=cfg['training']['warmup_ratio'],
    #adam_epsilon=cfg['training']['adam_epsilon'],
    #optim=cfg['training']['optim'],
    logging_steps=cfg['training']['logging_steps'],
    save_steps=cfg['training']['save_steps'],
    eval_steps=cfg['training']['eval_steps'],
    #`eval_strategy` is the current keyword; the old `evaluation_strategy`
    #spelling is deprecated. The config key itself is left unchanged.
    eval_strategy=cfg["training"]["evaluation_strategy"],
    save_strategy=cfg["training"]["save_strategy"],
    fp16=cfg["training"]["fp16"],
    bf16=cfg["training"]["bf16"],
    report_to=cfg["training"]["report_to"]
)

In [None]:
#optimizer args dictionary
optimizer = {
    'params': lora_layers,
    'lr': float(cfg['training']['learning_rate']),
}

In [None]:
type(optimizer)

In [None]:
trainer=SFTTrainer(
    model=lora_model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_args,
    optimizer_cls_and_kwargs=(AdamW,optimizer)
)

In [None]:
trainer.train()

In [None]:
metrics = trainer.evaluate()
print(metrics)

### Step 3 : Infer on the trained model ###

In [None]:
model_path='/srv/chawak/planning-with-llms/results/SFT/training/training_21-05/checkpoint-8172'

In [None]:
# Attach the fine-tuned adapter checkpoint to the base model for inference.
model=base_model #init model is base
base_dir='/srv/chawak/planning-with-llms/results/SFT'
# Checkpoint directory of the training run to load
model_path=base_dir+f'/training/training_21-05/checkpoint-8172'
# is_trainable=False: the adapter is loaded for inference only
peft_model=PeftModel.from_pretrained(model,model_path,is_trainable=False,adapter_name="default")
# Note: this message is printed after the load has already completed.
print(f'\n\n++++++++ Loading model from: {model_path}')

In [None]:
# Build the inference prompt: a fixed think/answer instruction followed by the user question.
# NOTE(review): `question` is interpolated here but is only assigned in a later cell
# of this notebook — that cell has to be run first.
prompt=f'''A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.
The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>.
User:{question}'''

In [None]:
tokenized_input, processor=llm_utils.get_tokenized_input(prompt=prompt,model=peft_model)

In [None]:
r= llm_utils.query_local_model(tokenized_input=tokenized_input,processor=processor,model=model,temperature=0.1)

In [None]:
print(r)

In [None]:
#manual mode
# Generate directly with model.generate and decode only the tokens after the prompt.
# NOTE(review): `inputs` and `input_len` are not assigned anywhere in the visible
# notebook — confirm where they are expected to come from.
with torch.no_grad():  # no gradients are needed for generation
    outputs=model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0,
    )
# outputs[0][input_len:] skips the first input_len tokens of the returned sequence
print(f'Response from LLM: {tokenizer.decode(outputs[0][input_len:],skip_special_tokens=True)}')

In [None]:
question='''I am playing with a set of blocks where I need to arrange the blocks into stacks
        Here are the actions I can do: Pick up a block, Unstack a block from on top of another block, Put down a block, Stack a block on top of another block.
        I have the following restrictions on my actions:
        I can only pick up or unstack one block at a time
        I can only pick up or unstack a block if my hand is empty
        I can only pick up a block if the block is on the table and the block is clear
        A block is clear if the block has no other blocks on top of it and if the block is not picked up
        I can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block
        I can only unstack a block from on top of another block if the block I am unstacking is clear
        Once I pick up or unstack a block, I am holding the block
        I can only put down a block that I am holding
        I can only stack a block on top of another block if I am holding the block being stacked
        I can only stack a block on top of another block if the block onto which I am stacking the block is clear
        Once I put down or stack a block, my hand becomes empty
        Once you stack a block on top of a second block, the second block is no longer clear

[STATEMENT]
As initial conditions I have that, the green block is clear,  the hand is empty, the red block is on the table, the pink block is on top of the red block, the green block is on top of the pink block.
My goal is to have that  the green block is on the table, the red block is on top of the green block, the pink block is on top of the red block.

My plan is as follows:

[PLAN]
unstack the green block from on top of the pink block
put down the green block
unstack the pink block from on top of the red block
put down the pink block
pick up the red block
stack the red block on top of the green block
pick up the pink block
stack the pink block on top of the red block
[PLAN END]

[STATEMENT]
As initial conditions I have that, the brown block is clear,  the hand is empty, the violet block is on the table, the teal block is on top of the violet block, the brown block is on top of the teal block.
My goal is to have that  the violet block is on the table, the teal block is on the table, the brown block is on the table.

My plan is as follows: 

[PLAN]
Answer within [PLAN] [PLAN END] tags.
'''

In [None]:
ds

In [None]:
ds['input_ids'][0]

In [None]:
ds['labels'][0]

In [None]:
de=tokenizer.decode([235309,
  42446,
  16960,
  235307])
print(de)

In [None]:
from datasets import Features,Sequence, Value
import sys
print(sys.executable)

In [None]:
n=4
split='val'

In [None]:
#load the dataset
import numpy as np

#dataset_path=f'../data/{n}_blocks/SFT_full_{split}_{n}_blocks'
dataset_path=f'../data/{n}_blocks/SFT_{split}_{n}_blocks_fullPlan'
data=load_dataset("csv",data_files={split:dataset_path})
data=data.remove_columns(['Unnamed: 0', 'init', 'goal', 'demo_init', 'demo_goal', 'demo_plan'])
print(f'Loaded HuggingFace dataset : {data}')

#helper function for tokenizing
def tokenize_and_mask_function(examples):
    """Tokenize merged prompt+plan texts and build loss labels aligned with them.

    Args:
        examples: batch mapping with 'prompt' and 'gold_plan' lists of strings
            (the form passed by ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output for the merged texts (right-padded / truncated to
        1200 tokens) with 'input_ids' converted to int64 arrays, plus a
        'labels' entry of the same length per example. A label equals the
        input id at the same position, except for the masked prompt prefix and
        the padding positions, which are set to -100.
    """
    #concatenate prompt and gold plan within our template:
    #the last 2 characters of the prompt and the first 6 of the plan are dropped
    merged_inputs=[f"{p[:-2]}{g[6:]}" for p,g in zip(examples['prompt'],examples['gold_plan'])]

    #tokenize concatenated input
    tokenized=tokenizer(
        merged_inputs,
        truncation=True,
        padding='max_length',
        padding_side='right',
        max_length=1200
    )

    #tokenize ONLY the prompts and get their lengths
    tokenized_prompts=tokenizer(
        examples['prompt'],
        truncation=False
    )
    prompt_lens = [len(ptoken) for ptoken in tokenized_prompts['input_ids']]

    #masking prompt tokens for the labels
    labels=[]
    for input_ids, attention, prompt_length in zip(
        tokenized['input_ids'], tokenized['attention_mask'], prompt_lens
    ):
        #number of leading tokens to mask: prompt length minus the 4-token
        #adjustment for the prompt template (presumably the tokens lost by
        #cutting the prompt's last 2 characters - TODO confirm), clamped to the
        #sequence bounds
        mask_len=min(max(prompt_length-4,0),len(input_ids))

        #Replace exactly mask_len positions. The previous slice assignment wrote
        #prompt_length values into a slice of prompt_length-4, which grew the
        #list by 4 and, after trimming the tail, shifted every label 4
        #positions away from its input token.
        label=[-100]*mask_len+list(input_ids[mask_len:])

        #padding positions must not contribute to the loss either
        label=[tok if keep else -100 for tok,keep in zip(label,attention)]
        labels.append(np.array(label, dtype=np.int64))

    tokenized['labels']=labels
    tokenized['input_ids'] = [np.array(ids, dtype=np.int64) for ids in tokenized['input_ids']]
    return tokenized

#map tokenizer function to our dataset
tokenized_data=data.map(tokenize_and_mask_function,batched=True)

In [None]:
tokenized_data

In [None]:
new_features = Features({
    "prompt": Value("string"),
    "gold_plan": Value("string"),
    "input_ids": Sequence(Value("int64")),
    "attention_mask": Sequence(Value("int64")),
    "labels": Sequence(Value("int64")),
})

tokenized_data = tokenized_data.cast(new_features)

In [None]:
#sanity checks on tokenized data
print(f'Data dictonary after tokenization: {tokenized_data}')

#decoding for merged-text (first example of the current split)
encoded_text=tokenized_data[split][0]['input_ids']
print(f'Length of encoded merged-text : {len(encoded_text)}')
decoded_text=tokenizer.decode(encoded_text, skip_special_tokens=False)
#report the length of the decoded string (this line previously printed the
#token count a second time)
print(f'Length of decoded merged-text : {len(decoded_text)}')
print(f'Decoded merged-text: {decoded_text}')

#decoding for labels: -100 entries are masked positions and are skipped
encoded_text=tokenized_data[split][0]['labels']
print(f'Length of encoded labels : {len(encoded_text)}')
decoded_text=tokenizer.decode(
    [token_id for token_id in encoded_text if token_id!=-100],
    skip_special_tokens=True)
print(f'Decoded labels: {decoded_text}')


In [None]:
tokenized_data.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

#save to file
tokenized_data.save_to_disk(f"/srv/chawak/planning-with-llms/data/{n}_blocks/tokenized_dataset")


In [None]:
#Inspect the type of the stored input_ids for the first example.
#Index by `split`: the dataset built in this section only holds the split it was
#loaded under ('val' here), so a hardcoded 'train' key raises KeyError.
type(tokenized_data[split][0]['input_ids'])

In [None]:
from datasets import load_from_disk
import llm_utils

In [None]:
model,tokenizer=llm_utils.get_model_tokenizer()

In [None]:
n=3
split='val'
data_path=f'../data/{n}_blocks/tokenized_dataset/{split}'
train_data=load_from_disk(data_path)
train_data

In [None]:
print(tokenizer.decode(train_data[0]['input_ids']))

In [None]:
prompt='''I am playing with a set of blocks where I need to arrange the blocks into stacks
        Here are the actions I can do: Pick up a block, Unstack a block from on top of another block, Put down a block, Stack a block on top of another block.
        I have the following restrictions on my actions:
        I can only pick up or unstack one block at a time
        I can only pick up or unstack a block if my hand is empty
        I can only pick up a block if the block is on the table and the block is clear
        A block is clear if the block has no other blocks on top of it and if the block is not picked up
        I can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block
        I can only unstack a block from on top of another block if the block I am unstacking is clear
        Once I pick up or unstack a block, I am holding the block
        I can only put down a block that I am holding
        I can only stack a block on top of another block if I am holding the block being stacked
        I can only stack a block on top of another block if the block onto which I am stacking the block is clear
        Once I put down or stack a block, my hand becomes empty
        Once you stack a block on top of a second block, the second block is no longer clear

[STATEMENT]
As initial conditions I have that, the brown block is clear, the teal block is clear,  the hand is empty, the violet block is on the table, the brown block is on top of the violet block, the teal block is on the table.
My goal is to have that  the brown block is on the table, the violet block is on top of the brown block, the teal block is on top of the violet block.

My plan is as follows:

[PLAN]
unstack the brown block from on top of the violet block
put down the brown block
pick up the violet block
stack the violet block on top of the brown block
pick up the teal block
stack the teal block on top of the violet block
[PLAN END]

[STATEMENT]
As initial conditions I have that, the green block is clear,  the hand is empty, the pink block is on the table, the red block is on top of the pink block, the green block is on top of the red block.
My goal is to have that  the pink block is on the table, the red block is on top of the pink block, the green block is on the table.

My plan is as follows: 
[PLAN]

Strategise at each step before you choose an action, then form a final plan. Answer within the [PLAN] [PLAN END] tags.
'''

In [None]:
input_ids=train_data[0]['input_ids']

In [None]:
input_ids = input_ids.unsqueeze(0)

In [None]:
inputs = {'input_ids': input_ids}
response=model.generate(**inputs, max_new_tokens=32)

In [None]:
input_ids.shape

In [None]:
#NOTE(review): CUDA_VISIBLE_DEVICES is normally only honoured when set before CUDA
#is initialised — confirm it still has an effect this late in the notebook.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
#Call the function: the bare `torch.cuda.empty_cache` reference was a no-op
#expression, so no cached GPU memory was being released.
torch.cuda.empty_cache()

In [None]:
print(decoded)

In [None]:
n=10
for i in range(n):
    print(i)

In [None]:
#load evaluation dataset
import pandas as pd

n=3
split='train'
data_path=f'../data/{n}_blocks/SFT_{split}_{n}_blocks_fullPlan'
eval_data=pd.read_csv(data_path)
eval_data=eval_data.drop(columns=['Unnamed: 0'])
eval_data=eval_data.iloc[0]


In [None]:
eval_data

### SCRAP ###

In [None]:
prompt='''A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.
The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>.'''

In [None]:
model,tokenizer=llm_utils.get_model_tokenizer()

In [None]:
#tags="Answer within the [PLAN] [PLAN END] tags."
response=infer(model=peft_model,prompt=prompt,temp=0.1)

In [None]:
import llm_utils

In [None]:
plan='''[PLAN]
1. Unstack brown from violet.
2. Put down brown.
3. Unstack violet from teal.
4. Put down violet.
5. Pick up teal.
6. Stack teal on brown.
[PLAN END]'''

In [None]:
parsed=llm_utils.parse_action_tuples(plan)

In [None]:
parsed

In [None]:
for model_it in range(9,10):
    print(model_it)

In [None]:
ds= llm_utils.load_tokenized_data(3)

In [None]:
ds

In [None]:
ds[0]

In [None]:
print(tokenizer.decode(ds[0]['input_ids'][0]))

In [None]:
import pandas
train=pandas.read_csv('/srv/chawak/planning-with-llms/data/4_blocks/SFT_train_4_blocks_fullPlan')
contam_test=pandas.read_csv('/srv/chawak/planning-with-llms/data/4_blocks/SFT_contaminated_test_4_blocks_fullPlan')

In [None]:
train.drop(columns=['Unnamed: 0'],inplace=True)
contam_test.drop(columns=['Unnamed: 0'],inplace=True)

In [None]:
train

In [None]:
contam_test

In [None]:
merged=pandas.concat([train,contam_test],ignore_index=True)

In [None]:
merged

In [None]:
merged.to_csv('/srv/chawak/planning-with-llms/data/4_blocks/SFT_full_train_4_blocks')