In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Gemma3ForCausalLM
import torch
from peft import PeftModel, PeftConfig
from datasets import load_dataset
import llm_utils

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [2]:
from huggingface_hub import login
login(token="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")

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
name='google/gemma-3-12b-it'
cache_dir='/home/chawak/huggingface'

In [4]:

torch.cuda.ipc_collect()
torch.cuda.empty_cache()

In [5]:
model,tokenizer=llm_utils.get_model_tokenizer()

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

### Step 1. Tokenize the dataset ###

In [6]:
split='val'
n=4

#####  1-1 Gemma-3 tokenizing #####

In [7]:
#load the dataset
import numpy as np

#dataset_path=f'../data/{n}_blocks/SFT_full_{split}_{n}_blocks'
dataset_path=f'../data/{n}_blocks/SFT_{split}_{n}_blocks_fullPlan'
data=load_dataset("csv",data_files={split:dataset_path})
data=data.remove_columns(['Unnamed: 0', 'init', 'goal', 'demo_init', 'demo_goal', 'demo_plan'])
print(f'Loaded HuggingFace dataset : {data}')


Loaded HuggingFace dataset : DatasetDict({
    val: Dataset({
        features: ['prompt', 'gold_plan'],
        num_rows: 483
    })
})


In [8]:
#model,tokenizer=llm_utils.get_model_tokenizer()

In [8]:
print("EOS token:", tokenizer.eos_token)
print("EOS token ID:", tokenizer.eos_token_id)

EOS token: <eos>
EOS token ID: 1


In [35]:
# import pandas as pd
# train_text={'prompt':['I am cat'],
#             'gold_plan':['I eat fish']}
# val_text={'prompt':['I am dog'],
#           'gold_plan':['I eat chicken']}
# train_df=pd.DataFrame(data=train_text)
# val_df=pd.DataFrame(data=val_text)
# train_df.to_csv('../data/toy_train')
# val_df.to_csv('../data/toy_val')

In [53]:
#load the TOY dataset
# import numpy as np
# split='val'
# #dataset_path=f'../data/{n}_blocks/SFT_full_{split}_{n}_blocks'
# dataset_path=f'../data/toy_{split}'
# data=load_dataset("csv",data_files={split:dataset_path})
# data=data.remove_columns(['Unnamed: 0'])
# print(f'Loaded HuggingFace dataset : {data}')

Loaded HuggingFace dataset : DatasetDict({
    val: Dataset({
        features: ['prompt', 'gold_plan'],
        num_rows: 1
    })
})


In [9]:

#helper function for tokenizing
def tokenize_and_mask_function(examples):
    #concatinate prompt and gold plan within our template
    merged_inputs=[f"{p[:-2]}{g[6:]}" for p,g in zip(examples['prompt'],examples['gold_plan'])]

    #tokenize concatinated input
    tokenized=tokenizer(
        merged_inputs,
        truncation=True,
        padding='max_length',
        padding_side='right',
        max_length=1200
    )

    #tokenize ONLY the prompts and get their lengths
    tokenized_prompts=tokenizer(
        examples['prompt'],
        truncation=False
    )

    #print(f'Tokenized prompts: {tokenized_prompts}')
    prompt_lens = [len(ptoken) for ptoken in tokenized_prompts['input_ids']]
    #print("Lengths of tokenized prompts", prompt_lens)
    
    #estimating input token sequence lengths
    merged_input_lens=[len(merged) for merged in tokenized['input_ids']]
    sorted_lens=sorted(merged_input_lens, reverse=True)
    #print('Highest lengths of merged-input token sequence:',sorted_lens[:5])
    #print("Number of prompts tokenized",len(prompt_lens))
    
    #masking prompt tokens for the labels 
    labels=[]
    for input_ids, prompt_length in zip(tokenized['input_ids'],prompt_lens):
        label=input_ids.copy()
        #mask prompt tokens as -100 & adjustment for prompt template
        label[:prompt_length-4]=[-100]*prompt_length
        label=label[:-4]
        labels.append(label)

    tokenized['labels']=labels

    tokenized['input_ids'] = [np.array(ids, dtype=np.int64) for ids in tokenized['input_ids']]
    tokenized['labels']=[np.array(labels, dtype=np.int64) for labels in tokenized['labels']]
    return tokenized


In [10]:
#map tokenizer function to our dataset
tokenized_data=data.map(tokenize_and_mask_function,batched=True)

In [11]:
tokenized_data

DatasetDict({
    val: Dataset({
        features: ['prompt', 'gold_plan', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 483
    })
})

In [31]:
# from datasets import load_dataset
# dataset_path=f'../data/{n}_blocks/SFT_{split}_{n}_blocks_fullPlan'
# data=load_dataset("csv",data_files={split:dataset_path})

In [32]:
# path=f'../data/{n}_blocks/SFT_{split}_{n}_blocks_fullPlan'
# data=pd.read_csv(path)
# data=data.drop(columns=['Unnamed: 0', 'init', 'goal', 'demo_init', 'demo_goal', 'demo_plan',])
# data

In [33]:
# path=f'../data/{n}_blocks/SFT_contaminated_test_{n}_blocks_fullPlan'
# cont=pd.read_csv(path)
# cont=cont.drop(columns=['Unnamed: 0', 'init', 'goal', 'demo_init', 'demo_goal', 'demo_plan',])
# cont

In [34]:
# train=pd.concat([data,cont])

In [35]:
# train.to_csv(f'../data/{n}_blocks/SFT_full_{split}_{n}_blocks')

In [36]:
# cont_data=cont_data.remove_columns(['Unnamed: 0', 'init', 'goal', 'demo_init', 'demo_goal', 'demo_plan',])
# cont_data

In [12]:
encoded_ip=tokenized_data[split][0]['input_ids']
encoded_ip

[2,
 236777,
 1006,
 5662,
 607,
 496,
 1076,
 529,
 11802,
 1298,
 564,
 1202,
 531,
 28913,
 506,
 11802,
 1131,
 59556,
 107,
 144,
 8291,
 659,
 506,
 7419,
 564,
 740,
 776,
 236787,
 24067,
 872,
 496,
 3355,
 236764,
 1286,
 9607,
 496,
 3355,
 699,
 580,
 1903,
 529,
 2264,
 3355,
 236764,
 13499,
 1679,
 496,
 3355,
 236764,
 28566,
 496,
 3355,
 580,
 1903,
 529,
 2264,
 3355,
 236761,
 107,
 144,
 236777,
 735,
 506,
 2269,
 14523,
 580,
 1041,
 7419,
 236787,
 107,
 144,
 236777,
 740,
 1186,
 4351,
 872,
 653,
 723,
 9607,
 886,
 3355,
 657,
 496,
 990,
 107,
 144,
 236777,
 740,
 1186,
 4351,
 872,
 653,
 723,
 9607,
 496,
 3355,
 768,
 1041,
 1526,
 563,
 7738,
 107,
 144,
 236777,
 740,
 1186,
 4351,
 872,
 496,
 3355,
 768,
 506,
 3355,
 563,
 580,
 506,
 2633,
 532,
 506,
 3355,
 563,
 3582,
 107,
 144,
 236776,
 3355,
 563,
 3582,
 768,
 506,
 3355,
 815,
 951,
 1032,
 11802,
 580,
 1903,
 529,
 625,
 532,
 768,
 506,
 3355,
 563,
 711,
 14917,
 872,
 107,
 144,
 236

In [13]:
len(encoded_ip)

1200

In [14]:
decoded_ip=tokenizer.decode(encoded_ip)
print(decoded_ip)

<bos>I am playing with a set of blocks where I need to arrange the blocks into stacks
        Here are the actions I can do: Pick up a block, Unstack a block from on top of another block, Put down a block, Stack a block on top of another block.
        I have the following restrictions on my actions:
        I can only pick up or unstack one block at a time
        I can only pick up or unstack a block if my hand is empty
        I can only pick up a block if the block is on the table and the block is clear
        A block is clear if the block has no other blocks on top of it and if the block is not picked up
        I can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block
        I can only unstack a block from on top of another block if the block I am unstacking is clear
        Once I pick up or unstack a block, I am holding the block
        I can only put down a block that I am holding
        I can only stack a blo

In [15]:
encoded_op=tokenized_data[split][0]['labels']
encoded_op

[-100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,

In [17]:
decoded_op=tokenizer.decode([token for token in encoded_op if token != -100])
print(decoded_op)

[PLAN]
unstack the teal block from on top of the purple block
put down the teal block
unstack the purple block from on top of the orange block
put down the purple block
unstack the orange block from on top of the white block
put down the orange block
pick up the teal block
stack the teal block on top of the orange block
pick up the white block
stack the white block on top of the teal block
[PLAN END]<pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><p

In [19]:
len(encoded_op)

1200

In [20]:
#tokenized_data.save_to_disk(f"/srv/chawak/planning-with-llms/data/toy_label_nopad")
tokenized_data.save_to_disk(f"/srv/chawak/planning-with-llms/data/{n}_blocks/tokenized_dataset")

Saving the dataset (0/1 shards):   0%|          | 0/483 [00:00<?, ? examples/s]

#####  1-2 Dummy data tokenizing #####

In [10]:
op=[2, 236777,   1006,   4799, 236777,   9039,  12480,      1,   -100, -100]
ip=[     2, 236777,   1006,   4799, 236777,   9039,  12480,      1,      0,
             0]
decoded_op=tokenizer.decode([token for token in op if token != -100])
print(f'Decoded labels : {decoded_op}')

Decoded labels : <bos>I am dogI eat chicken<eos>


In [39]:
#verifying encoded file storage
from datasets import Dataset
split='train'
n=3
# /srv/chawak/planning-with-llms/data/3_blocks/tokenized_dataset/test/
ds= Dataset.from_file(f'/srv/chawak/planning-with-llms/data/{n}_blocks/tokenized_dataset/{split}/data-00000-of-00001.arrow')
#ds= Dataset.from_file(f'/srv/chawak/planning-with-llms/data/toy_label_nopad/{split}/data-00000-of-00001.arrow')

In [40]:
encoded_input=ds[0]['input_ids']
encoded_input

[2,
 236777,
 1006,
 5662,
 607,
 496,
 1076,
 529,
 11802,
 1298,
 564,
 1202,
 531,
 28913,
 506,
 11802,
 1131,
 59556,
 107,
 144,
 8291,
 659,
 506,
 7419,
 564,
 740,
 776,
 236787,
 24067,
 872,
 496,
 3355,
 236764,
 1286,
 9607,
 496,
 3355,
 699,
 580,
 1903,
 529,
 2264,
 3355,
 236764,
 13499,
 1679,
 496,
 3355,
 236764,
 28566,
 496,
 3355,
 580,
 1903,
 529,
 2264,
 3355,
 236761,
 107,
 144,
 236777,
 735,
 506,
 2269,
 14523,
 580,
 1041,
 7419,
 236787,
 107,
 144,
 236777,
 740,
 1186,
 4351,
 872,
 653,
 723,
 9607,
 886,
 3355,
 657,
 496,
 990,
 107,
 144,
 236777,
 740,
 1186,
 4351,
 872,
 653,
 723,
 9607,
 496,
 3355,
 768,
 1041,
 1526,
 563,
 7738,
 107,
 144,
 236777,
 740,
 1186,
 4351,
 872,
 496,
 3355,
 768,
 506,
 3355,
 563,
 580,
 506,
 2633,
 532,
 506,
 3355,
 563,
 3582,
 107,
 144,
 236776,
 3355,
 563,
 3582,
 768,
 506,
 3355,
 815,
 951,
 1032,
 11802,
 580,
 1903,
 529,
 625,
 532,
 768,
 506,
 3355,
 563,
 711,
 14917,
 872,
 107,
 144,
 236

In [41]:
ds[0]

{'prompt': 'I am playing with a set of blocks where I need to arrange the blocks into stacks\n        Here are the actions I can do: Pick up a block, Unstack a block from on top of another block, Put down a block, Stack a block on top of another block.\n        I have the following restrictions on my actions:\n        I can only pick up or unstack one block at a time\n        I can only pick up or unstack a block if my hand is empty\n        I can only pick up a block if the block is on the table and the block is clear\n        A block is clear if the block has no other blocks on top of it and if the block is not picked up\n        I can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block\n        I can only unstack a block from on top of another block if the block I am unstacking is clear\n        Once I pick up or unstack a block, I am holding the block\n        I can only put down a block that I am holding\n        I ca

In [42]:
len(encoded_input)

1200

In [43]:
encoded_labels=ds[0]['labels']
len(encoded_labels)

1200

In [44]:
decoded_ip=tokenizer.decode(encoded_input)
decoded_ip

'<bos>I am playing with a set of blocks where I need to arrange the blocks into stacks\n        Here are the actions I can do: Pick up a block, Unstack a block from on top of another block, Put down a block, Stack a block on top of another block.\n        I have the following restrictions on my actions:\n        I can only pick up or unstack one block at a time\n        I can only pick up or unstack a block if my hand is empty\n        I can only pick up a block if the block is on the table and the block is clear\n        A block is clear if the block has no other blocks on top of it and if the block is not picked up\n        I can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block\n        I can only unstack a block from on top of another block if the block I am unstacking is clear\n        Once I pick up or unstack a block, I am holding the block\n        I can only put down a block that I am holding\n        I can only

In [45]:
decoded_op=tokenizer.decode([token for token in encoded_labels if token != -100])
decoded_op

'[PLAN]\nunstack the green block from on top of the red block\nput down the green block\n[PLAN END]<eos>'

In [26]:
for name in model.named_modules():
    print(name)

('', Gemma3ForCausalLM(
  (model): Gemma3TextModel(
    (embed_tokens): Gemma3TextScaledWordEmbedding(262208, 3840, padding_idx=0)
    (layers): ModuleList(
      (0-47): 48 x Gemma3DecoderLayer(
        (self_attn): Gemma3Attention(
          (q_proj): Linear(in_features=3840, out_features=4096, bias=False)
          (k_proj): Linear(in_features=3840, out_features=2048, bias=False)
          (v_proj): Linear(in_features=3840, out_features=2048, bias=False)
          (o_proj): Linear(in_features=4096, out_features=3840, bias=False)
          (q_norm): Gemma3RMSNorm((256,), eps=1e-06)
          (k_norm): Gemma3RMSNorm((256,), eps=1e-06)
        )
        (mlp): Gemma3MLP(
          (gate_proj): Linear(in_features=3840, out_features=15360, bias=False)
          (up_proj): Linear(in_features=3840, out_features=15360, bias=False)
          (down_proj): Linear(in_features=15360, out_features=3840, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): Gemma3R

: 

### Step 2. Define hyperparamters ###

In [21]:
import yaml
from transformers import TrainingArguments, AdamW
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset, DatasetDict, load_from_disk
from peft import LoraConfig, get_peft_model

In [1]:
#load dataset
n=3
split='train'
data_path=f'../data/{n}_blocks/tokenized_dataset/{split}'
train_data=load_from_disk(data_path)
train_data

NameError: name 'load_from_disk' is not defined

In [23]:
#load dataset
n=3
split='val'
data_path=f'../data/{n}_blocks/tokenized_dataset/{split}'
eval_data=load_from_disk(data_path)
eval_data

Dataset({
    features: ['prompt', 'gold_plan', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 15
})

In [None]:
cache_dir='../results/SFT'

In [24]:
#load model and tokenizer
tokenizer=AutoTokenizer.from_pretrained(cfg['model']['name'],cache_dir=cache_dir)
model=AutoModelForCausalLM.from_pretrained(
    cfg['model']['name'],
    cache_dir=cache_dir,
    device_map=cfg['model']['device_map'],
    torch_dtype=torch.bfloat16
)

NameError: name 'cfg' is not defined

In [None]:
#dataset=Dataset.from_file(data_path)

In [25]:
#get config file
with open("config.yaml", "r") as f:
    cfg=yaml.safe_load(f)

In [66]:
#LORA config 
peft_args=LoraConfig(
    r=cfg['peft']['r'],
    lora_alpha=cfg['peft']['lora_alpha'],
    lora_dropout=cfg['peft']['lora_dropout'],
    task_type=cfg['peft']['task_type']
)

In [67]:
#get LORA model
lora_model=get_peft_model(model,peft_args)

In [68]:
lora_layers=lora_model.parameters()

In [69]:
lora_layers

<generator object Module.parameters at 0x7fc3a9152f80>

In [70]:
training_args=SFTConfig(
    output_dir=cfg['training']['output_dir']+f'/{n}_blocks',
    num_train_epochs=cfg['training']['num_train_epochs'],
    per_device_train_batch_size= cfg['training']['per_device_train_batch_size'],
    gradient_accumulation_steps=cfg['training']['gradient_accumulation_steps'],
    learning_rate=cfg['training']['learning_rate'],
    weight_decay=cfg['training']['weight_decay'],
    warmup_ratio=cfg['training']['warmup_ratio'],
    #adam_epsilon=cfg['training']['adam_epsilon'],
    #optim=cfg['training']['optim'],
    logging_steps=cfg['training']['logging_steps'],
    save_steps=cfg['training']['save_steps'],
    eval_steps=cfg['training']['eval_steps'],
    evaluation_strategy=cfg["training"]["evaluation_strategy"],
    save_strategy=cfg["training"]["save_strategy"],
    fp16=cfg["training"]["fp16"],
    bf16=cfg["training"]["bf16"],
    report_to=cfg["training"]["report_to"]
)



In [71]:
#optimizer args dictionary
optimizer = {
    'params': lora_layers,
    'lr': float(cfg['training']['learning_rate']),
}

In [72]:
type(optimizer)

dict

In [73]:
trainer=SFTTrainer(
    model=lora_model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_args,
    optimizer_cls_and_kwargs=(AdamW,optimizer)
)

In [74]:
trainer.train()



KeyboardInterrupt: 

In [None]:
metrics = trainer.evaluate()
print(metrics)

### Step 3 : Infer on the trained model ###

In [1]:
cache_dir='./results/SFT/3_blocks/checkpoint-12/'

In [7]:
peft_config=PeftConfig.from_pretrained(cache_dir)
base_model = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,
    torch_dtype=torch.float16,
    device_map="cuda:3"
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [8]:
model=PeftModel.from_pretrained(base_model,cache_dir)
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Gemma2ForCausalLM(
      (model): Gemma2Model(
        (embed_tokens): Embedding(256000, 3584, padding_idx=0)
        (layers): ModuleList(
          (0-41): 42 x Gemma2DecoderLayer(
            (self_attn): Gemma2Attention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=3584, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3584, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k

In [9]:
tokenizer=AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)

In [12]:
inputs=tokenizer(prompt,return_tensors='pt').to(model.device)

In [13]:
input_len=inputs['input_ids'].shape[1]

In [14]:
inputs=tokenizer(prompt, return_tensors='pt').to('cuda:3')
#print(input_id.device_map)
#print(model.device_map)

response=model.generate(**inputs, max_new_tokens=32)
#print(f'Response from LLM: {tokenizer.decode(response[0][input_len:],skip_special_tokens=True)}')

The 'batch_size' attribute of HybridCache is deprecated and will be removed in v4.49. Use the more precisely named 'self.max_batch_size' attribute instead.


In [15]:
with torch.no_grad():
    outputs=model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0,
    )
print(f'Response from LLM: {tokenizer.decode(outputs[0][input_len:],skip_special_tokens=True)}')



Response from LLM: [PLAN]
unstack the brown block from on top of the teal block
put down the brown block
unstack the teal block from on top of the violet block
put down the teal block
[PLAN END]






In [11]:
prompt='''I am playing with a set of blocks where I need to arrange the blocks into stacks
        Here are the actions I can do: Pick up a block, Unstack a block from on top of another block, Put down a block, Stack a block on top of another block.
        I have the following restrictions on my actions:
        I can only pick up or unstack one block at a time
        I can only pick up or unstack a block if my hand is empty
        I can only pick up a block if the block is on the table and the block is clear
        A block is clear if the block has no other blocks on top of it and if the block is not picked up
        I can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block
        I can only unstack a block from on top of another block if the block I am unstacking is clear
        Once I pick up or unstack a block, I am holding the block
        I can only put down a block that I am holding
        I can only stack a block on top of another block if I am holding the block being stacked
        I can only stack a block on top of another block if the block onto which I am stacking the block is clear
        Once I put down or stack a block, my hand becomes empty
        Once you stack a block on top of a second block, the second block is no longer clear

[STATEMENT]
As initial conditions I have that, the green block is clear,  the hand is empty, the red block is on the table, the pink block is on top of the red block, the green block is on top of the pink block.
My goal is to have that  the green block is on the table, the red block is on top of the green block, the pink block is on top of the red block.

My plan is as follows:

[PLAN]
unstack the green block from on top of the pink block
put down the green block
unstack the pink block from on top of the red block
put down the pink block
pick up the red block
stack the red block on top of the green block
pick up the pink block
stack the pink block on top of the red block
[PLAN END]

[STATEMENT]
As initial conditions I have that, the brown block is clear,  the hand is empty, the violet block is on the table, the teal block is on top of the violet block, the brown block is on top of the teal block.
My goal is to have that  the violet block is on the table, the teal block is on the table, the brown block is on the table.

My plan is as follows: 

[PLAN]
Answer within [PLAN] [PLAN END] tags.
'''

In [7]:
ds

Dataset({
    features: ['prompt', 'gold_plan', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 99
})

In [13]:
ds['input_ids'][0]

[2,
 235285,
 1144,
 6155,
 675,
 476,
 1142,
 576,
 13854,
 1570,
 590,
 1476,
 577,
 9900,
 573,
 13854,
 1280,
 63297,
 108,
 145,
 4858,
 708,
 573,
 8737,
 590,
 798,
 749,
 235292,
 17350,
 908,
 476,
 3963,
 235269,
 2132,
 8388,
 476,
 3963,
 774,
 611,
 2267,
 576,
 2550,
 3963,
 235269,
 13298,
 1706,
 476,
 3963,
 235269,
 23850,
 476,
 3963,
 611,
 2267,
 576,
 2550,
 3963,
 235265,
 108,
 145,
 235285,
 791,
 573,
 2412,
 16842,
 611,
 970,
 8737,
 235292,
 108,
 145,
 235285,
 798,
 1297,
 4788,
 908,
 689,
 748,
 8388,
 974,
 3963,
 696,
 476,
 1069,
 108,
 145,
 235285,
 798,
 1297,
 4788,
 908,
 689,
 748,
 8388,
 476,
 3963,
 1013,
 970,
 1634,
 603,
 8144,
 108,
 145,
 235285,
 798,
 1297,
 4788,
 908,
 476,
 3963,
 1013,
 573,
 3963,
 603,
 611,
 573,
 3037,
 578,
 573,
 3963,
 603,
 3110,
 108,
 145,
 235280,
 3963,
 603,
 3110,
 1013,
 573,
 3963,
 919,
 793,
 1156,
 13854,
 611,
 2267,
 576,
 665,
 578,
 1013,
 573,
 3963,
 603,
 780,
 15532,
 908,
 108,
 145,
 2

In [14]:
ds['labels'][0]

[-100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,

In [12]:
de=tokenizer.decode([235309,
  42446,
  16960,
  235307])
print(de)

țiuniprocessor equity lăn


In [57]:
from datasets import Features,Sequence, Value
import sys
print(sys.executable)

/srv/chawak/envs/planwllm/bin/python


In [7]:
n=4
split='val'

In [13]:
#load the dataset
import numpy as np

#dataset_path=f'../data/{n}_blocks/SFT_full_{split}_{n}_blocks'
dataset_path=f'../data/{n}_blocks/SFT_{split}_{n}_blocks_fullPlan'
data=load_dataset("csv",data_files={split:dataset_path})
data=data.remove_columns(['Unnamed: 0', 'init', 'goal', 'demo_init', 'demo_goal', 'demo_plan'])
print(f'Loaded HuggingFace dataset : {data}')

#helper function for tokenizing
def tokenize_and_mask_function(examples):
    #concatinate prompt and gold plan within our template
    merged_inputs=[f"{p[:-2]}{g[6:]}" for p,g in zip(examples['prompt'],examples['gold_plan'])]

    #tokenize concatinated input
    tokenized=tokenizer(
        merged_inputs,
        truncation=True,
        padding='max_length',
        padding_side='right',
        max_length=1200
    )

    #tokenize ONLY the prompts and get their lengths
    tokenized_prompts=tokenizer(
        examples['prompt'],
        truncation=False
    )

    #print(f'Tokenized prompts: {tokenized_prompts}')
    prompt_lens = [len(ptoken) for ptoken in tokenized_prompts['input_ids']]
    #print("Lengths of tokenized prompts", prompt_lens)
    
    #estimating input token sequence lengths
    merged_input_lens=[len(merged) for merged in tokenized['input_ids']]
    sorted_lens=sorted(merged_input_lens, reverse=True)
    #print('Highest lengths of merged-input token sequence:',sorted_lens[:5])
    #print("Number of prompts tokenized",len(prompt_lens))
    
    #masking prompt tokens for the labels 
    labels=[]
    for input_ids, prompt_length in zip(tokenized['input_ids'],prompt_lens):
        label=input_ids.copy()
        #mask prompt tokens as -100 & adjustment for prompt template
        label[:prompt_length-4]=[-100]*prompt_length
        label=label[:-4]
        labels.append(label)

    tokenized['labels']=labels

    tokenized['input_ids'] = [np.array(ids, dtype=np.int64) for ids in tokenized['input_ids']]
    tokenized['labels']=[np.array(labels, dtype=np.int64) for labels in tokenized['labels']]
    return tokenized

#map tokenizer function to our dataset
tokenized_data=data.map(tokenize_and_mask_function,batched=True)

Loaded HuggingFace dataset : DatasetDict({
    val: Dataset({
        features: ['init', 'goal', 'demo_init', 'demo_goal', 'demo_plan', 'prompt', 'gold_plan'],
        num_rows: 483
    })
})


Map:   0%|          | 0/483 [00:00<?, ? examples/s]

In [14]:
tokenized_data

DatasetDict({
    val: Dataset({
        features: ['init', 'goal', 'demo_init', 'demo_goal', 'demo_plan', 'prompt', 'gold_plan', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 483
    })
})

In [65]:
new_features = Features({
    "prompt": Value("string"),
    "gold_plan": Value("string"),
    "input_ids": Sequence(Value("int64")),
    "attention_mask": Sequence(Value("int64")),
    "labels": Sequence(Value("int64")),
})

tokenized_data = tokenized_data.cast(new_features)

In [55]:
#sanity checks on tokenized data
print(f'Data dictonary after tokenization: {tokenized_data}')
#decoding for merged-text
encoded_text=tokenized_data[split][0]['input_ids']
print(f'Length of encoded merged-text : {len(encoded_text)}')
decoded_text=tokenizer.decode(encoded_text, skip_special_tokens=False)
print(f'Length of decoded merged-text : {len(encoded_text)}')
print(f'Decoded merged-text: {decoded_text}')
#deocding for lables
encoded_text=tokenized_data[split][0]['labels']
print(f'Length of encoded labels : {len(encoded_text)}')
decoded_text=tokenizer.decode(
    [token_id for token_id in encoded_text if token_id!=-100],
    skip_special_tokens=True)
print(f'Decoded labels: {decoded_text}')


Data dictonary after tokenization: DatasetDict({
    train: Dataset({
        features: ['prompt', 'gold_plan', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 114
    })
})
Length of encoded merged-text : 1200
Length of decoded merged-text : 1200
Decoded merged-text: <bos>I am playing with a set of blocks where I need to arrange the blocks into stacks
        Here are the actions I can do: Pick up a block, Unstack a block from on top of another block, Put down a block, Stack a block on top of another block.
        I have the following restrictions on my actions:
        I can only pick up or unstack one block at a time
        I can only pick up or unstack a block if my hand is empty
        I can only pick up a block if the block is on the table and the block is clear
        A block is clear if the block has no other blocks on top of it and if the block is not picked up
        I can only unstack a block from on top of another block if the block I am unstacking was real

Saving the dataset (0/1 shards):   0%|          | 0/114 [00:00<?, ? examples/s]

In [66]:
tokenized_data.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

#save to file
tokenized_data.save_to_disk(f"/srv/chawak/planning-with-llms/data/{n}_blocks/tokenized_dataset")


Saving the dataset (0/1 shards):   0%|          | 0/114 [00:00<?, ? examples/s]

In [61]:
type(tokenized_data['train'][0]['input_ids'])

torch.Tensor

In [5]:
from datasets import load_from_disk
import llm_utils

In [8]:
model,tokenizer=llm_utils.get_model_tokenizer()

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

In [3]:
n=3
split='val'
data_path=f'../data/{n}_blocks/tokenized_dataset/{split}'
train_data=load_from_disk(data_path)
train_data

Dataset({
    features: ['prompt', 'gold_plan', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 114
})

In [12]:
print(tokenizer.decode(train_data[0]['input_ids']))

<bos>I am playing with a set of blocks where I need to arrange the blocks into stacks
        Here are the actions I can do: Pick up a block, Unstack a block from on top of another block, Put down a block, Stack a block on top of another block.
        I have the following restrictions on my actions:
        I can only pick up or unstack one block at a time
        I can only pick up or unstack a block if my hand is empty
        I can only pick up a block if the block is on the table and the block is clear
        A block is clear if the block has no other blocks on top of it and if the block is not picked up
        I can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block
        I can only unstack a block from on top of another block if the block I am unstacking is clear
        Once I pick up or unstack a block, I am holding the block
        I can only put down a block that I am holding
        I can only stack a blo

In [9]:
prompt='''I am playing with a set of blocks where I need to arrange the blocks into stacks
        Here are the actions I can do: Pick up a block, Unstack a block from on top of another block, Put down a block, Stack a block on top of another block.
        I have the following restrictions on my actions:
        I can only pick up or unstack one block at a time
        I can only pick up or unstack a block if my hand is empty
        I can only pick up a block if the block is on the table and the block is clear
        A block is clear if the block has no other blocks on top of it and if the block is not picked up
        I can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block
        I can only unstack a block from on top of another block if the block I am unstacking is clear
        Once I pick up or unstack a block, I am holding the block
        I can only put down a block that I am holding
        I can only stack a block on top of another block if I am holding the block being stacked
        I can only stack a block on top of another block if the block onto which I am stacking the block is clear
        Once I put down or stack a block, my hand becomes empty
        Once you stack a block on top of a second block, the second block is no longer clear

[STATEMENT]
As initial conditions I have that, the brown block is clear, the teal block is clear,  the hand is empty, the violet block is on the table, the brown block is on top of the violet block, the teal block is on the table.
My goal is to have that  the brown block is on the table, the violet block is on top of the brown block, the teal block is on top of the violet block.

My plan is as follows:

[PLAN]
unstack the brown block from on top of the violet block
put down the brown block
pick up the violet block
stack the violet block on top of the brown block
pick up the teal block
stack the teal block on top of the violet block
[PLAN END]

[STATEMENT]
As initial conditions I have that, the green block is clear,  the hand is empty, the pink block is on the table, the red block is on top of the pink block, the green block is on top of the red block.
My goal is to have that  the pink block is on the table, the red block is on top of the pink block, the green block is on the table.

My plan is as follows: 
[PLAN]

Strategise at each step before you choose an action, then form a final plan. Answer within the [PLAN] [PLAN END] tags.
'''

In [26]:
input_ids=train_data[0]['input_ids']

In [38]:
input_ids = input_ids.unsqueeze(0)

In [39]:
inputs = {'input_ids': input_ids}
response=model.generate(**inputs, max_new_tokens=32)



Unsupported: Unexpected type in sourceless builder transformers.models.gemma3.configuration_gemma3.Gemma3TextConfig

from user code:
   File "/srv/chawak/envs/planwllm/lib/python3.11/site-packages/accelerate/hooks.py", line 170, in new_forward
    output = module._old_forward(*args, **kwargs)
  File "/srv/chawak/envs/planwllm/lib/python3.11/site-packages/transformers/utils/generic.py", line 969, in wrapper
    output = func(self, *args, **kwargs)
  File "/srv/chawak/envs/planwllm/lib/python3.11/site-packages/transformers/models/gemma3/modeling_gemma3.py", line 1319, in forward
    causal_mask = self._update_causal_mask(
  File "/srv/chawak/envs/planwllm/lib/python3.11/site-packages/transformers/models/gemma3/modeling_gemma3.py", line 1114, in _update_causal_mask
    if self.config.text_config._attn_implementation == "flash_attention_2":
  File "/srv/chawak/envs/planwllm/lib/python3.11/site-packages/transformers/configuration_utils.py", line 211, in __getattribute__
    return super().__getattribute__(key)

Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace (please do this especially if you're reporting a bug to PyTorch). For even more developer context, set TORCH_LOGS="+dynamo"


In [41]:
input_ids.shape

torch.Size([1, 1200])

In [4]:
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
torch.cuda.empty_cache

<function torch.cuda.memory.empty_cache() -> None>

In [None]:
print(decoded)

In [1]:
n=10
for i in range(n):
    print(i)

0
1
2
3
4
5
6
7
8
9


In [2]:
#load evaluation dataset
import pandas as pd

n=3
split='train'
data_path=f'../data/{n}_blocks/SFT_{split}_{n}_blocks_fullPlan'
eval_data=pd.read_csv(data_path)
eval_data=eval_data.drop(columns=['Unnamed: 0'])
eval_data=eval_data.iloc[0]


In [3]:
eval_data

init                               (('pink', 'red', 'green'),)
goal                             (('pink', 'red'), ('green',))
demo_init    the brown block is clear, the teal block is cl...
demo_goal     the brown block is on the table, the violet b...
demo_plan    [PLAN]\nunstack the brown block from on top of...
prompt       I am playing with a set of blocks where I need...
gold_plan    [PLAN]\nunstack the green block from on top of...
Name: 0, dtype: object

### SCRAP ###

In [4]:
prompt='''
Whats the weather like today
'''

In [5]:
#tags="Answer within the [PLAN] [PLAN END] tags."
response=infer(model=peft_model,prompt=prompt,temp=0.1)

Prompt in the chat-template:<bos><start_of_turn>user
Whats the weather like today<end_of_turn>



.......Querying LLM for a plan......... iteration: #0




######################### Response from LLM:
Whats the weather like today
model
Okay, let's get you the weather! To give you the most accurate forecast, I need to know **your location**.

However, I can give you a general idea based on some major locations:

*   **New York City:**

After EXTRCAT PLAN ACTIONS: False


.......Querying LLM for a plan......... iteration: #1
######################### Response from LLM:
Whats the weather like today
model
Okay, let's get you the weather! To give you the most accurate forecast, I need to know **your location**.

However, I can give you a general idea based on some major locations:

*   **New York City:**

After EXTRCAT PLAN ACTIONS: False


.......Querying LLM for a plan......... iteration: #2
######################### Response from LLM:
Whats the weather like today
model
Okay, let's get you the weather! To give you the most accurate forecast, I need to know **your location**.

However, I can give you a general overview for some major US citie

In [1]:
import llm_utils

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [2]:
plan='''[PLAN]
1. Unstack brown from violet.
2. Put down brown.
3. Unstack violet from teal.
4. Put down violet.
5. Pick up teal.
6. Stack teal on brown.
[PLAN END]'''

In [3]:
parsed=llm_utils.parse_action_tuples(plan)


After EXTRCAT PLAN ACTIONS: ['1. Unstack brown from violet.', '2. Put down brown.', '3. Unstack violet from teal.', '4. Put down violet.', '5. Pick up teal.', '6. Stack teal on brown.']


 Action:1. Unstack brown from violet. after seperators:['1', 'Unstack', 'brown', 'from', 'violet']


 Action: 1. Unstack brown from violet., predicate: 1, counter: 0


 Action: 1. Unstack brown from violet., predicate: unstack, counter: 1

For predicate: Unstack the result is : ('unstack', 'brown', 'violet')


 Action:2. Put down brown. after seperators:['2', 'Put', 'down', 'brown']


 Action: 2. Put down brown., predicate: 2, counter: 0


 Action: 2. Put down brown., predicate: put, counter: 1


 Action:3. Unstack violet from teal. after seperators:['3', 'Unstack', 'violet', 'from', 'teal']


 Action: 3. Unstack violet from teal., predicate: 3, counter: 0


 Action: 3. Unstack violet from teal., predicate: unstack, counter: 1

For predicate: Unstack the result is : ('unstack', 'violet', 'teal')


 A

In [4]:
parsed

[('unstack', 'brown', 'violet'),
 ('put-down', 'brown'),
 ('unstack', 'violet', 'teal'),
 ('put-down', 'violet'),
 ('pick-up', 'teal'),
 ('stack', 'teal', 'brown')]

In [1]:
for model_it in range(9,10):
    print(model_it)

9


In [19]:
ds= llm_utils.load_tokenized_data(3)

In [20]:
ds

(Dataset({
     features: ['prompt', 'gold_plan', 'input_ids', 'attention_mask', 'labels'],
     num_rows: 114
 }),
 Dataset({
     features: ['prompt', 'gold_plan', 'input_ids', 'attention_mask', 'labels'],
     num_rows: 15
 }))

In [21]:
ds[0]

Dataset({
    features: ['prompt', 'gold_plan', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 114
})

In [25]:
print(tokenizer.decode(ds[0]['input_ids'][0]))

<bos>I am playing with a set of blocks where I need to arrange the blocks into stacks
        Here are the actions I can do: Pick up a block, Unstack a block from on top of another block, Put down a block, Stack a block on top of another block.
        I have the following restrictions on my actions:
        I can only pick up or unstack one block at a time
        I can only pick up or unstack a block if my hand is empty
        I can only pick up a block if the block is on the table and the block is clear
        A block is clear if the block has no other blocks on top of it and if the block is not picked up
        I can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block
        I can only unstack a block from on top of another block if the block I am unstacking is clear
        Once I pick up or unstack a block, I am holding the block
        I can only put down a block that I am holding
        I can only stack a blo