In [None]:
import torch
from huggingface_hub import login
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
import os

# Authenticate against the Hugging Face Hub with the token from the HF_TOKEN
# environment variable (os.getenv returns None when the variable is unset, and
# that value is forwarded to login() unchanged).
hf_token = os.getenv('HF_TOKEN')

login(token=hf_token)

# Single source of truth for the checkpoint so tokenizer and model cannot drift apart.
MODEL_ID = "meta-llama/Llama-3.2-1B"

# torch.device() only accepts real device types; "auto" is not one of them and
# raises at construction time, so the non-CUDA fallback has to be "cpu".
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to(device)


In [None]:
# Sanity check: as the cell's last expression, this displays whether PyTorch
# reports a usable CUDA device in the current kernel.
torch.cuda.is_available()

In [None]:
import pandas as pd
import os
from datasets import load_dataset, Dataset

# NOTE(review): HF_HOME is assigned after transformers/huggingface_hub (first cell)
# and datasets (import just above) have already been imported. If those libraries
# resolve their cache location at import time, this assignment has no effect on
# them — confirm, and if so move it ahead of the first Hugging Face import.
os.environ["HF_HOME"] = "G:\\HuggingFace"

# Step 1: Load the dataset
# The CSV path is relative to the kernel's working directory. The file is read
# into a pandas DataFrame and then wrapped as a Hugging Face Dataset, which the
# later map() calls operate on.
csv_path = r'./minigridfinetune.csv'
data = pd.read_csv(csv_path)
dataset = Dataset.from_pandas(data)

In [None]:
def tokenize_function(example):
    """Tokenize one row rendered as a single descriptive sentence.

    The row's 'initial_observation', 'intrinsic_reward' and 'reward' fields are
    formatted into one string and passed to the notebook-level tokenizer with
    truncation enabled and NumPy tensors requested.

    Side effect: sets ``tokenizer.truncation_side`` to "left" on the shared
    tokenizer before encoding.

    Note: a second function with the same name is defined further down in this
    notebook; once that definition runs it replaces this one.
    """
    pieces = [
        f"Observation: {example['initial_observation']}. ",
        f"Intrinsic Reward: {example['intrinsic_reward']}. ",
        f"Reward: {example['reward']}.",
    ]
    sentence = "".join(pieces)
    # Applied to the shared tokenizer object, so it persists after this call.
    tokenizer.truncation_side = "left"
    encoded = tokenizer(sentence, truncation=True, return_tensors="np")
    return encoded


# Padding to a fixed length (used by the tokenization helpers below) needs a pad
# token. When the tokenizer has none, register '[PAD]' as the pad token and
# resize the model's token embeddings to the tokenizer's new length.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))


# Tokenize the columns and ensure consistent length
def tokenize_column(column):
    """Encode every value of ``column`` into a fixed-length list of token ids.

    Args:
        column: object exposing ``.apply`` (presumably a pandas Series of
            strings — the only call sites in this notebook are commented out,
            so confirm before relying on it).

    Returns:
        Whatever ``column.apply`` returns, with each value replaced by the
        token-id list produced by the notebook-level tokenizer, padded or
        truncated to 512 tokens.
    """

    def _encode(value):
        encoding = tokenizer(
            value,
            return_tensors='pt',
            padding='max_length',
            truncation=True,
            max_length=512,
        )
        # Drop the leading batch dimension and convert the tensor to a plain list.
        return encoding.input_ids.squeeze(0).tolist()

    return column.apply(_encode)


# dataset['initial_observation'] = tokenize_column(dataset['initial_observation'].astype(str))
# dataset['intrinsic_reward'] = tokenize_column(dataset['intrinsic_reward'].astype(str))
# dataset['action'] = tokenize_column(dataset['action'].astype(str))
# dataset['reward'] = tokenize_column(dataset['reward'].astype(str))


# Map the dataset to create input and label columns
def create_input_and_label(example):
    """Render one dataset row as a prompt string and a target string.

    Args:
        example: mapping with the keys 'initial_observation',
            'intrinsic_reward', 'reward' and 'action'.

    Returns:
        dict with 'input_text' (observation, intrinsic reward and reward joined
        into one sentence) and 'target_text' (the action).

    Raises:
        KeyError: if any of the four keys is missing from ``example``.
    """
    prompt_parts = (
        f"Observation: {example['initial_observation']}",
        f"Intrinsic Reward: {example['intrinsic_reward']}",
        f"Reward: {example['reward']}.",
    )
    return {
        'input_text': " ".join(prompt_parts),
        'target_text': f"Action: {example['action']}",
    }


# Add the 'input_text' and 'target_text' columns to every row. The source columns
# are still present afterwards; they are dropped by the tokenization map below.
dataset = dataset.map(create_input_and_label)


# Tokenize the input and target texts
def tokenize_function(example, max_length=512, tok=None):
    """Build causal-LM training features from prompt/target text.

    A decoder-only model computes its loss by comparing ``labels`` against its
    own ``input_ids`` position by position (after an internal one-token shift).
    The target therefore has to be part of the input sequence: each example is
    encoded as ``prompt + " " + target + EOS`` and the labels are a copy of that
    sequence in which the prompt and the padding are replaced by -100, the
    index the loss ignores. Only the target tokens contribute to the loss.

    Args:
        example: mapping with 'input_text' and 'target_text'. Each is either a
            list of strings (``Dataset.map(..., batched=True)``) or a single
            string (unbatched map).
        max_length: fixed length every sequence is padded/truncated to.
        tok: tokenizer to use; defaults to the notebook-level ``tokenizer``.

    Returns:
        dict with 'input_ids', 'attention_mask' and 'labels'. For batched input
        each value is a list of ``max_length``-long lists; for a single example
        each value is one ``max_length``-long list.
    """
    tok = tokenizer if tok is None else tok

    prompts, targets = example['input_text'], example['target_text']
    batched = not isinstance(prompts, str)
    if not batched:
        prompts, targets = [prompts], [targets]

    eos_id = getattr(tok, 'eos_token_id', None)
    pad_id = getattr(tok, 'pad_token_id', None)
    if pad_id is None:
        # Padded positions are excluded from attention and from the loss, so the
        # concrete id only has to be a valid vocabulary index.
        pad_id = eos_id if eos_id is not None else 0

    features = {'input_ids': [], 'attention_mask': [], 'labels': []}
    for prompt, target in zip(prompts, targets):
        prompt_ids = list(tok(prompt)['input_ids'])
        # No special tokens here: the target continues the prompt's sequence.
        target_ids = list(tok(" " + target, add_special_tokens=False)['input_ids'])
        if eos_id is not None:
            # Teach the model where the answer ends.
            target_ids.append(eos_id)

        # The target is what is being learned, so it is kept in preference to
        # the prompt; an over-long prompt loses tokens from its left edge.
        target_ids = target_ids[:max_length]
        room = max_length - len(target_ids)
        if len(prompt_ids) > room:
            prompt_ids = prompt_ids[len(prompt_ids) - room:]

        sequence = prompt_ids + target_ids
        n_pad = max_length - len(sequence)
        features['input_ids'].append(sequence + [pad_id] * n_pad)
        features['attention_mask'].append([1] * len(sequence) + [0] * n_pad)
        features['labels'].append([-100] * len(prompt_ids) + target_ids + [-100] * n_pad)

    if not batched:
        return {key: rows[0] for key, rows in features.items()}
    return features


# Remove only the columns that exist in the dataset: the candidate list is
# filtered against dataset.column_names so map() does not fail when one of the
# listed columns is absent from the CSV.
columns_to_remove = [
    name
    for name in ('timestamp', 'initial_observation', 'intrinsic_reward', 'reward', 'action', 'input_text',
                 'target_text')
    if name in dataset.column_names
]
tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=columns_to_remove)


In [None]:
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorForSeq2Seq

# Training configuration. Effective batch size is
# per_device_train_batch_size * gradient_accumulation_steps = 8 per device.
training_args = TrainingArguments(
    output_dir=r"G:\HuggingFace\fine_tuned_model",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`.
    eval_strategy="steps",
    eval_steps=500,
    save_steps=500,
    logging_steps=100,
    learning_rate=5e-5,
    # fp16 mixed precision is only enabled when a CUDA device is present, so the
    # notebook still runs on the CPU fallback chosen in the first cell.
    fp16=torch.cuda.is_available(),
    remove_unused_columns=False,
    report_to="none",
)

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Hold out 10% of the rows for evaluation; the fixed seed makes the split
# identical across kernel restarts.
train_test_split = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

# Initialize the Trainer with the correct datasets
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()