In [10]:
import pandas as pd

# Read the fine-tuning CSV (path is relative to the current working directory)
# into a DataFrame that the later cells convert to a Hugging Face Dataset.
csv_path = r'./minigridfinetune.csv'
data = pd.read_csv(filepath_or_buffer=csv_path)

In [None]:
import os

# HF_HOME is exported before the Hugging Face imports below so that those
# libraries see the custom cache location when they are first loaded.
os.environ["HF_HOME"] = "G:\\HuggingFace"

from huggingface_hub import login
from transformers import LlamaTokenizer, LlamaForCausalLM
import torch

# The access token comes from the environment so it never lands in the notebook.
hf_token = os.getenv('HF_TOKEN')

login(token=hf_token)

model_name = 'openlm-research/open_llama_3b'

tokenizer = LlamaTokenizer.from_pretrained(model_name)
# '[PAD]' is a new vocabulary entry; add_special_tokens returns how many
# tokens were actually added so the embedding matrix can be grown to match.
num_added_tokens = tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load the weights in float32. The Trainer cell enables fp16 mixed precision,
# whose gradient scaler refuses parameters that are already float16
# ("Attempting to unscale FP16 gradients"); mixed precision expects
# full-precision master weights.
model = LlamaForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float32, device_map="auto",
    offload_folder=r"G:\HuggingFace\offload_folder"
)

# Without this resize the id of '[PAD]' lies past the end of the embedding
# table and any padded batch fails on the embedding lookup.
if num_added_tokens > 0:
    model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.pad_token_id

In [None]:
# Convert DataFrame into Hugging Face Dataset
from datasets import Dataset

# Columns that the next cell interpolates into the prompt / target text.
text_columns = ['initial_observation', 'intrinsic_reward', 'action', 'reward']

# Keep these columns as plain strings. They must NOT be tokenized here:
# create_input_and_label() formats them into text with f-strings, so a list of
# token ids would be rendered literally (e.g. "Observation: [1, 529, ...]")
# and then tokenized a second time. Tokenization happens once, later, in
# tokenize_function().
data[text_columns] = data[text_columns].astype(str)

dataset = Dataset.from_pandas(data)


In [None]:
def create_input_and_label(example):
    """Build the prompt and target strings for one dataset row.

    The prompt is the observation followed by both reward fields; the target
    is the action. Returns a dict with the keys 'input_text' and 'target_text'.
    """
    observation = example['initial_observation']
    action = example['action']
    intrinsic = example['intrinsic_reward']
    reward = example['reward']

    # Assemble the prompt from its two fragments (observation first, rewards after).
    prompt_fragments = [
        f"Observation: {observation}",
        f" Intrinsic Reward: {intrinsic}, Reward: {reward}.",
    ]

    return {
        'input_text': "".join(prompt_fragments),
        'target_text': f"Action: {action}",
    }


# Add the 'input_text' / 'target_text' columns to every row.
dataset = dataset.map(function=create_input_and_label)


def tokenize_function(example):
    """Tokenize prompt/target pairs into aligned causal-LM training features.

    A causal language model computes its loss position by position, so
    'labels' must have exactly the same length as 'input_ids'. Each training
    sequence is therefore the prompt tokens followed by the target tokens
    (plus EOS when the tokenizer defines one), and the labels are that same
    sequence with every prompt position replaced by -100 so only the target
    contributes to the loss.

    Args:
        example: Mapping with 'input_text' and 'target_text'. Either a batch
            (lists of strings, as passed by ``Dataset.map(batched=True)``) or
            a single row (two strings).

    Returns:
        Dict with 'input_ids', 'attention_mask' and 'labels'; lists of lists
        for a batch, flat lists for a single row.
    """
    prompts = example['input_text']
    targets = example['target_text']

    # Accept an un-batched row as well as a batch of columns.
    single = isinstance(prompts, str)
    if single:
        prompts, targets = [prompts], [targets]

    eos_id = getattr(tokenizer, 'eos_token_id', None)
    max_len = getattr(tokenizer, 'model_max_length', None)

    batch = {'input_ids': [], 'attention_mask': [], 'labels': []}
    for prompt, target in zip(prompts, targets):
        prompt_ids = list(tokenizer(prompt, truncation=True)['input_ids'])
        # No special tokens here: the target continues the prompt, it does not
        # start a new sequence.
        target_ids = list(
            tokenizer(target, truncation=True, add_special_tokens=False)['input_ids']
        )
        if eos_id is not None:
            target_ids.append(eos_id)

        input_ids = prompt_ids + target_ids
        labels = [-100] * len(prompt_ids) + target_ids

        # The two halves were truncated independently, so cap the joined
        # sequence at the tokenizer's maximum length as well.
        if isinstance(max_len, int) and len(input_ids) > max_len:
            input_ids = input_ids[:max_len]
            labels = labels[:max_len]

        batch['input_ids'].append(input_ids)
        batch['attention_mask'].append([1] * len(input_ids))
        batch['labels'].append(labels)

    if single:
        return {key: values[0] for key, values in batch.items()}
    return batch


# Tokenize every row and drop the raw columns so only model features remain.
tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)

# The Trainer cell indexes tokenized_dataset['train'] and ['validation'], but a
# plain Dataset has no named splits. Hold out 10% for evaluation; the fixed
# seed keeps the split identical across kernel restarts.
tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
tokenized_dataset['validation'] = tokenized_dataset.pop('test')

In [None]:
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq

training_args = TrainingArguments(
    # Checkpoint location; existing contents may be overwritten.
    output_dir=r"G:\HuggingFace\fine_tuned_model",
    overwrite_output_dir=True,
    # Optimisation: one sequence per step, gradients accumulated over 8 steps.
    num_train_epochs=3,
    learning_rate=5e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,
    fp16=True,
    # Logging / evaluation / checkpoint cadence, all measured in steps.
    logging_steps=100,
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=500,
    # Hand every dataset column to the collator untouched.
    remove_unused_columns=False,
)

# Use DataCollatorForSeq2Seq for sequence-to-sequence tasks
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation'],
)

# Run the fine-tuning loop.
trainer.train()




In [None]:
import os

# Point the Hugging Face cache at G: *before* importing any Hugging Face
# library, matching the first model-loading cell: an HF_HOME assignment placed
# after those imports is not picked up when this cell runs on a fresh kernel.
os.environ["HF_HOME"] = "G:\\HuggingFace"

import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer,
)
from huggingface_hub import login
from transformers import LlamaTokenizer, LlamaForCausalLM

# Step 1: Load the dataset
# The CSV is exposed as a DatasetDict with a single 'train' split.
csv_path = r'/AIFeedback/minigridfinetune.csv'
dataset = load_dataset('csv', data_files={'train': csv_path})


# Step 2: Create text from data
def create_text(example):
    """Render one row as a single training string.

    Each field becomes a "Label: value." sentence; the four sentences are
    joined with single spaces and returned under the key 'text'.
    """
    labelled_columns = (
        ("Observation", 'initial_observation'),
        ("Action", 'action'),
        ("Intrinsic Reward", 'intrinsic_reward'),
        ("Reward", 'reward'),
    )
    sentences = [f"{label}: {example[column]}." for label, column in labelled_columns]
    return {'text': " ".join(sentences)}


dataset['train'] = dataset['train'].map(create_text)

model_name = 'meta-llama/Llama-3.2-1B'

# The SentencePiece-backed LlamaTokenizer needs a `tokenizer.model` file that
# this checkpoint does not provide (it ships tokenizer.json instead), so let
# AutoTokenizer pick the matching tokenizer class, as the last cell does.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)
# '[PAD]' is a new vocabulary entry; keep the count so the model can be resized.
num_added_tokens = tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load in float32: the TrainingArguments below enable fp16 mixed precision,
# whose gradient scaler rejects parameters that are already float16
# ("Attempting to unscale FP16 gradients").
model = LlamaForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float32, device_map=0,
    offload_folder=r"G:\HuggingFace\offload_folder"
)

# Grow the embedding table to cover the new '[PAD]' id; otherwise any padded
# batch indexes past the end of the embeddings.
if num_added_tokens > 0:
    model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.pad_token_id


def tokenize_function(examples):
    """Run the notebook-level tokenizer over the batch's 'text' column with truncation enabled."""
    texts = examples['text']
    encoded = tokenizer(texts, truncation=True)
    return encoded


# Step 3: Tokenize the text
# Every pre-existing column is dropped, leaving only what the tokenizer returns.
raw_columns = dataset['train'].column_names
dataset['train'] = dataset['train'].map(tokenize_function, remove_columns=raw_columns, batched=True)


# Step 4: Prepare labels
def prepare_labels(examples):
    """Add a 'labels' entry that is a shallow copy of 'input_ids'.

    The batch is modified in place and the same object is returned.
    """
    token_ids = examples['input_ids']
    examples['labels'] = token_ids.copy()
    return examples


dataset['train'] = dataset['train'].map(prepare_labels, batched=True)

# Optional Step: Split the dataset
# Hold out 10% for validation. The fixed seed keeps the held-out rows the same
# across kernel restarts, so evaluation numbers from different runs are
# comparable.
train_test_split = dataset['train'].train_test_split(test_size=0.1, seed=42)
dataset['train'] = train_test_split['train']
dataset['validation'] = train_test_split['test']

# Step 5: Set up training arguments
training_args = TrainingArguments(
    # Checkpoint location; existing contents may be overwritten.
    output_dir=r"G:\HuggingFace\training_args",
    overwrite_output_dir=True,
    # Optimisation: one sequence per step, gradients accumulated over 8 steps.
    num_train_epochs=3,
    learning_rate=5e-5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    fp16=True,
    # Logging / checkpoint / evaluation cadence, all measured in steps.
    logging_steps=100,
    save_steps=500,
    evaluation_strategy="steps",  # Change to "no" if not using validation
    eval_steps=500,
    # Hand every dataset column to the collator untouched.
    remove_unused_columns=False,
)

# Step 6: Initialize data collator and trainer
# Causal-LM collation: mlm=False turns masked-language-model masking off.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset['train'],
    eval_dataset=dataset.get('validation'),  # None when no 'validation' split exists
)

# Step 7: Fine-tune the model
trainer.train()

# Step 8: Save the fine-tuned model
trainer.save_model(r"G:\HuggingFace\fine_tuned_model")


In [9]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Fetch the tokenizer and the causal-LM weights for the same Hub checkpoint.
checkpoint = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to G:\\HuggingFace\token
Login successful


tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/186 [00:00<?, ?B/s]