In [1]:
#!huggingface-cli login

In [1]:
import pandas as pd
from datasets import load_dataset, Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    LlamaForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model
import torch
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pandas as pd

# Read the shuffled prompt/response pairs from disk into a DataFrame.
data = pd.read_json('../creating_data_src/combined_shuffled.json')

# Preview the leading rows, then report the dtype of every column.
print(data.head(), data.dtypes, sep='\n')

                                               input  \
0    What are some fun activities to do in New York?   
1  Can you find 4-star hotels with a swimming poo...   
2  I need to plan a trip from Toronto (YYZ) to Li...   
3     What are some fun things to do in Mexico City?   
4  I need a flight from AER to BKK on 2023-11-15 ...   

                                              output  
0   {'action': ['activities'], 'destination': 'NYC'}  
1  {'action': ['search_hotels'], 'action_input': ...  
2  {'action': ['search_flights', 'search_hotels',...  
3   {'action': ['activities'], 'destination': 'MEX'}  
4  {'action': ['search_flights', 'activities'], '...  
input     object
output    object
dtype: object


In [3]:
import json


def _as_json_text(value):
    """Return ``value`` as JSON text, leaving values that are already strings untouched."""
    return value if isinstance(value, str) else json.dumps(value)


# Ensure 'output' is of type string by converting dictionaries to JSON strings.
# Strings are skipped so that re-running this cell does not JSON-encode the
# column a second time (which would wrap every value in escaped quotes).
# NOTE(review): assumes the raw file never stores a bare string in 'output' — confirm.
data['output'] = data['output'].apply(_as_json_text)

# Verify the conversion
print(data.head())
print(data.dtypes)

                                               input  \
0    What are some fun activities to do in New York?   
1  Can you find 4-star hotels with a swimming poo...   
2  I need to plan a trip from Toronto (YYZ) to Li...   
3     What are some fun things to do in Mexico City?   
4  I need a flight from AER to BKK on 2023-11-15 ...   

                                              output  
0   {"action": ["activities"], "destination": "NYC"}  
1  {"action": ["search_hotels"], "action_input": ...  
2  {"action": ["search_flights", "search_hotels",...  
3   {"action": ["activities"], "destination": "MEX"}  
4  {"action": ["search_flights", "activities"], "...  
input     object
output    object
dtype: object


In [4]:
from datasets import Dataset, DatasetDict, Features, Value

# Number of leading rows used for fine-tuning. The value is deliberately small
# (presumably for a quick smoke run); raise it, e.g. to 700, for a real run.
NUM_EXAMPLES = 10
dataset_pandas = data.iloc[:NUM_EXAMPLES]

# preserve_index=False keeps the pandas index from ever being materialised as an
# extra column, which would otherwise happen once `data` has a non-default index.
dataset_hf = Dataset.from_pandas(dataset_pandas, preserve_index=False)

In [5]:
# Perform train-test split.
# A fixed seed pins the shuffle so the same rows land in train/validation on
# every Restart & Run All; without it the split changes from run to run.
split_dataset = dataset_hf.train_test_split(test_size=0.2, seed=42)

# Re-label the held-out 'test' part as 'validation', the key the training cell reads.
dataset = DatasetDict({
    'train': split_dataset['train'],
    'validation': split_dataset['test']
})

# 2. Initialize the Tokenizer
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
if tokenizer.pad_token is None:
    # Reuse EOS as the padding token so fixed-length padding below is possible.
    tokenizer.pad_token = tokenizer.eos_token

# 3. Preprocess the Data
def preprocess_function(examples):
    """Tokenize a batch of examples as two-turn User/Assistant transcripts.

    Args:
        examples: batch mapping with parallel 'input' and 'output' sequences.

    Returns:
        The tokenizer's encoding of the transcripts, truncated and padded to
        exactly 512 tokens each.
    """
    transcripts = []
    for question, answer in zip(examples['input'], examples['output']):
        transcripts.append(f"User: {question}\nAssistant: {answer}")
    return tokenizer(transcripts, max_length=512, padding='max_length', truncation=True)

# Tokenize every split, handing the function whole batches at a time.
tokenized_datasets = dataset.map(preprocess_function, batched=True)

# 4. Load the Pre-trained LLaMA Model
model = LlamaForCausalLM.from_pretrained(
    'meta-llama/Llama-3.2-1B',
    load_in_8bit=False,
    torch_dtype=torch.float32,  # Ensure float32 is used
    device_map='auto'             # auto
)

# Verify that all model parameters are in float32.
# named_parameters() yields the qualified name of each weight; the Parameter
# object itself does not carry it, so `param.name` cannot identify the offender.
for param_name, param in model.named_parameters():
    if param.dtype != torch.float32:
        print(f"Parameter {param_name} is not in float32!")

# 5. Configure LoRA
# Rank-8 adapters are attached to the q/k/v/o projection modules only.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the loaded model with the adapters and report how many weights will train.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

Map: 100%|██████████| 8/8 [00:00<00:00, 571.42 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 399.99 examples/s]


trainable params: 1,703,936 || all params: 1,237,518,336 || trainable%: 0.1377


In [None]:
# 6. Configure Training
# `eval_strategy` and `use_cpu` are the current names of the deprecated
# `evaluation_strategy` and `no_cuda` arguments.
# NOTE(review): requires a transformers release that already knows the new
# names; the deprecation warning printed under the Trainer cell suggests the
# installed version does — confirm if pinning an older release.
training_args = TrainingArguments(
    output_dir='../model',          # Directory to save the fine-tuned model
    num_train_epochs=5,             # Number of training epochs
    per_device_train_batch_size=4,  # Batch size per device during training
    per_device_eval_batch_size=8,   # Batch size for evaluation
    gradient_accumulation_steps=4,  # Accumulate gradients to simulate larger batch size
    learning_rate=2e-4,             # Learning rate
    weight_decay=0.01,              # Weight decay for optimizer
    eval_strategy="epoch",          # Evaluate at the end of each epoch
    save_strategy="epoch",          # Save checkpoint at the end of each epoch (must match eval_strategy for load_best_model_at_end)
    logging_dir='./logs',           # Directory for logs
    logging_steps=10,               # Log every 10 steps
    save_total_limit=2,             # Maximum number of checkpoints to save
    fp16=False,                     # Disable mixed precision
    use_cpu=True,                   # Disable CUDA/MPS and use CPU
    load_best_model_at_end=True,    # Load the best model when finished training
    metric_for_best_model="loss",   # Use loss to evaluate the best model
    greater_is_better=False,        # Lower loss is better
    save_safetensors=False          # Disable safetensors for checkpoints
)

# 7. Initialize the Data Collator
# Hugging Face's built-in collator batches the already-tokenized examples;
# mlm=False selects causal language modeling instead of masked-token training.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,  # Set to False for causal language modeling
)

# 8. Initialize the Trainer
# The tokenizer is passed as `processing_class`: the `tokenizer=` keyword of
# Trainer is deprecated and triggers the FutureWarning shown under this cell.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    processing_class=tokenizer,
    data_collator=data_collator
)

  trainer = Trainer(


In [None]:
# 9. Start Training
# Runs the fine-tuning loop configured on `trainer` and keeps the value it
# returns in `trainer_stats` so it can be inspected afterwards.
trainer_stats = trainer.train()

  0%|          | 0/25 [00:00<?, ?it/s]

In [None]:
# 10. Save the Fine-Tuned Model and Tokenizer
# NOTE(review): `model` is the PEFT-wrapped model, so this presumably writes only
# the LoRA adapter weights, not a copy of the base model — the reload cell fetches
# the base weights from the Hub separately. Confirm against the output directory.
trainer.save_model('./lora_llama_finetuned/model_finetuned')  # Save through the Trainer
tokenizer.save_pretrained('./lora_llama_finetuned/tokenizer_finetuned')
# NOTE(review): same target directory as trainer.save_model above; this second
# save looks redundant — confirm which of the two is actually needed.
model.save_pretrained('./lora_llama_finetuned/model_finetuned')

# Next: reload the saved artefacts to check that they can be loaded.

In [None]:
from transformers import LlamaForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

In [None]:
# Locations of the saved artefacts
model_path = './lora_llama_finetuned/model_finetuned'      # LoRA adapter directory
base_model_path = './lora_llama_finetuned/base_model'      # Base model directory (if saved separately); not read in this cell
tokenizer_path = './lora_llama_finetuned/tokenizer_finetuned'

# Pick the inference device: CUDA when present, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Restore the tokenizer, falling back to EOS for padding when no pad token is stored.
tokenizer_loaded = AutoTokenizer.from_pretrained(tokenizer_path)
if tokenizer_loaded.pad_token is None:
    tokenizer_loaded.pad_token = tokenizer_loaded.eos_token

# Fetch the original base weights in float32 and let them be placed automatically.
base_model = LlamaForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",
    device_map='auto',
    torch_dtype=torch.float32,
)

# Attach the fine-tuned LoRA adapters to the base model.
model_loaded = PeftModel.from_pretrained(base_model, model_path)

# Put the combined model on the chosen device (kept last so the cell echoes the model).
model_loaded.to(device)

In [None]:
# Switch the reloaded model to evaluation mode before generating; as the cell's
# last expression, the returned module is also echoed in the output.
model_loaded.eval()

In [None]:
# Define a formatted prompt consistent with training
prompt = "User: I need a flight from london to CBR on 2023-12-15 for 2 adults and 1 child in business class, non-stop only.\nAssistant:"

# Tokenize the input and move the tensors to the same device as the model
inputs = tokenizer_loaded(prompt, return_tensors="pt").to(device)

# Generate a response (no gradients are needed for inference)
with torch.no_grad():
    output_tokens = model_loaded.generate(
        **inputs,
        max_new_tokens=250,    # Budget for the answer only; `max_length` also counted the prompt tokens
        temperature=0.1,       # Adjust temperature for more or less randomness
        top_p=0.9,             # Adjust top_p for nucleus sampling
        do_sample=True,        # Set to False for deterministic output (greedy)
        pad_token_id=tokenizer_loaded.pad_token_id,  # Pass the pad id explicitly instead of relying on generate()'s fallback
    )

# Decode the output tokens to text (the decoded sequence starts with the prompt)
response = tokenizer_loaded.decode(output_tokens[0], skip_special_tokens=True)

print(response)