In [1]:
!pip install --upgrade transformers datasets accelerate bitsandbytes

Collecting transformers
  Downloading transformers-4.55.2-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting datasets
  Downloading datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu

In [2]:
import pandas as pd
import torch
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    EarlyStoppingCallback, # Import the callback
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model
from sklearn.model_selection import train_test_split
import wandb
import os

2025-08-15 13:18:06.720550: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755263886.920912      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755263886.976225      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
# --- 1. SETUP & AUTHENTICATION ---
# This will securely access the secret you just created.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HF_TOKEN")

In [4]:
# Login to Hugging Face
from huggingface_hub import login
login(token=hf_token)

# --- 2. CONFIGURATION ---
MODEL_CHECKPOINT = "meta-llama/Llama-3.1-8B-Instruct"
OUTPUT_DIR = "./results/llama2_peft_finetuned"
DATA_FILE = "/kaggle/input/emollm/labeled_2k.csv"
TEXT_COLUMN = "message"
LABEL_COLUMN = "reconciled_emotion"

In [5]:
# --- 3. DATA PREPARATION (IMPROVED PROMPT) ---
print("--- Preparing data with a richer prompt format ---")
df = pd.read_csv(DATA_FILE)

# A more descriptive prompt provides better context for the LLM.
def format_prompt(row):
    return (f"### Human:\n"
            f"Your task is to analyze a GitHub commit message and classify the developer's emotion. "
            f"The emotion must be one of: satisfaction, frustration, neutral, caution.\n\n"
            f"### Message:\n"
            f"'{row[TEXT_COLUMN]}'\n\n"
            f"### Assistant:\n"
            f"{row[LABEL_COLUMN]}")

--- Preparing data with a richer prompt format ---


In [6]:
df['text'] = df.apply(format_prompt, axis=1)

train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
raw_datasets = DatasetDict({
    'train': Dataset.from_pandas(train_df),
    'validation': Dataset.from_pandas(val_df)
})

In [7]:
print(f"--- Loading base model: {MODEL_CHECKPOINT} with 4-bit quantization ---")

# Define the quantization configuration
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
tokenizer.pad_token = tokenizer.eos_token

# Load the model with the quantization config
model = AutoModelForCausalLM.from_pretrained(
    MODEL_CHECKPOINT,
    quantization_config=quantization_config, # APPLY THE CONFIG HERE
    device_map="auto"
)


--- Loading base model: meta-llama/Llama-3.1-8B-Instruct with 4-bit quantization ---


tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

In [8]:
# --- 5. PEFT/LORA CONFIGURATION ---
print("--- Configuring LoRA for PEFT ---")
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

--- Configuring LoRA for PEFT ---
trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848


In [9]:
# --- 6. TOKENIZE AND TRAIN (HARDENED) ---
tokenized_datasets = raw_datasets.map(lambda examples: tokenizer(examples["text"], truncation=True, max_length=512), batched=True)

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

In [10]:
# Define the manual callback for early stopping
early_stopping_callback = EarlyStoppingCallback(early_stopping_patience=2)

In [11]:
trainer = Trainer(
    model=model,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    args=TrainingArguments(
        output_dir=OUTPUT_DIR,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=5, # Set a reasonable max
        learning_rate=2e-4,
        logging_steps=10,
        eval_strategy="epoch", # Use 'eval_strategy' for old transformers versions
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss", # For LLMs, monitor loss
        save_total_limit=1, # LESSON: Prevents disk space errors
        report_to="none", # LESSON: Prevents wandb login errors
    ),
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
    callbacks=[early_stopping_callback], # LESSON: Manual early stopping for old envs
)

In [12]:
print("--- Starting PEFT/LoRA Fine-Tuning ---")
trainer.train()
print("--- Fine-Tuning Complete ---")

--- Starting PEFT/LoRA Fine-Tuning ---


Epoch,Training Loss,Validation Loss
1,1.8186,1.970235
2,1.8884,1.927503
3,1.671,1.929039
4,1.5965,1.945198


--- Fine-Tuning Complete ---


In [13]:
true_labels = val_df[LABEL_COLUMN].tolist()
class_names = sorted(df[LABEL_COLUMN].unique().tolist())

# --- 3. GENERATE PREDICTIONS (BATCHED) ---
print(f"--- Generating predictions for {len(val_df)} validation samples ---")
predictions = []
batch_size = 4 # Keep small to avoid OOM errors

for i in tqdm(range(0, len(val_df), batch_size)):
    batch_df = val_df.iloc[i:i+batch_size]
    
    # Format the prompt for inference (only the Human part)
    prompts = [
        (f"### Human:\n"
         f"Your task is to analyze a GitHub commit message and classify the developer's emotion. "
         f"The emotion must be one of: satisfaction, frustration, neutral, caution.\n\n"
         f"### Message:\n"
         f"'{row[TEXT_COLUMN]}'\n\n"
         f"### Assistant:\n")
        for _, row in batch_df.iterrows()
    ]
    
    inputs = tokenizer(prompts, return_tensors="pt", padding=True).to("cuda")
    
    # Generate output
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=5) # Generate only a few tokens for the label
    
    # Decode and parse the generated text
    decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    
    for output in decoded_outputs:
        # Robust parsing: find the assistant's response and clean it
        try:
            # Split by the final prompt marker
            parsed_label = output.split("### Assistant:\n")[-1].strip().lower()
            # Find the first valid label in the parsed text
            found = False
            for label in class_names:
                if label in parsed_label:
                    predictions.append(label)
                    found = True
                    break
            if not found:
                predictions.append("neutral") # Default to neutral if parsing fails
        except:
            predictions.append("neutral") # Default on any error

# --- 4. VISUALIZE AND REPORT ---
print("--- Generating Final Confusion Matrix and Classification Report ---")

# Generate Confusion Matrix
cm = confusion_matrix(true_labels, predictions, labels=class_names)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Emotion')
plt.ylabel('Manually Labeled Emotion')
plt.title('Fine-Tuned Llama 3.1 8B Performance')
plt.show()

# Generate Classification Report
report = classification_report(true_labels, predictions, target_names=class_names)
print("\nClassification Report:\n")
print(report)


--- Generating predictions for 400 validation samples ---


NameError: name 'tqdm' is not defined