# 🚀 Vision-LLM Zero to Hero: Optimized Fine-Tuning (QLoRA)

## Introduction
Ce notebook est la solution **"Best Ever"** pour entraîner un modèle Vision-LLM (comme **Qwen-VL**) sur le dataset RAF-CE.

### Points Clés
1.  **Vision-LLM (SOTA)** : Utilisation de Qwen-VL-Chat pour une compréhension visuelle et textuelle avancée.
2.  **QLoRA (4-bit)** : Optimisation mémoire pour entraîner sur GPU standard.
3.  **Data Pipeline Robuste** : Chargement des données personnalisé pour RAF-CE.

---

In [None]:
# --- CELL 1: Imports & Environment Setup ---
# Imports all necessary libraries and mounts Google Drive.

import os
import time
import copy
import zipfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from collections import Counter
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import models, transforms
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import seaborn as sns

# QLoRA / Vision-LLM specific imports
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoProcessor,
    TrainingArguments,
    Trainer
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
    TaskType
)
from datasets import Dataset as HFDataset

# Mount Google Drive (best effort: the notebook should also run outside Colab).
try:
    # Imported inside the try so that a missing `google.colab` package is
    # reported by the handler below instead of aborting the whole cell.
    from google.colab import drive
    drive.mount('/content/drive')
except ImportError:
    print("Drive already mounted or not running in Colab.")
except Exception as mount_error:
    # Mounting is optional, so keep going, but say why it failed.
    # KeyboardInterrupt/SystemExit are deliberately not swallowed any more.
    print(f"Drive already mounted or not running in Colab. ({mount_error})")

# Check for GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"‚úÖ Using device: {device}")

In [None]:
# --- CELL 2: Configuration & Constants ---
# Sets up paths and hyperparameters.

# --- PATHS ---
# Adjust DATASET_PATH when the Drive folder layout changes.
DATASET_PATH = '/content/drive/MyDrive/Colab Datasets'
# Local folder the image archive is unzipped into.
IMAGE_EXTRACT_PATH = '/content/raf-ce-images'

# --- HYPERPARAMETERS ---
# Per-device batch of 8 (kept small for Vision-LLM + QLoRA) with 4
# accumulation steps, i.e. 8 * 4 = 32 samples per optimizer step.
BATCH_SIZE, GRADIENT_ACCUMULATION = 8, 4
NUM_EPOCHS = 10
# Learning rate handed to the trainer.
LEARNING_RATE = 2e-4
# Number of emotion classes; matches the 15 entries of emotion_map.
NUM_CLASSES = 15

# Emotion label index (0-14) -> readable name, listed in index order.
emotion_map = dict(enumerate((
    'Happily surprised',
    'Happily disgusted',
    'Sadly fearful',
    'Sadly angry',
    'Sadly surprised',
    'Sadly disgusted',
    'Fearfully angry',
    'Fearfully surprised',
    'Fearfully disgusted',
    'Angrily surprised',
    'Angrily disgusted',
    'Disgustedly surprised',
    'Happily fearful',
    'Happily angry',
    'Happily sad',
)))

print("‚úÖ Configuration loaded.")

In [None]:
# --- CELL 3: Data Preparation Functions ---
# Handles unzipping and parsing label files.

def _aligned_image_path(img_root, filename):
    """Return the on-disk path for one label-file entry.

    Names ending in '.jpg' are mapped to '<stem>_aligned.jpg'; any other name
    is joined to ``img_root`` unchanged.
    """
    stem, ext = os.path.splitext(filename)
    # Rewrite only the trailing extension and leave names that already carry
    # the '_aligned' suffix alone (a plain str.replace would touch every
    # '.jpg' occurrence and double the suffix).
    if ext == '.jpg' and not stem.endswith('_aligned'):
        filename = f"{stem}_aligned.jpg"
    return os.path.join(img_root, filename)


def prepare_data(dataset_path, extract_to):
    """
    Build the sample table used by the rest of the notebook.

    Steps:
      1. Unzip 'aligned.zip' into ``extract_to`` (skipped when that folder
         already contains something).
      2. Read 'RAFCE_emolabel.txt' into columns 'filename' and 'label'.
      3. Read 'RAFCE_AUlabel.txt' when it exists ('filename' + 'AU_1'..'AU_N',
         non-numeric AU cells become 0.0).
      4. Inner-merge both tables on 'filename', then add 'label_text'
         (via ``emotion_map``) and 'path' (full image path).

    Every input file is looked up in ``dataset_path`` first and in the current
    working directory second.

    Args:
        dataset_path: Folder expected to hold the zip archive and label files.
        extract_to: Local folder the images are extracted into.

    Returns:
        ``(dataframe, img_root)`` on success, or ``(None, None)`` when the zip
        archive or the emotion label file cannot be found.
    """

    # --- Step 1: Unzip Images ---
    zip_file = os.path.join(dataset_path, 'aligned.zip')

    if not os.path.exists(zip_file):
        # Fallback for testing without Drive
        print(f"‚ö†Ô∏è Zip file not found at: {zip_file}. Checking local directory...")
        if not os.path.exists("aligned.zip"):
            print("‚ùå No dataset found. Please upload aligned.zip.")
            return None, None
        zip_file = "aligned.zip"

    # An existing but empty folder (the folder is created before extraction
    # starts) must not be mistaken for a finished extraction.
    if os.path.isdir(extract_to) and os.listdir(extract_to):
        print("‚ÑπÔ∏è Images already extracted (skipping unzip).")
    else:
        print(f"üìÇ Extracting {zip_file}...")
        os.makedirs(extract_to, exist_ok=True)
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
        print("‚úÖ Extraction complete.")

    # Locate the specific folder inside the extracted path.
    # Heuristic: if there's only one folder inside, that's our root.
    extracted_items = os.listdir(extract_to)
    img_root = extract_to
    if len(extracted_items) == 1:
        only_item = os.path.join(extract_to, extracted_items[0])
        if os.path.isdir(only_item):
            img_root = only_item

    print(f"üñºÔ∏è Images located at: {img_root}")

    # --- Step 2: Parse Emotion Labels ---
    emo_path = os.path.join(dataset_path, 'RAFCE_emolabel.txt')
    if not os.path.exists(emo_path):
        # Fallback check
        if os.path.exists('RAFCE_emolabel.txt'):
            emo_path = 'RAFCE_emolabel.txt'
        else:
            print(f"‚ùå Label file not found: {emo_path}")
            return None, None

    print(f"üìñ Reading Emotions from: {emo_path}")
    # Expected line format: "filename label_index".
    # The filename is forced to str so purely numeric names are not parsed as
    # integers (which would break the merge key and the path building below).
    df_emo = pd.read_csv(
        emo_path,
        sep=r'\s+',
        header=None,
        names=['filename', 'label'],
        dtype={'filename': str},
    )

    # --- Step 3: Parse Action Unit (AU) Labels ---
    au_path = os.path.join(dataset_path, 'RAFCE_AUlabel.txt')
    if not os.path.exists(au_path) and os.path.exists('RAFCE_AUlabel.txt'):
        au_path = 'RAFCE_AUlabel.txt'

    if os.path.exists(au_path):
        print(f"üìñ Reading AUs from: {au_path}")
        # Expected line format: "filename au1 au2 ... auN".
        # Column 0 is kept as str for the same reason as above.
        df_au = pd.read_csv(au_path, sep=r'\s+', header=None, dtype={0: str})

        # Rename columns (0 is filename, rest are AUs)
        au_col_names = ['filename'] + [f'AU_{i}' for i in range(1, len(df_au.columns))]
        df_au.columns = au_col_names

        # Sanitize AU columns: anything that is not a number becomes 0.0.
        print("üßπ Sanitizing AU labels to ensure numeric values...")
        for col in au_col_names[1:]:  # Skip filename column
            df_au[col] = pd.to_numeric(df_au[col], errors='coerce').fillna(0.0)

        # --- Step 4: Merge Data ---
        df_merged = pd.merge(df_emo, df_au, on='filename')

        # The inner merge silently discards rows whose filename is missing
        # from the AU table; make that visible.
        dropped = len(df_emo) - len(df_merged)
        if dropped > 0:
            print(f"Warning: merged table has {dropped} fewer rows than the emotion label file.")
    else:
        print(f"‚ö†Ô∏è AU file not found: {au_path}. Proceeding without AUs.")
        df_merged = df_emo

    # Add text labels for LLM
    df_merged['label_text'] = df_merged['label'].map(emotion_map)

    # Add full image path
    df_merged['path'] = df_merged['filename'].apply(
        lambda name: _aligned_image_path(img_root, name)
    )

    return df_merged, img_root

# Run the data pipeline once at cell level and report the outcome.
# `df` is None when prepare_data could not find its input files.
df, img_root = prepare_data(DATASET_PATH, IMAGE_EXTRACT_PATH)
print(f"‚úÖ Data Loaded: {0 if df is None else len(df)} samples")
if df is not None:
    # Preview the first rows in the notebook output.
    display(df.head())

## 4. Configuration du Modèle (QLoRA) 🧠

In [None]:
# Hugging Face model identifier passed to every from_pretrained call in
# load_model_and_processor.
MODEL_ID = "Qwen/Qwen-VL-Chat-Int4"  # Optimized version

def _ensure_pad_token(text_encoder):
    """Give a tokenizer-like object a pad token id when it has none.

    Does nothing when ``pad_token_id`` is already set, or when neither
    ``eod_id`` nor ``eos_token_id`` is available as a fallback.
    """
    if getattr(text_encoder, "pad_token_id", None) is not None:
        return
    for fallback_attr in ("eod_id", "eos_token_id"):
        fallback_id = getattr(text_encoder, fallback_attr, None)
        if fallback_id is not None:
            text_encoder.pad_token_id = fallback_id
            return


def load_model_and_processor():
    """Load MODEL_ID with a 4-bit config and wrap it with LoRA adapters.

    Returns:
        ``(model, processor, tokenizer)``: the PEFT-wrapped model, the object
        returned by ``AutoProcessor.from_pretrained`` and the one returned by
        ``AutoTokenizer.from_pretrained``.
    """
    print(f"üîÑ Loading {MODEL_ID}...")

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

    # The dataset wrapper pads with padding="max_length", which needs a pad
    # token. The guard only acts when none is configured, so it cannot change
    # a setup that already works.
    # NOTE(review): Qwen tokenizers presumably ship without a pad token and
    # expose `eod_id` - confirm against the checkpoint's tokenizer code.
    for text_encoder in (tokenizer, getattr(processor, "tokenizer", processor)):
        _ensure_pad_token(text_encoder)

    # BitsAndBytes Config (4-bit)
    # NOTE(review): MODEL_ID names an "-Int4" checkpoint. If it is already
    # quantized, transformers may reject or ignore a bitsandbytes config on
    # top of it - confirm, and consider the non-quantized checkpoint here.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    model = prepare_model_for_kbit_training(model)

    # LoRA Config: adapters are attached to the modules named below.
    peft_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["c_attn", "attn.c_proj", "w1", "w2"],
        lora_dropout=0.05,
        bias="none",
        # Enum member instead of the equivalent "CAUSAL_LM" string.
        task_type=TaskType.CAUSAL_LM
    )

    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

    return model, processor, tokenizer

# Load once at cell level; the dataset and training cells below read these
# three module-level names.
model, processor, tokenizer = load_model_and_processor()

## 5. Dataset Wrapper pour Vision-LLM 📂

In [None]:
class RAFCE_LLM_Dataset(Dataset):
    """Torch dataset turning each dataframe row into one causal-LM sample.

    Every row must provide 'path' (image location) and 'label_text' (emotion
    name). The text prompt embeds the image path in <img> tags and a fixed
    one-sentence answer built from the label.
    """

    # Label value skipped by the loss (torch cross-entropy's default
    # ignore_index).
    IGNORE_INDEX = -100

    def __init__(self, dataframe, processor, tokenizer):
        """Store the sample table and the text-processing objects.

        Args:
            dataframe: Table with 'path' and 'label_text' columns.
            processor: Callable used to encode the prompt in __getitem__.
            tokenizer: Kept on the instance; not used by __getitem__.
        """
        self.data = dataframe
        self.processor = processor
        self.tokenizer = tokenizer

    def __len__(self):
        """Return the number of rows in the sample table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Encode row ``idx``.

        Returns:
            dict with 1-D tensors 'input_ids', 'attention_mask' and 'labels'.
            'labels' is a copy of 'input_ids' in which every position with
            attention_mask == 0 (padding) is replaced by IGNORE_INDEX.
        """
        item = self.data.iloc[idx]
        img_path = item["path"]
        label_text = item["label_text"]

        # Simplified explanation generation
        explanation = f"The facial expression corresponds to {label_text}."

        # Qwen-VL Prompt Format
        prompt = f"User: <img>{img_path}</img> Analyze the emotional state.\nAssistant: {explanation}<|endoftext|>"

        # Process using Qwen's processor
        inputs = self.processor(
            text=[prompt],
            images=None,
            return_tensors="pt",
            padding="max_length",
            max_length=256,
            truncation=True
        )

        # The processor gets a batch of one prompt: drop only that batch axis.
        input_ids = inputs["input_ids"].squeeze(0)
        attention_mask = inputs["attention_mask"].squeeze(0)

        # Independent copy so masking does not alter input_ids. Padding is
        # located through the attention mask rather than a token id, so a
        # genuine end-of-text token still contributes to the loss even if the
        # same id is also used for padding.
        labels = input_ids.clone()
        labels[attention_mask == 0] = self.IGNORE_INDEX

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

# Stratified 90/10 train/validation split, each part wrapped as a dataset.
# NOTE(review): no random_state is passed, so the split differs between runs -
# confirm whether that is intended.
if df is not None:
    train_df, val_df = train_test_split(
        df,
        stratify=df['label'],
        test_size=0.1,
    )
    train_dataset, val_dataset = (
        RAFCE_LLM_Dataset(part, processor, tokenizer)
        for part in (train_df, val_df)
    )

## 6. Training Execution 🔥

In [None]:
def run_training():
    """Fine-tune the module-level ``model`` and save the resulting adapter.

    Reads the module-level ``df``, ``model``, ``train_dataset``,
    ``val_dataset`` and the hyperparameter constants. When ``df`` is None it
    prints a message and returns without training. Checkpoints go to
    './qwen_rafce_results'; after training the model is saved to
    './best_adapter'.
    """
    if df is None:
        print("‚ùå No data loaded. Aborting training.")
        return

    # A hard-coded bf16=True fails on GPUs without bfloat16 support, so pick
    # the mixed-precision mode from the hardware: bf16 where available, fp16
    # on other CUDA devices, neither without CUDA.
    cuda_ready = torch.cuda.is_available()
    use_bf16 = cuda_ready and torch.cuda.is_bf16_supported()
    use_fp16 = cuda_ready and not use_bf16

    # NOTE(review): newer transformers releases rename `evaluation_strategy`
    # to `eval_strategy` - confirm against the installed version.
    training_args = TrainingArguments(
        output_dir="./qwen_rafce_results",
        per_device_train_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION,
        num_train_epochs=NUM_EPOCHS,
        learning_rate=LEARNING_RATE,
        bf16=use_bf16,
        fp16=use_fp16,
        logging_steps=10,
        save_steps=100,
        evaluation_strategy="steps",
        eval_steps=100,
        save_total_limit=2,
        # Keep every key the dataset returns instead of letting the Trainer
        # drop columns.
        remove_unused_columns=False
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
    )

    print("üî• Starting Training...")
    trainer.train()
    model.save_pretrained("./best_adapter")
    print("‚úÖ Training Complete!")

# run_training()