In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import torch
import torch.nn as nn
from transformers import AutoProcessor, LlavaForConditionalGeneration, Trainer, TrainingArguments, BitsAndBytesConfig
from PIL import Image
import json
import numpy as np
import os
from pathlib import Path
from typing import List, Dict, Optional, Tuple
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from datetime import datetime
import warnings
from model.video_emotion_dataset import EmotionDataset
from model.llava_classifier import LLaVaEmotionClassifier as EmotionClassifier
warnings.filterwarnings("ignore")

In [2]:
torch.manual_seed(42)
np.random.seed(42)

print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

CUDA Available: True
GPU: NVIDIA GeForce RTX 4050 Laptop GPU
GPU Memory: 6.0 GB
Using device: cuda


In [4]:
CONFIG = {
    "train_folder": "../data/images/images/train",           # train/angry/, train/happy/, etc
    "test_folder": "path/to/your/test",             # test/person_1/, test/person_2/, etc
    "val_folder": "../data/images/images/validation",                             # Optional: validation data
    
    # Model settings
    "model_name": "llava-hf/llava-v1.6-mistral-7b-hf",
    "use_quantization": True,                       # Set False if you have 16GB+ VRAM
    
    # Training parameters
    "num_epochs": 3,
    "batch_size": 2,                               
    "learning_rate": 2e-5,
    "max_length": 256,
    "gradient_accumulation_steps": 2,
    
    # Emotion labels
    "emotion_labels": ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise'],
    
    # Output directories
    "output_model_dir": "./fine_tuned_emotion_model",
    "results_dir": "./emotion_analysis_results",
    
    # Testing settings
    "test_batch_size": 6,                          
    "save_predictions": True,
}

os.path.isdir(CONFIG["train_folder"])
os.path.isdir(CONFIG["val_folder"])


True

In [5]:
def validate_data_structure(train_folder, test_folder, emotion_labels):
    train_path = Path(train_folder)
    if not train_path.exists():
        print(f"Training folder not found: {train_folder}")
        return False
    print(f"Training folder found: {train_folder}")
    train_stats = {}
    total_train_images = 0

    for emotion in emotion_labels:
        emotion_path = train_path / emotion
        if emotion_path.exists():
            image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']
            image_count = 0
            for ext in image_extensions:
                image_count += len(list(emotion_path.glob(f'*{ext}')))
                image_count += len(list(emotion_path.glob(f"*{ext.upper()}")))

            train_stats[emotion] = image_count
            total_train_images += image_count
            print(f"   {emotion}: {image_count} images")

        else:
            train_stats[emotion] = 0
            print(f"   {emotion}: Folder not found")
    
    print(f"   Total training images: {total_train_images}")

    test_path = Path(test_folder)
    if not test_path.exists():
        print(f"Test folder not found: {test_folder}")
        return False
    
    print(f"\nTest Data Structure:")
    test_stats = {}
    total_test_images = 0
    
    person_folders = [d for d in test_path.iterdir() if d.is_dir()]
    for person_folder in person_folders:
        person_id = person_folder.name        
        image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']
        image_count = 0
        for ext in image_extensions:
            image_count += len(list(person_folder.glob(f"*{ext}")))
            image_count += len(list(person_folder.glob(f"*{ext.upper()}")))
        
        test_stats[person_id] = image_count
        total_test_images += image_count
        print(f"   {person_id}: {image_count} images")
    
    print(f"   Total test images: {total_test_images}")
    
    # Visualization
    create_data_visualization(train_stats, test_stats)
    
    return True

def create_data_visualization(train_stats, test_stats):
    """Create visualization of data distribution"""
    
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
    
    emotions = list(train_stats.keys())
    counts = list(train_stats.values())
    
    ax1.bar(emotions, counts, color='skyblue', alpha=0.7)
    ax1.set_title('Training Data Distribution', fontsize=14, fontweight='bold')
    ax1.set_xlabel('Emotions')
    ax1.set_ylabel('Number of Images')
    ax1.tick_params(axis='x', rotation=45)
    
    for i, count in enumerate(counts):
        ax1.text(i, count + max(counts)*0.01, str(count), ha='center', va='bottom')
    
    persons = list(test_stats.keys())
    test_counts = list(test_stats.values())
    
    ax2.bar(persons, test_counts, color='lightcoral', alpha=0.7)
    ax2.set_title('Test Data Distribution', fontsize=14, fontweight='bold')
    ax2.set_xlabel('Persons')
    ax2.set_ylabel('Number of Images')
    ax2.tick_params(axis='x', rotation=45)
    
    for i, count in enumerate(test_counts):
        ax2.text(i, count + max(test_counts)*0.01, str(count), ha='center', va='bottom')
    
    plt.tight_layout()
    plt.show()
    
    print(f"\n Data Summary:")
    print(f"   Training samples: {sum(train_stats.values())} images across {len([c for c in train_stats.values() if c > 0])} emotions")
    print(f"   Test samples: {sum(test_stats.values())} images across {len(test_stats)} persons")
    print(f"   Average images per emotion: {sum(train_stats.values()) / len([c for c in train_stats.values() if c > 0]):.1f}")
    print(f"   Average images per person: {sum(test_stats.values()) / len(test_stats):.1f}")


validate_data_structure(
    CONFIG["train_folder"], 
    CONFIG["test_folder"], 
    CONFIG["emotion_labels"]
)

Training folder found: ../data/images/images/train
   angry: 7986 images
   disgust: 872 images
   fear: 8206 images
   happy: 14328 images
   neutral: 9964 images
   sad: 9876 images
   surprise: 6410 images
   Total training images: 57642
Test folder not found: path/to/your/test


False

In [6]:
def apply_rtx4050_optimizations():
    """Apply RTX 4050 6GB optimizations before model loading"""
    print("Applying RTX 4050 6GB optimizations...")
    
    if torch.cuda.is_available():
        torch.cuda.set_per_process_memory_fraction(0.9)
        print("  Memory fraction set to 90%")        
        torch.backends.cuda.enable_flash_sdp(True)
        print("  Flash attention enabled")
        
        try:
            torch.cuda.memory._set_allocator_settings('expandable_segments:True')
            print("  Expandable segments enabled")
        except:
            print("  Expandable segments not available (older PyTorch)")
        
        gpu_name = torch.cuda.get_device_name(0)
        total_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
        print(f" GPU: {gpu_name}")
        print(f" Total Memory: {total_memory:.1f} GB")
        
        if "4050" in gpu_name:
            print("RTX 4050 detected - optimizations applied!")
        elif total_memory < 7:
            print(" Low VRAM detected - optimizations still applied")
        else:
            print("Optimizations applied for your GPU")
            
    else:
        print("CUDA not available - using CPU mode")

def monitor_memory_usage(stage=""):
    """Monitor GPU memory usage"""
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1024**3
        reserved = torch.cuda.memory_reserved() / 1024**3
        total = torch.cuda.get_device_properties(0).total_memory / 1024**3
        usage_percent = (reserved / total) * 100
        
        print(f" {stage} Memory Status:")
        print(f"   Allocated: {allocated:.2f} GB")
        print(f"   Reserved: {reserved:.2f} GB") 
        print(f"   Usage: {usage_percent:.1f}% of {total:.1f} GB")        
        if usage_percent > 90:
            print(" CRITICAL: Memory > 90% - OOM risk!")
            return "critical"
        elif usage_percent > 80:
            print("  WARNING: Memory > 80%")
            return "warning"
        elif usage_percent > 60:
            print(" CAUTION: Memory > 60%")
            return "caution"
        else:
            print(" GOOD: Memory usage acceptable")
            return "good"
    return "no_cuda"


In [7]:
apply_rtx4050_optimizations()
monitor_memory_usage("Initial")

print("Initializing Emotion Classifier...")

classifier = EmotionClassifier(
    model_name=CONFIG["model_name"],
    emotion_labels=CONFIG["emotion_labels"],
    use_quantization=CONFIG["use_quantization"] 
)

print("EmotionClassifier initialized!")

monitor_memory_usage("After Model Loading")

print("\nTesting model with sample image...")

try:
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    
    test_image = Image.new('RGB', (224, 224), color=(128, 128, 128))
    test_result = classifier.predict_single_image(test_image)
    
    print(f"Test prediction: {test_result['predicted_emotion']}")
    print("Model test successful!")    
    monitor_memory_usage("After Test")
    
except Exception as e:
    print(f"Model test failed: {e}")
    if "out of memory" in str(e).lower():
        print("RTX 4050 OOM detected - try restarting notebook")

Applying RTX 4050 6GB optimizations...
  Memory fraction set to 90%
  Flash attention enabled
  Expandable segments enabled
 GPU: NVIDIA GeForce RTX 4050 Laptop GPU
 Total Memory: 6.0 GB
RTX 4050 detected - optimizations applied!
 Initial Memory Status:
   Allocated: 0.00 GB
   Reserved: 0.00 GB
   Usage: 0.0% of 6.0 GB
 GOOD: Memory usage acceptable
Initializing Emotion Classifier...
Initializing EmotionClassifier
Model: llava-hf/llava-v1.6-mistral-7b-hf
Emotions: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Using enhanced 8-bit quantization for 6GB VRAM


You are using a model of type llava_next to instantiate a model of type llava. This is not supported for all configurations of models and can yield errors.


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
def train_emotion_model():
    print("Starting Training Phase...")
    print("=" * 50)
    
    if os.path.exists(CONFIG["output_model_dir"]):
        response = input(f"Model directory {CONFIG['output_model_dir']} exists. Overwrite? (y/n): ")
        if response.lower() != 'y':
            print("Training cancelled.")
            return
    
    print("    Training Configuration:")
    print(f"   Model: {CONFIG['model_name']}")
    print(f"   Training folder: {CONFIG['train_folder']}")
    print(f"   Validation folder: {CONFIG['val_folder']}")
    print(f"   Epochs: {CONFIG['num_epochs']}")
    print(f"   Batch size: {CONFIG['batch_size']}")
    print(f"   Learning rate: {CONFIG['learning_rate']}")
    print(f"   Quantization: {CONFIG['use_quantization']}")
    
    start_time = datetime.now()
    print(f"\n Training started at: {start_time}")
    
    try:
        classifier.fine_tune(
            train_folder=CONFIG["train_folder"],
            val_folder=CONFIG["val_folder"],
            output_dir=CONFIG["output_model_dir"],
            num_epochs=CONFIG["num_epochs"],
            batch_size=CONFIG["batch_size"],
            learning_rate=CONFIG["learning_rate"]
        )
        
        end_time = datetime.now()
        duration = end_time - start_time
        
        print(f"\n Training completed!")
        print(f"   Duration: {duration}")
        print(f"   Model saved to: {CONFIG['output_model_dir']}")
        
        config_path = Path(CONFIG["output_model_dir"]) / "training_config.json"
        with open(config_path, 'w') as f:
            training_config = CONFIG.copy()
            training_config['training_start_time'] = start_time.isoformat()
            training_config['training_end_time'] = end_time.isoformat()
            training_config['training_duration'] = str(duration)
            json.dump(training_config, f, indent=2)
        
        print(f"   Training config saved to: {config_path}")
        
        return True
        
    except Exception as e:
        print(f" Training failed: {e}")
        print("💡 Try reducing batch_size or enabling quantization if OOM error")
        return False
training_success = train_emotion_model()

if training_success:
    print("\n Training completed successfully!")
    print(" Ready for testing phase...")
else:
    print("\nTraining failed. Please check the error messages above.")