In [1]:
import pandas as pd
import numpy as np
import os
from glob import glob
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder
import joblib
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
import json
from datetime import datetime
import time
from scipy import stats

In [6]:
BASE_DIR = os.getcwd()

class Config:
    CSV_DIR = os.path.join(BASE_DIR, 'processed_videos')
    SAVE_DIR = os.path.join(BASE_DIR, 'feature_experiments')
    
    # Gaze features
    GAZE_FEATURES = ['gaze_yaw', 'gaze_pitch']
    
    # Action Unit
    AU_FEATURES = [
        'au_shape_dim1', 'au_shape_dim2', 'au_shape_dim3',
        'au_shape_dim4', 'au_shape_dim5', 'au_shape_dim6',
        'au_shape_dim7', 'au_shape_dim8'
    ]
    # Mediapipe 
    POSE_FEATURES = [
        'mp_pose_yaw_deg', 
        'mp_pose_pitch_deg', 
        'mp_pose_roll_deg'
    ]

    # Blendshape (52 features)
    BLENDSHAPE_FEATURES = [
        'bs__neutral',
        'bs_browDownLeft', 'bs_browDownRight', 'bs_browInnerUp',
        'bs_browOuterUpLeft', 'bs_browOuterUpRight',
        'bs_cheekPuff', 'bs_cheekSquintLeft', 'bs_cheekSquintRight',
        'bs_eyeBlinkLeft', 'bs_eyeBlinkRight',
        'bs_eyeLookDownLeft', 'bs_eyeLookDownRight',
        'bs_eyeLookInLeft', 'bs_eyeLookInRight',
        'bs_eyeLookOutLeft', 'bs_eyeLookOutRight',
        'bs_eyeLookUpLeft', 'bs_eyeLookUpRight',
        'bs_eyeSquintLeft', 'bs_eyeSquintRight',
        'bs_eyeWideLeft', 'bs_eyeWideRight',
        'bs_jawForward', 'bs_jawLeft', 'bs_jawOpen', 'bs_jawRight',
        'bs_mouthClose',
        'bs_mouthDimpleLeft', 'bs_mouthDimpleRight',
        'bs_mouthFrownLeft', 'bs_mouthFrownRight',
        'bs_mouthFunnel', 'bs_mouthLeft',
        'bs_mouthLowerDownLeft', 'bs_mouthLowerDownRight',
        'bs_mouthPressLeft', 'bs_mouthPressRight',
        'bs_mouthPucker', 'bs_mouthRight',
        'bs_mouthRollLower', 'bs_mouthRollUpper',
        'bs_mouthShrugLower', 'bs_mouthShrugUpper',
        'bs_mouthSmileLeft', 'bs_mouthSmileRight',
        'bs_mouthStretchLeft', 'bs_mouthStretchRight',
        'bs_mouthUpperUpLeft', 'bs_mouthUpperUpRight',
        'bs_noseSneerLeft', 'bs_noseSneerRight'
    ]

    EYE_BLENDSHAPES = [
        'bs_eyeBlinkLeft', 'bs_eyeBlinkRight',
        'bs_eyeSquintLeft', 'bs_eyeSquintRight',
        'bs_eyeWideLeft', 'bs_eyeWideRight'
    ]
    
    MOUTH_BLENDSHAPES = [
        'bs_jawOpen',
        'bs_mouthSmileLeft', 'bs_mouthSmileRight',
        'bs_mouthFrownLeft', 'bs_mouthFrownRight',
        'bs_mouthPucker', 'bs_mouthFunnel',
        'bs_mouthStretchLeft', 'bs_mouthStretchRight'
    ]

    BROW_BLENDSHAPES = [
        'bs_browDownLeft', 'bs_browDownRight',
        'bs_browInnerUp',
        'bs_browOuterUpLeft', 'bs_browOuterUpRight'
    ]

    KEY_BLENDSHAPES = EYE_BLENDSHAPES + MOUTH_BLENDSHAPES + BROW_BLENDSHAPES

    FEATURE_SETS = {
        # Single feature types
        'AU_Only': AU_FEATURES,
        'Gaze_Only': GAZE_FEATURES,
        'Pose_Only': POSE_FEATURES,
        'Blendshapes_Only': BLENDSHAPE_FEATURES,
        
        # Key subsets
        'Key_Blendshapes': KEY_BLENDSHAPES,
        'Eye_Features': EYE_BLENDSHAPES,
        'Mouth_Features': MOUTH_BLENDSHAPES,
        
        # Combinations
        'AU_Gaze': AU_FEATURES + GAZE_FEATURES,
        'AU_Gaze_Pose': AU_FEATURES + GAZE_FEATURES + POSE_FEATURES,
        'AU_KeyBlendshapes': AU_FEATURES + KEY_BLENDSHAPES,
        'Gaze_Pose_KeyBlendshapes': GAZE_FEATURES + POSE_FEATURES + KEY_BLENDSHAPES,
        
        # Everything
        'All_Features': AU_FEATURES + GAZE_FEATURES + POSE_FEATURES + BLENDSHAPE_FEATURES,
        
        # Your original best
        'Original_Best': GAZE_FEATURES + AU_FEATURES
    }

    # Training parameters
    TEST_SIZE = 0.2
    VAL_SIZE = 0.1
    RANDOM_STATE = 42
    
    EMOTIONS = ['happy', 'sad', 'angry', 'surprise', 'neutral', 'fear', 'disgust']

config = Config()
os.makedirs(config.SAVE_DIR, exist_ok=True)

print(f"Feature Selection Experiments")
print(f"Testing {len(config.FEATURE_SETS)} feature combinations")

print(f"Available Features:")
print(f"  • Gaze: {len(config.GAZE_FEATURES)} features")
print(f"  • AU Shape: {len(config.AU_FEATURES)} features")
print(f"  • Pose: {len(config.POSE_FEATURES)} features")
print(f"  • Blendshapes: {len(config.BLENDSHAPE_FEATURES)} features")
print(f"  • Key Blendshapes: {len(config.KEY_BLENDSHAPES)} features")
print(f"\nTotal available: {len(config.AU_FEATURES + config.GAZE_FEATURES + config.POSE_FEATURES + config.BLENDSHAPE_FEATURES)} features")

Feature Selection Experiments
Testing 13 feature combinations
Available Features:
  • Gaze: 2 features
  • AU Shape: 8 features
  • Pose: 3 features
  • Blendshapes: 52 features
  • Key Blendshapes: 20 features

Total available: 65 features


In [11]:
class DataLoader:
    def __init__(self, csv_dir, feature_columns):
        self.csv_dir = csv_dir
        self.feature_columns = feature_columns
    
    def extract_emotion_from_filename(self, filename):
        base = os.path.basename(filename).lower()
        base = base.replace('_processed.csv', '')
        
        for emotion in config.EMOTIONS:
            if emotion in base:
                return emotion
        return None
    
    def load_all_csvs(self):
        csv_files = glob(os.path.join(self.csv_dir, '*_processed.csv'))
        
        if len(csv_files) == 0:
            csv_files = glob(os.path.join(self.csv_dir, '*.csv'))
        
        all_data = []
        all_labels = []
        file_stats = []
        
        for csv_file in csv_files:
            emotion = self.extract_emotion_from_filename(csv_file)
            
            if emotion is None:
                print(f"Skipping {os.path.basename(csv_file)} - no emotion detected")
                continue
            
            try:
                df = pd.read_csv(csv_file)
                
                if len(df) == 0:
                    print(f"Skipping {os.path.basename(csv_file)} - empty")
                    continue
                
                missing_cols = [col for col in self.feature_columns if col not in df.columns]
                if missing_cols:
                    print(f"Skipping {os.path.basename(csv_file)} - missing: {missing_cols}")
                    continue
                
                features = df[self.feature_columns].values
                valid_mask = ~np.any(np.isnan(features) | np.isinf(features), axis=1)
                features = features[valid_mask]
                
                if len(features) == 0:
                    print(f"Skipping {os.path.basename(csv_file)}, no valid data")
                    continue
                
                all_data.append(features)
                all_labels.extend([emotion] * len(features))
                
                file_stats.append({
                    'file': os.path.basename(csv_file),
                    'emotion': emotion,
                    'frames': len(features)
                })
            
                
            except Exception as e:
                print(f"Error loading {os.path.basename(csv_file)}: {e}")
        
        if not all_data:
            print("\n No data loaded!")
            return None, None, None
        
        X = np.vstack(all_data)
        y = np.array(all_labels)
    
        
        emotion_counts = Counter(y)
        for emotion in sorted(emotion_counts.keys()):
            count = emotion_counts[emotion]
            percentage = (count / len(y)) * 100
            print(f"  {emotion:12s}: {count:6,d} frames ({percentage:5.1f}%)")
        
        return X, y, file_stats

In [None]:
all_experiment_results = []

print(f"FEATURE SELECTION EXPERIMENTS")
print(f"{'='*70}")
print(f"Testing {len(config.FEATURE_SETS)} feature combinations\n")

for experiment_num, (set_name, features) in enumerate(config.FEATURE_SETS.items(), 1):
    
    print(f"\n{'#'*70}")
    print(f"# EXPERIMENT {experiment_num}/{len(config.FEATURE_SETS)}: {set_name}")
    print(f"# Features: {len(features)}")
    print(f"{'#'*70}\n")
    
    start_time = time.time()
    
    try:
        loader = DataLoader(config.CSV_DIR, features) 
        X, y, file_stats = loader.load_all_csvs()
        
        if X is None:
            print(f"Failed to load {set_name}. Skipping.\n")
            continue
        
        label_encoder = LabelEncoder()
        y_encoded = label_encoder.fit_transform(y)
        
    
        X_train, X_temp, y_train, y_temp = train_test_split(X, y_encoded, test_size=config.TEST_SIZE + config.VAL_SIZE, 
                                                            stratify=y_encoded, random_state=config.RANDOM_STATE)
        
        val_ratio = config.VAL_SIZE / (config.TEST_SIZE + config.VAL_SIZE)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=1 - val_ratio, stratify=y_temp,
                                                        random_state=config.RANDOM_STATE)
        

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)
        X_test_scaled = scaler.transform(X_test)
        
        
        simple_grid = {'n_estimators': [300],
                       'max_depth': [20],
                       'min_samples_split': [2],
                       'min_samples_leaf': [1],
                       'max_features': ['sqrt']}
        
        rf_model = RandomForestClassifier(random_state=config.RANDOM_STATE, n_jobs=-1)
        
        cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=config.RANDOM_STATE)
        
        grid_search = GridSearchCV(rf_model, simple_grid, cv=cv, scoring='accuracy', n_jobs=-1, verbose=0)
        
        grid_search.fit(X_train_scaled, y_train)
        
        best_model = grid_search.best_estimator_
        
        train_score = best_model.score(X_train_scaled, y_train)
        val_score = best_model.score(X_val_scaled, y_val)
        test_score = best_model.score(X_test_scaled, y_test)
        
        # Per-class metrics
        y_pred = best_model.predict(X_test_scaled)
        
        from sklearn.metrics import precision_recall_fscore_support
        precision, recall, f1, support = precision_recall_fscore_support(
            y_test, y_pred, average=None, labels=range(len(label_encoder.classes_)))
        
        class_metrics = {}
        for i, emotion in enumerate(label_encoder.classes_):
            class_metrics[emotion] = {
                'precision': float(precision[i]),
                'recall': float(recall[i]),
                'f1': float(f1[i]),
                'support': int(support[i])}
        
        elapsed = time.time() - start_time
        
        experiment_result = {
            'feature_set_name': set_name,
            'num_features': len(features),
            'features': features,
            'train_accuracy': float(train_score),
            'val_accuracy': float(val_score),
            'test_accuracy': float(test_score),
            'cv_score': float(grid_search.best_score_),
            'class_metrics': class_metrics,
            'training_time_seconds': elapsed,
            'confusion_matrix': confusion_matrix(y_test, y_pred).tolist()}
        
        all_experiment_results.append(experiment_result)
        
        exp_dir = os.path.join(config.SAVE_DIR, set_name)
        os.makedirs(exp_dir, exist_ok=True)
        
        joblib.dump(best_model, os.path.join(exp_dir, 'model.pkl'))
        joblib.dump(scaler, os.path.join(exp_dir, 'scaler.pkl'))
        joblib.dump(label_encoder, os.path.join(exp_dir, 'label_encoder.pkl'))
        
        with open(os.path.join(exp_dir, 'results.json'), 'w') as f:
            json.dump(experiment_result, f, indent=2)

        print(f"\n {set_name} Complete!")
        print(f"   Test Accuracy: {test_score:.4f}")
        print(f"   Time: {elapsed:.1f}s")

        
    except Exception as e:
        print(f"\n Error in {set_name}: {e}")
        import traceback
        traceback.print_exc()
        continue

FEATURE SELECTION EXPERIMENTS
Testing 13 feature combinations


######################################################################
# EXPERIMENT 1/13: AU_Only
# Features: 8
######################################################################

  angry       :  8,752 frames ( 13.8%)
  disgust     :  8,830 frames ( 14.0%)
  fear        :  9,089 frames ( 14.4%)
  happy       :  9,034 frames ( 14.3%)
  neutral     :  8,892 frames ( 14.1%)
  sad         :  9,582 frames ( 15.2%)
  surprise    :  9,061 frames ( 14.3%)

 AU_Only Complete!
   Test Accuracy: 0.7529
   Time: 8.6s

######################################################################
# EXPERIMENT 2/13: Gaze_Only
# Features: 2
######################################################################

  angry       :  8,752 frames ( 13.8%)
  disgust     :  8,830 frames ( 14.0%)
  fear        :  9,089 frames ( 14.4%)
  happy       :  9,034 frames ( 14.3%)
  neutral     :  8,892 frames ( 14.1%)
  sad         :  9,582 frames ( 15.2%)

In [17]:
print(f"\n\n{'='*70}")
print(f"FINAL RESULTS - ALL FEATURE COMBINATIONS")
print(f"{'='*70}\n")

if len(all_experiment_results) == 0:
    print("No experiments completed successfully!")
else:
    # Sort by test accuracy
    all_experiment_results.sort(key=lambda x: x['test_accuracy'], reverse=True)
    
    # Print table
    print(f"{'Rank':<6} {'Feature Set':<25} {'#Features':<12} {'Test Acc':<12} {'Time':<10}")
    print(f"{'-'*70}")
    
    for rank, result in enumerate(all_experiment_results, 1):
        print(f"{rank:<6} {result['feature_set_name']:<25} {result['num_features']:<12} "
              f"{result['test_accuracy']:.4f}       {result['training_time_seconds']:.1f}s")
    
    # Save comparison
    comparison = {
        'timestamp': datetime.now().isoformat(),
        'experiments': all_experiment_results
    }
    
    with open(os.path.join(config.SAVE_DIR, 'all_results.json'), 'w') as f:
        json.dump(comparison, f, indent=2)
    



FINAL RESULTS - ALL FEATURE COMBINATIONS

Rank   Feature Set               #Features    Test Acc     Time      
----------------------------------------------------------------------
1      All_Features              65           0.9861       28.5s
2      Gaze_Pose_KeyBlendshapes  25           0.9759       19.3s
3      Blendshapes_Only          52           0.9757       28.5s
4      AU_KeyBlendshapes         28           0.9683       16.8s
5      Key_Blendshapes           20           0.9516       17.0s
6      AU_Gaze_Pose              13           0.9462       10.3s
7      Mouth_Features            9            0.8698       13.3s
8      AU_Gaze                   10           0.8198       12.3s
9      Original_Best             10           0.8188       11.2s
10     AU_Only                   8            0.7529       8.6s
11     Eye_Features              6            0.7112       10.4s
12     Pose_Only                 3            0.6780       7.4s
13     Gaze_Only                 2   

In [18]:
BEST_FEATURE_SET = "Gaze_Pose_KeyBlendshapes"
final_features = config.FEATURE_SETS[BEST_FEATURE_SET]

loader = DataLoader(config.CSV_DIR, final_features)
X, y, file_stats = loader.load_all_csvs()

Skipping grant_neutral_1_processed.csv - missing: ['bs_eyeBlinkLeft', 'bs_eyeBlinkRight', 'bs_eyeSquintLeft', 'bs_eyeSquintRight', 'bs_eyeWideLeft', 'bs_eyeWideRight', 'bs_jawOpen', 'bs_mouthSmileLeft', 'bs_mouthSmileRight', 'bs_mouthFrownLeft', 'bs_mouthFrownRight', 'bs_mouthPucker', 'bs_mouthFunnel', 'bs_mouthStretchLeft', 'bs_mouthStretchRight', 'bs_browDownLeft', 'bs_browDownRight', 'bs_browInnerUp', 'bs_browOuterUpLeft', 'bs_browOuterUpRight']
Skipping grant_neutral_5_processed.csv - missing: ['bs_eyeBlinkLeft', 'bs_eyeBlinkRight', 'bs_eyeSquintLeft', 'bs_eyeSquintRight', 'bs_eyeWideLeft', 'bs_eyeWideRight', 'bs_jawOpen', 'bs_mouthSmileLeft', 'bs_mouthSmileRight', 'bs_mouthFrownLeft', 'bs_mouthFrownRight', 'bs_mouthPucker', 'bs_mouthFunnel', 'bs_mouthStretchLeft', 'bs_mouthStretchRight', 'bs_browDownLeft', 'bs_browDownRight', 'bs_browInnerUp', 'bs_browOuterUpLeft', 'bs_browOuterUpRight']
Skipping grant_neutral_2_processed.csv - missing: ['bs_eyeBlinkLeft', 'bs_eyeBlinkRight', 'bs_

In [19]:
def create_windowed_predictions(model, X, window_size, stride=None):
    if stride is None:
        stride = window_size
    
    predictions = []
    confidences = []
    
    for i in range(0, len(X) - window_size + 1, stride):
        window = X[i:i + window_size]
        window_pred = model.predict(window)
        
        mode_pred = stats.mode(window_pred, keepdims=True)[0][0]
        predictions.append(mode_pred)
        
        confidence = np.sum(window_pred == mode_pred) / len(window_pred)
        confidences.append(confidence)
    
    return predictions, confidences


def aggregate_predictions(predictions, confidences=None, method='mode'):
    """Aggregate predictions into single label"""
    if method == 'mode':
        return stats.mode(predictions, keepdims=True)[0][0]
    
    elif method == 'weighted' and confidences is not None:
        weighted_votes = {}
        for pred, conf in zip(predictions, confidences):
            weighted_votes[pred] = weighted_votes.get(pred, 0) + conf
        return max(weighted_votes, key=weighted_votes.get)
    
    elif method == 'confident' and confidences is not None:
        threshold = 0.7
        confident_preds = [p for p, c in zip(predictions, confidences) if c >= threshold]
        if confident_preds:
            return stats.mode(confident_preds, keepdims=True)[0][0]
        return stats.mode(predictions, keepdims=True)[0][0]
    
    return stats.mode(predictions, keepdims=True)[0][0]

In [None]:
param_grids = {
    'Random Forest': {
        'n_estimators': [300],
        'max_depth': [20],
        'min_samples_split': [2],
        'min_samples_leaf': [1],
        'max_features': ['sqrt']
    },
    
    'Gradient Boosting': {
        'n_estimators': [200],
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5],
        'subsample': [0.8],
        'min_samples_split': [2]
    }
}

In [21]:
def train_with_grid_search(X_train, y_train, X_val, y_val, model_name, base_model, param_grid):
    print(f"Grid Search: {model_name}")
    print(f"{'-'*70}")
    print(f"Testing {np.prod([len(v) for v in param_grid.values()])} combinations...")
    
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=config.RANDOM_STATE)
    
    grid_search = GridSearchCV(base_model, param_grid, cv=cv, scoring='accuracy', n_jobs=-1, verbose=1)
    
    grid_search.fit(X_train, y_train)
    
    print(f"\n Best parameters: {grid_search.best_params_}")
    print(f"Best CV score: {grid_search.best_score_:.4f}")
    
    val_score = grid_search.score(X_val, y_val)
    print(f"Validation score: {val_score:.4f}")
    
    return grid_search.best_estimator_, grid_search.best_params_, grid_search.best_score_


def evaluate_windowing_strategies(model, X, y, label_encoder, window_sizes=[10, 50, 100, None]):
    print("WINDOWING STRATEGY COMPARISON")
    print(f"{'-'*70}")
    
    results = {}
    
    for window_size in window_sizes:
        if window_size is None:
            preds = model.predict(X)
            final_pred = stats.mode(preds, keepdims=True)[0][0]
            accuracy = accuracy_score(y, [final_pred] * len(y))
        else:
            if len(X) >= window_size:
                predictions, confidences = create_windowed_predictions(model, X, window_size)
                final_pred = aggregate_predictions(predictions, confidences, 'weighted')
                accuracy = accuracy_score(y[:len(predictions)], predictions)
            else:
                preds = model.predict(X)
                final_pred = stats.mode(preds, keepdims=True)[0][0]
                accuracy = accuracy_score(y, [final_pred] * len(y))
        
        window_name = f"Window {window_size}" if window_size else "Full Sequence"
        results[window_name] = accuracy
        print(f"{window_name:20s}: {accuracy:.4f}")
    
    return results

In [36]:
print("EMOTION DETECTION MODEL TRAINING")
print("-"*70)

# 1. Load data
BEST_FEATURE_SET = "Gaze_Pose_KeyBlendshapes"
final_features = config.FEATURE_SETS[BEST_FEATURE_SET]

loader = DataLoader(config.CSV_DIR, final_features)
X, y, file_stats = loader.load_all_csvs()

if X is None:
    print("Failed to load data. Exiting.")
else:
    print(f"Loaded {len(X):,} samples")

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

EMOTION DETECTION MODEL TRAINING
----------------------------------------------------------------------
Skipping grant_neutral_1_processed.csv - missing: ['bs_eyeBlinkLeft', 'bs_eyeBlinkRight', 'bs_eyeSquintLeft', 'bs_eyeSquintRight', 'bs_eyeWideLeft', 'bs_eyeWideRight', 'bs_jawOpen', 'bs_mouthSmileLeft', 'bs_mouthSmileRight', 'bs_mouthFrownLeft', 'bs_mouthFrownRight', 'bs_mouthPucker', 'bs_mouthFunnel', 'bs_mouthStretchLeft', 'bs_mouthStretchRight', 'bs_browDownLeft', 'bs_browDownRight', 'bs_browInnerUp', 'bs_browOuterUpLeft', 'bs_browOuterUpRight']
Skipping grant_neutral_5_processed.csv - missing: ['bs_eyeBlinkLeft', 'bs_eyeBlinkRight', 'bs_eyeSquintLeft', 'bs_eyeSquintRight', 'bs_eyeWideLeft', 'bs_eyeWideRight', 'bs_jawOpen', 'bs_mouthSmileLeft', 'bs_mouthSmileRight', 'bs_mouthFrownLeft', 'bs_mouthFrownRight', 'bs_mouthPucker', 'bs_mouthFunnel', 'bs_mouthStretchLeft', 'bs_mouthStretchRight', 'bs_browDownLeft', 'bs_browDownRight', 'bs_browInnerUp', 'bs_browOuterUpLeft', 'bs_browOuter

In [26]:
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y_encoded,
    test_size=config.TEST_SIZE + config.VAL_SIZE,
    stratify=y_encoded,
    random_state=config.RANDOM_STATE
)

val_ratio = config.VAL_SIZE / (config.TEST_SIZE + config.VAL_SIZE)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    test_size=1 - val_ratio,
    stratify=y_temp,
    random_state=config.RANDOM_STATE
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled   = scaler.transform(X_val)
X_test_scaled  = scaler.transform(X_test)

print(f"\n Data split complete:")
print(f"  Training:   {len(X_train):6,d}")
print(f"  Validation: {len(X_val):6,d}")
print(f"  Test:       {len(X_test):6,d}")



 Data split complete:
  Training:   37,639
  Validation:  5,377
  Test:       10,755


In [None]:
models = {
    'Random Forest': RandomForestClassifier(random_state=config.RANDOM_STATE, n_jobs=-1),
    'Gradient Boosting': GradientBoostingClassifier(random_state=config.RANDOM_STATE)
}

trained_models = {}

for name, base_model in models.items():
    best_model, best_params, best_cv_score = train_with_grid_search(X_train_scaled, y_train, X_val_scaled, y_val,
                                                                    name, base_model, param_grids[name])
    
    test_score = best_model.score(X_test_scaled, y_test)
    
    trained_models[name] = {
        'model': best_model,
        'params': best_params,
        'cv_score': best_cv_score,
        'test_score': test_score
    }


Grid Search: Random Forest
----------------------------------------------------------------------
Testing 1 combinations...
Fitting 5 folds for each of 1 candidates, totalling 5 fits



 Best parameters: {'max_depth': 20, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
Best CV score: 0.9709
Validation score: 0.9751
Grid Search: Gradient Boosting
----------------------------------------------------------------------
Testing 8 combinations...
Fitting 5 folds for each of 8 candidates, totalling 40 fits


KeyboardInterrupt: 

In [34]:
best_model_name = max(trained_models, key=lambda x: trained_models[x]['test_score'])
best_model = trained_models[best_model_name]['model']
best_test_acc = trained_models[best_model_name]['test_score']

print(f" BEST MODEL: {best_model_name}")
print("-"*70)
print(f"Test Accuracy: {best_test_acc:.4f}")

# Evaluate
y_pred = best_model.predict(X_test_scaled)
print("\n Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


 BEST MODEL: Random Forest
----------------------------------------------------------------------
Test Accuracy: 0.9759

 Classification Report:
              precision    recall  f1-score   support

       angry       0.99      0.95      0.97      1424
     disgust       0.96      0.98      0.97      1588
        fear       0.98      0.97      0.98      1481
       happy       0.99      0.98      0.99      1724
     neutral       0.97      0.99      0.98      1374
         sad       0.97      0.99      0.98      1586
    surprise       0.98      0.97      0.98      1578

    accuracy                           0.98     10755
   macro avg       0.98      0.98      0.98     10755
weighted avg       0.98      0.98      0.98     10755

