In [1]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from pathlib import Path
from typing import List, Dict, Any
import warnings
warnings.filterwarnings('ignore')

# Make the project importable: the project root is taken to be the directory
# two levels above the current working directory.
_two_levels_up = os.path.join(os.getcwd(), '..', '..')
project_root = os.path.abspath(_two_levels_up)
# Prepend only once so re-running this cell does not pile up duplicate entries.
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)
    

# Import your classes
from data.collection.models import TrainingDataSample, ClassificationResult
from data.collection.config import EventCategory
from ml_classifier import CelebrityMLClassifier, ModelMetrics, TrainingConfig

In [2]:
# Set plotting style
# Both calls change process-wide plotting defaults, so every figure drawn
# after this cell is affected.
# NOTE(review): the 'seaborn-v0_8' style name is presumably only available in
# newer matplotlib releases — confirm against the pinned matplotlib version.
plt.style.use('seaborn-v0_8')
# Use seaborn's "husl" palette as the default color cycle.
sns.set_palette("husl")

In [9]:
# Directory that holds the serialized models. The configured value is used
# as-is (a relative path is resolved against the current working directory);
# an empty value falls back to "<project_root>/models".
_configured_models_dir = 'models'
if _configured_models_dir:
    models_dir = Path(_configured_models_dir)
else:
    models_dir = Path(project_root) / "models"
        
def load_model(model_name, directory=None):
    """Load one serialized model bundle from the models directory.

    Args:
        model_name: File name of the serialized model (for example
            ``'celebrity_classifier_naive_bayes.pkl'``), resolved relative
            to the models directory.
        directory: Optional directory to search. When omitted, the
            notebook-level ``models_dir`` is used.

    Returns:
        The object deserialized by ``joblib.load``, or ``None`` when the file
        does not exist or cannot be loaded. Problems are reported with
        ``print`` rather than raised, so callers must check for ``None``.
    """
    base_dir = Path(models_dir if directory is None else directory)
    print(f"Loading models from: {base_dir}")
    model_path = base_dir / model_name

    if not model_path.is_file():
        # The thing that is missing is the model file, not the directory.
        print(f"Model file not found: {model_path}")
        return None

    try:
        print(f"Loading {model_name}...")
        # SECURITY: joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model files from a trusted source.
        model_data = joblib.load(model_path)
    except Exception as e:
        # Best-effort loader: report the failure and keep the notebook running.
        print(f"❌ Failed to load {model_name}: {e}")
        return None

    # Metadata reporting happens outside the try block so that an unexpected
    # metadata shape cannot discard a model that was loaded successfully.
    if isinstance(model_data, dict):
        if 'trained_at' in model_data:
            print(f"   Trained at: {model_data['trained_at']}")
        config = model_data.get('config')
        if config is not None:
            print(f"   Model type: {getattr(config, 'model_type', 'Unknown')}")

    return model_data
    
    
# Load the three trained classifiers, in the same order as before. Each name
# is bound to whatever load_model returns for the corresponding file.
logistic_regression, naive_bayes, random_forest = map(
    load_model,
    (
        'celebrity_classifier_logistic_regression.pkl',
        'celebrity_classifier_naive_bayes.pkl',
        'celebrity_classifier_random_forest.pkl',
    ),
)


Loading models from: models
Loading celebrity_classifier_logistic_regression.pkl...
   Trained at: 2025-06-28 02:34:09.103122
   Model type: logistic_regression
Loading models from: models
Loading celebrity_classifier_naive_bayes.pkl...
   Trained at: 2025-06-28 02:34:09.024826
   Model type: naive_bayes
Loading models from: models
Loading celebrity_classifier_random_forest.pkl...
   Trained at: 2025-06-28 02:34:08.969200
   Model type: random_forest


AttributeError: 'dict' object has no attribute 'config'

In [None]:
def create_comparison_dataframe(model_metrics, loaded_models=None) -> pd.DataFrame:
    """Build a table with one row of evaluation metrics per model.

    Args:
        model_metrics: Mapping of model name to a metrics object exposing
            ``accuracy``, ``training_samples``, ``test_samples``,
            ``cross_val_scores`` and ``f1_score`` (a container indexed by
            ``EventCategory`` values).
        loaded_models: Optional mapping of model name to the loaded model
            bundle (a dict with a ``'config'`` entry, or an object with a
            ``config`` attribute). When omitted, a notebook-level
            ``loaded_models`` variable is used if one exists; otherwise
            every model type is reported as ``'Unknown'``.

    Returns:
        A DataFrame with the columns ``Model Name``, ``Model Type``,
        ``Accuracy``, ``Training Samples``, ``Test Samples``, ``CV Mean``,
        ``CV Std`` and one ``F1_<category>`` column for each category value
        present in a model's ``f1_score``.
    """
    if loaded_models is None:
        # Backward compatibility: earlier versions read this from notebook
        # state. Fall back to it when present instead of raising NameError.
        loaded_models = globals().get('loaded_models') or {}

    comparison_data = []

    for model_name, metrics in model_metrics.items():
        # A bundle can be missing entirely, or None when loading failed.
        bundle = loaded_models.get(model_name)
        if isinstance(bundle, dict):
            model_config = bundle.get('config')
        else:
            model_config = getattr(bundle, 'config', None)

        row = {
            'Model Name': model_name,
            'Model Type': getattr(model_config, 'model_type', 'Unknown') if model_config else 'Unknown',
            'Accuracy': metrics.accuracy,
            'Training Samples': metrics.training_samples,
            'Test Samples': metrics.test_samples,
            'CV Mean': np.mean(metrics.cross_val_scores),
            'CV Std': np.std(metrics.cross_val_scores)
        }

        # Add per-class F1 scores; categories a model has no score for are
        # simply left out of its row (pandas fills them with NaN).
        for category in EventCategory:
            if category.value in metrics.f1_score:
                row[f'F1_{category.value}'] = metrics.f1_score[category.value]

        comparison_data.append(row)

    return pd.DataFrame(comparison_data)