In [1]:
# Cell 1: Importing Required Libraries

import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
import time
from itertools import product
import warnings
warnings.filterwarnings('ignore')

# Adding src directory to path
sys.path.append('../src')

# Importing custom utilities
from config import *
from data_utils import retrieve_processed_datasets, load_resampled_data
from model_utils import (
    initialize_all_models,
    train_single_model,
    evaluate_model_predictions,
    perform_cross_validation,
    save_trained_model
)
from evaluation_utils import (
    calculate_all_metrics,
    create_confusion_matrix,
    perform_statistical_significance_test,
    print_statistical_test_results,
    create_metrics_summary_table
)

# Setting visualization style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("Set2")

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.precision', 4)

print("All libraries imported successfully!")
print(f"Working Directory: {Path.cwd()}")
print(f"Random Seed: {SEED_VALUE}")

All libraries imported successfully!
Working Directory: C:\Users\Ashutosh\Documents\Projects\beyond-smote-evaluation\notebooks
Random Seed: 42


In [2]:
# Cell 2: Load Test Data (Unchanged)
# Loading test data which remains constant across all experiments
# One thing to remember is that test set is NEVER resampled

print("="*70)
print("LOADING TEST DATA")
print("="*70)

# Loading original processed datasets
X_train_original, X_test, y_train_original, y_test = retrieve_processed_datasets(
    file_prefix='higgs'
)

print("\nTest set loaded (unchanged across all experiments):")
print(f"  Test features shape: {X_test.shape}")
print(f"  Test labels shape: {y_test.shape}")

# Displaying test set distribution
test_dist = y_test.value_counts().sort_index()
print(f"\nTest Set Class Distribution:")
for class_label, count in test_dist.items():
    percentage = (count / len(y_test)) * 100
    print(f"  Class {int(class_label)}: {count:,} ({percentage:.2f}%)")

print("\nNote: This test set will be used to evaluate ALL 55 experiments")

LOADING TEST DATA
Loading processed data from C:\Users\Ashutosh\Documents\Projects\beyond-smote-evaluation\data\processed...
Datasets loaded successfully
Training shape: (800000, 28)
Testing shape: (200000, 28)

Test set loaded (unchanged across all experiments):
  Test features shape: (200000, 28)
  Test labels shape: (200000,)

Test Set Class Distribution:
  Class 0: 94,065 (47.03%)
  Class 1: 105,935 (52.97%)

Note: This test set will be used to evaluate ALL 55 experiments


In [3]:
# Cell 3: Load Baseline Results
# Purpose: Loading baseline performance for comparison

print("="*70)
print("LOADING BASELINE RESULTS")
print("="*70)

# Loading baseline metrics
baseline_metrics_path = METRIC_OUTPUT / 'baseline' / 'baseline_metrics.json'

with open(baseline_metrics_path, 'r', encoding='utf-8') as f:
    baseline_metrics = json.load(f)

print("\nBaseline metrics loaded successfully!")
print(f"Number of baseline models: {len(baseline_metrics)}")

# Displaying baseline F1-scores
print("\nBaseline F1-Scores (No Resampling):")
for model_name, metrics in baseline_metrics.items():
    print(f"  {model_name.replace('_', ' ').title()}: {metrics['f1_score']:.4f}")

# Identifying best baseline
best_baseline_model = max(baseline_metrics.items(), 
                          key=lambda x: x[1]['f1_score'])
print(f"\nBest Baseline: {best_baseline_model[0]} (F1={best_baseline_model[1]['f1_score']:.4f})")

LOADING BASELINE RESULTS

Baseline metrics loaded successfully!
Number of baseline models: 5

Baseline F1-Scores (No Resampling):
  Logistic Regression: 0.6869
  Random Forest: 0.7493
  Xgboost: 0.7468
  Svm: 0.4500
  Mlp: 0.7701

Best Baseline: mlp (F1=0.7701)


In [4]:
# Cell 4: Load Resampling Statistics
# Loading resampling method information from previous notebook (03) 
# Statistics for each resampling method

print("="*70)
print("LOADING RESAMPLING STATISTICS")
print("="*70)

# Loading resampling statistics
resampling_stats_path = METRIC_OUTPUT / 'resampling' / 'resampling_statistics.json'

with open(resampling_stats_path, 'r', encoding='utf-8') as f:
    resampling_stats = json.load(f)

print("\nResampling statistics loaded successfully!")
print(f"Number of resampling methods: {len(resampling_stats)}")

print("\nResampling Methods Available:")
for i, method_name in enumerate(resampling_stats.keys(), 1):
    method_info = resampling_stats[method_name]
    print(f"  {i:2d}. {method_name.replace('_', ' ').title()}: "
          f"{method_info['n_samples']:,} samples "
          f"(Ratio: {method_info['imbalance_ratio']:.3f}:1)")


LOADING RESAMPLING STATISTICS

Resampling statistics loaded successfully!
Number of resampling methods: 11

Resampling Methods Available:
   1. Baseline: 800,000 samples (Ratio: 0.888:1)
   2. Random Oversampling: 847,476 samples (Ratio: 1.000:1)
   3. Smote: 847,476 samples (Ratio: 1.000:1)
   4. Borderline Smote: 847,476 samples (Ratio: 1.000:1)
   5. Adasyn: 847,476 samples (Ratio: 1.000:1)
   6. Random Undersampling: 752,524 samples (Ratio: 1.000:1)
   7. Tomek Links: 751,637 samples (Ratio: 0.998:1)
   8. Nearmiss: 752,524 samples (Ratio: 1.000:1)
   9. Smote Tomek: 771,632 samples (Ratio: 1.000:1)
  10. Smote Enn: 276,095 samples (Ratio: 0.858:1)
  11. Class Weighting: 800,000 samples (Ratio: 0.888:1)


In [5]:
# Cell 5: Defining Experiment Configuration
# Setting up experimental design and tracking structure
# Experiment matrix and results storage

print("="*70)
print("EXPERIMENT CONFIGURATION")
print("="*70)

# Defining resampling methods to test
resampling_methods = list(resampling_stats.keys())

# Defining models to test
model_names = ['logistic_regression', 'random_forest', 'xgboost', 'svm', 'mlp']

# Computing total experiments
total_experiments = len(resampling_methods) * len(model_names)

print(f"\nExperimental Design:")
print(f"  Resampling Methods: {len(resampling_methods)}")
print(f"  ML Models: {len(model_names)}")
print(f"  Total Experiments: {total_experiments}")
print(f"  Cross-Validation Folds: {FOLD_COUNT}")
print(f"  Total Model Trainings: {total_experiments * FOLD_COUNT}")

print(f"\nResampling Methods:")
for method in resampling_methods:
    print(f"  - {method}")

print(f"\nML Models:")
for model in model_names:
    print(f"  - {model}")

# Initializing results storage
experiment_results = []

print("\nExperiment tracking initialized")

EXPERIMENT CONFIGURATION

Experimental Design:
  Resampling Methods: 11
  ML Models: 5
  Total Experiments: 55
  Cross-Validation Folds: 5
  Total Model Trainings: 275

Resampling Methods:
  - baseline
  - random_oversampling
  - smote
  - borderline_smote
  - adasyn
  - random_undersampling
  - tomek_links
  - nearmiss
  - smote_tomek
  - smote_enn
  - class_weighting

ML Models:
  - logistic_regression
  - random_forest
  - xgboost
  - svm
  - mlp

Experiment tracking initialized


In [6]:
# Cell 6: Main Experiment Loop - Part 1 (Baseline + ROS + SMOTE)
# Running first 3 resampling methods × 5 models = 15 experiments
# Methods: baseline, random_oversampling, smote


print("="*70)
print("STARTING MAIN EXPERIMENTS - PART 1")
print("="*70)
print("Processing: Baseline, Random Oversampling, SMOTE")
print(f"Experiments in this batch: 15")
print("="*70)

# Methods for this batch
batch_1_methods = ['baseline', 'random_oversampling', 'smote']

experiment_counter = 0

for method_name in batch_1_methods:
    print(f"\n{'='*70}")
    print(f"RESAMPLING METHOD: {method_name.upper()}")
    print(f"{'='*70}")
    
    # Loading resampled data
    if method_name == 'class_weighting':
        # Class weighting uses original data
        X_resampled = X_train_original.copy()
        y_resampled = y_train_original.copy()
        use_weights = True
        class_weights = resampling_stats[method_name].get('weights', None)
    else:
        # Loading resampled dataset
        X_resampled, y_resampled = load_resampled_data(
            method_name=method_name,
            data_dir=RESAMPLED_DIR
        )
        use_weights = False
        class_weights = None
    
    print(f"\nDataset loaded: {len(X_resampled):,} samples")
    
    # Training all models on this resampled data
    for model_name in model_names:
        experiment_counter += 1
        
        print(f"\n{'-'*70}")
        print(f"Experiment {experiment_counter}/{total_experiments}: "
              f"{method_name} + {model_name}")
        print(f"{'-'*70}")
        
        # Recording experiment start time
        exp_start_time = time.time()
        
        try:
            # Initializing model
            models = initialize_all_models(
                use_class_weights=use_weights,
                class_weight_dict=class_weights
            )
            model = models[model_name]
            
            # Training model
            trained_model, train_time = train_single_model(
                model=model,
                X_train=X_resampled,
                y_train=y_resampled,
                model_name=f"{method_name}_{model_name}",
                verbose=False
            )
            
            # Evaluating on test set
            eval_results = evaluate_model_predictions(
                model=trained_model,
                X_test=X_test,
                y_test=y_test,
                model_name=f"{method_name}_{model_name}",
                verbose=False
            )
            
            # Computing metrics
            metrics = calculate_all_metrics(
                y_true=y_test,
                y_pred=eval_results['y_pred'],
                y_proba=eval_results['y_proba']
            )
            
            # Computing total experiment time
            exp_time = time.time() - exp_start_time
            
            # Storing results
            result_row = {
                'resampling_method': method_name,
                'model': model_name,
                'accuracy': metrics['accuracy'],
                'precision': metrics['precision'],
                'recall': metrics['recall'],
                'f1_score': metrics['f1_score'],
                'auc_roc': metrics['auc_roc'],
                'auc_pr': metrics['auc_pr'],
                'g_mean': metrics['g_mean'],
                'mcc': metrics['mcc'],
                'training_time': train_time,
                'experiment_time': exp_time,
                'dataset_size': len(X_resampled),
                'status': 'success'
            }
            
            experiment_results.append(result_row)
            
            print(f"Status: SUCCESS")
            print(f"F1-Score: {metrics['f1_score']:.4f}")
            print(f"Time: {exp_time:.2f}s")
            
        except Exception as e:
            print(f"Status: FAILED - {str(e)}")
            
            # Recording failure
            result_row = {
                'resampling_method': method_name,
                'model': model_name,
                'status': 'failed',
                'error': str(e)
            }
            experiment_results.append(result_row)

print(f"\n{'='*70}")
print(f"BATCH 1 COMPLETED: {experiment_counter}/{total_experiments} experiments")
print(f"{'='*70}")

STARTING MAIN EXPERIMENTS - PART 1
Processing: Baseline, Random Oversampling, SMOTE
Experiments in this batch: 15

RESAMPLING METHOD: BASELINE
Loading resampled data: baseline
  Loaded 800,000 samples
  Features shape: (800000, 28)

Dataset loaded: 800,000 samples

----------------------------------------------------------------------
Experiment 1/55: baseline + logistic_regression
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.6869
Time: 3.63s

----------------------------------------------------------------------
Experiment 2/55: baseline + random_forest
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.7493
Time: 77.58s

----------------------------------------------------------------------
Experiment 3/55: baseline + xgboost
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.7468
Time: 4.08s

-------------------------------------------

In [7]:
# Cell 7: Main Experiment Loop - Part 2 (Borderline-SMOTE + ADASYN)
# Running next 2 resampling methods × 5 models = 10 experiments
# Methods: borderline_smote, adasyn

print("="*70)
print("STARTING MAIN EXPERIMENTS - PART 2")
print("="*70)
print("Processing: Borderline-SMOTE, ADASYN")
print(f"Experiments in this batch: 10")
print("="*70)

# Methods for this batch
batch_2_methods = ['borderline_smote', 'adasyn']

for method_name in batch_2_methods:
    print(f"\n{'='*70}")
    print(f"RESAMPLING METHOD: {method_name.upper()}")
    print(f"{'='*70}")
    
    # Loading resampled data
    X_resampled, y_resampled = load_resampled_data(
        method_name=method_name,
        data_dir=RESAMPLED_DIR
    )
    
    print(f"\nDataset loaded: {len(X_resampled):,} samples")
    
    # Training all models
    for model_name in model_names:
        experiment_counter += 1
        
        print(f"\n{'-'*70}")
        print(f"Experiment {experiment_counter}/{total_experiments}: "
              f"{method_name} + {model_name}")
        print(f"{'-'*70}")
        
        exp_start_time = time.time()
        
        try:
            models = initialize_all_models(use_class_weights=False)
            model = models[model_name]
            
            trained_model, train_time = train_single_model(
                model=model,
                X_train=X_resampled,
                y_train=y_resampled,
                model_name=f"{method_name}_{model_name}",
                verbose=False
            )
            
            eval_results = evaluate_model_predictions(
                model=trained_model,
                X_test=X_test,
                y_test=y_test,
                model_name=f"{method_name}_{model_name}",
                verbose=False
            )
            
            metrics = calculate_all_metrics(
                y_true=y_test,
                y_pred=eval_results['y_pred'],
                y_proba=eval_results['y_proba']
            )
            
            exp_time = time.time() - exp_start_time
            
            result_row = {
                'resampling_method': method_name,
                'model': model_name,
                'accuracy': metrics['accuracy'],
                'precision': metrics['precision'],
                'recall': metrics['recall'],
                'f1_score': metrics['f1_score'],
                'auc_roc': metrics['auc_roc'],
                'auc_pr': metrics['auc_pr'],
                'g_mean': metrics['g_mean'],
                'mcc': metrics['mcc'],
                'training_time': train_time,
                'experiment_time': exp_time,
                'dataset_size': len(X_resampled),
                'status': 'success'
            }
            
            experiment_results.append(result_row)
            
            print(f"Status: SUCCESS")
            print(f"F1-Score: {metrics['f1_score']:.4f}")
            print(f"Time: {exp_time:.2f}s")
            
        except Exception as e:
            print(f"Status: FAILED - {str(e)}")
            
            result_row = {
                'resampling_method': method_name,
                'model': model_name,
                'status': 'failed',
                'error': str(e)
            }
            experiment_results.append(result_row)

print(f"\n{'='*70}")
print(f"BATCH 2 COMPLETED: {experiment_counter}/{total_experiments} experiments")
print(f"{'='*70}")

STARTING MAIN EXPERIMENTS - PART 2
Processing: Borderline-SMOTE, ADASYN
Experiments in this batch: 10

RESAMPLING METHOD: BORDERLINE_SMOTE
Loading resampled data: borderline_smote
  Loaded 847,476 samples
  Features shape: (847476, 28)

Dataset loaded: 847,476 samples

----------------------------------------------------------------------
Experiment 16/55: borderline_smote + logistic_regression
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.6569
Time: 9.44s

----------------------------------------------------------------------
Experiment 17/55: borderline_smote + random_forest
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.7382
Time: 77.00s

----------------------------------------------------------------------
Experiment 18/55: borderline_smote + xgboost
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.7364
Time: 3.65s

------------

In [8]:
# Cell 8: Main Experiment Loop - Part 3 (Undersampling Methods)
# Running 3 undersampling methods × 5 models = 15 experiments
# Methods: random_undersampling, tomek_links, nearmiss

print("="*70)
print("STARTING MAIN EXPERIMENTS - PART 3")
print("="*70)
print("Processing: Random Undersampling, Tomek Links, NearMiss")
print(f"Experiments in this batch: 15")
print("="*70)

# Methods for this batch
batch_3_methods = ['random_undersampling', 'tomek_links', 'nearmiss']

for method_name in batch_3_methods:
    print(f"\n{'='*70}")
    print(f"RESAMPLING METHOD: {method_name.upper()}")
    print(f"{'='*70}")
    
    # Loading resampled data
    X_resampled, y_resampled = load_resampled_data(
        method_name=method_name,
        data_dir=RESAMPLED_DIR
    )
    
    print(f"\nDataset loaded: {len(X_resampled):,} samples")
    
    # Training all models
    for model_name in model_names:
        experiment_counter += 1
        
        print(f"\n{'-'*70}")
        print(f"Experiment {experiment_counter}/{total_experiments}: "
              f"{method_name} + {model_name}")
        print(f"{'-'*70}")
        
        exp_start_time = time.time()
        
        try:
            models = initialize_all_models(use_class_weights=False)
            model = models[model_name]
            
            trained_model, train_time = train_single_model(
                model=model,
                X_train=X_resampled,
                y_train=y_resampled,
                model_name=f"{method_name}_{model_name}",
                verbose=False
            )
            
            eval_results = evaluate_model_predictions(
                model=trained_model,
                X_test=X_test,
                y_test=y_test,
                model_name=f"{method_name}_{model_name}",
                verbose=False
            )
            
            metrics = calculate_all_metrics(
                y_true=y_test,
                y_pred=eval_results['y_pred'],
                y_proba=eval_results['y_proba']
            )
            
            exp_time = time.time() - exp_start_time
            
            result_row = {
                'resampling_method': method_name,
                'model': model_name,
                'accuracy': metrics['accuracy'],
                'precision': metrics['precision'],
                'recall': metrics['recall'],
                'f1_score': metrics['f1_score'],
                'auc_roc': metrics['auc_roc'],
                'auc_pr': metrics['auc_pr'],
                'g_mean': metrics['g_mean'],
                'mcc': metrics['mcc'],
                'training_time': train_time,
                'experiment_time': exp_time,
                'dataset_size': len(X_resampled),
                'status': 'success'
            }
            
            experiment_results.append(result_row)
            
            print(f"Status: SUCCESS")
            print(f"F1-Score: {metrics['f1_score']:.4f}")
            print(f"Time: {exp_time:.2f}s")
            
        except Exception as e:
            print(f"Status: FAILED - {str(e)}")
            
            result_row = {
                'resampling_method': method_name,
                'model': model_name,
                'status': 'failed',
                'error': str(e)
            }
            experiment_results.append(result_row)

print(f"\n{'='*70}")
print(f"BATCH 3 COMPLETED: {experiment_counter}/{total_experiments} experiments")
print(f"{'='*70}")

STARTING MAIN EXPERIMENTS - PART 3
Processing: Random Undersampling, Tomek Links, NearMiss
Experiments in this batch: 15

RESAMPLING METHOD: RANDOM_UNDERSAMPLING
Loading resampled data: random_undersampling
  Loaded 752,524 samples
  Features shape: (752524, 28)

Dataset loaded: 752,524 samples

----------------------------------------------------------------------
Experiment 26/55: random_undersampling + logistic_regression
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.6610
Time: 3.39s

----------------------------------------------------------------------
Experiment 27/55: random_undersampling + random_forest
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.7388
Time: 65.44s

----------------------------------------------------------------------
Experiment 28/55: random_undersampling + xgboost
----------------------------------------------------------------------
Status: SUCCESS
F1-

In [9]:
# Cell 9: Main Experiment Loop - Part 4 (Combination Methods + Class Weights)
# Purpose: Running final 3 methods × 5 models = 15 experiments
# Methods: smote_tomek, smote_enn, class_weighting
# ============================================================================

print("="*70)
print("STARTING MAIN EXPERIMENTS - PART 4 (FINAL BATCH)")
print("="*70)
print("Processing: SMOTE+Tomek, SMOTE+ENN, Class Weighting")
print(f"Experiments in this batch: 15")
print("="*70)

# Methods for this batch
batch_4_methods = ['smote_tomek', 'smote_enn', 'class_weighting']

for method_name in batch_4_methods:
    print(f"\n{'='*70}")
    print(f"RESAMPLING METHOD: {method_name.upper()}")
    print(f"{'='*70}")
    
    # Loading resampled data
    if method_name == 'class_weighting':
        X_resampled = X_train_original.copy()
        y_resampled = y_train_original.copy()
        use_weights = True
        class_weights = resampling_stats[method_name].get('weights', None)
    else:
        X_resampled, y_resampled = load_resampled_data(
            method_name=method_name,
            data_dir=RESAMPLED_DIR
        )
        use_weights = False
        class_weights = None
    
    print(f"\nDataset loaded: {len(X_resampled):,} samples")
    
    # Training all models
    for model_name in model_names:
        experiment_counter += 1
        
        print(f"\n{'-'*70}")
        print(f"Experiment {experiment_counter}/{total_experiments}: "
              f"{method_name} + {model_name}")
        print(f"{'-'*70}")
        
        exp_start_time = time.time()
        
        try:
            models = initialize_all_models(
                use_class_weights=use_weights,
                class_weight_dict=class_weights
            )
            model = models[model_name]
            
            trained_model, train_time = train_single_model(
                model=model,
                X_train=X_resampled,
                y_train=y_resampled,
                model_name=f"{method_name}_{model_name}",
                verbose=False
            )
            
            eval_results = evaluate_model_predictions(
                model=trained_model,
                X_test=X_test,
                y_test=y_test,
                model_name=f"{method_name}_{model_name}",
                verbose=False
            )
            
            metrics = calculate_all_metrics(
                y_true=y_test,
                y_pred=eval_results['y_pred'],
                y_proba=eval_results['y_proba']
            )
            
            exp_time = time.time() - exp_start_time
            
            result_row = {
                'resampling_method': method_name,
                'model': model_name,
                'accuracy': metrics['accuracy'],
                'precision': metrics['precision'],
                'recall': metrics['recall'],
                'f1_score': metrics['f1_score'],
                'auc_roc': metrics['auc_roc'],
                'auc_pr': metrics['auc_pr'],
                'g_mean': metrics['g_mean'],
                'mcc': metrics['mcc'],
                'training_time': train_time,
                'experiment_time': exp_time,
                'dataset_size': len(X_resampled),
                'status': 'success'
            }
            
            experiment_results.append(result_row)
            
            print(f"Status: SUCCESS")
            print(f"F1-Score: {metrics['f1_score']:.4f}")
            print(f"Time: {exp_time:.2f}s")
            
        except Exception as e:
            print(f"Status: FAILED - {str(e)}")
            
            result_row = {
                'resampling_method': method_name,
                'model': model_name,
                'status': 'failed',
                'error': str(e)
            }
            experiment_results.append(result_row)

print(f"\n{'='*70}")
print(f"ALL EXPERIMENTS COMPLETED: {experiment_counter}/{total_experiments}")
print(f"{'='*70}")

STARTING MAIN EXPERIMENTS - PART 4 (FINAL BATCH)
Processing: SMOTE+Tomek, SMOTE+ENN, Class Weighting
Experiments in this batch: 15

RESAMPLING METHOD: SMOTE_TOMEK
Loading resampled data: smote_tomek
  Loaded 771,632 samples
  Features shape: (771632, 28)

Dataset loaded: 771,632 samples

----------------------------------------------------------------------
Experiment 41/55: smote_tomek + logistic_regression
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.6612
Time: 3.48s

----------------------------------------------------------------------
Experiment 42/55: smote_tomek + random_forest
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.7406
Time: 69.86s

----------------------------------------------------------------------
Experiment 43/55: smote_tomek + xgboost
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.7392
Time: 3.39s

--------

In [10]:
# Cell 9b: Re-runnig Failed Class Weighting Experiments
# Fixing the 3 failed class_weighting experiments

print("="*70)
print("RE-RUNNING FAILED CLASS WEIGHTING EXPERIMENTS")
print("="*70)

# Loading original training data
X_resampled = X_train_original.copy()
y_resampled = y_train_original.copy()

# Recalculating class weights with correct integer keys
from sklearn.utils.class_weight import compute_class_weight
classes = np.unique(y_train_original)
weights = compute_class_weight('balanced', classes=classes, y=y_train_original)
class_weights = dict(zip(classes, weights))

print(f"\nClass weights: {class_weights}")
print(f"Dataset: {len(X_resampled):,} samples")

# Models that failed
failed_models = ['logistic_regression', 'random_forest', 'svm']

for model_name in failed_models:
    print(f"\n{'-'*70}")
    print(f"Re-running: class_weighting + {model_name}")
    print(f"{'-'*70}")
    
    exp_start_time = time.time()
    
    try:
        # Initialize model with class weights
        models = initialize_all_models(
            use_class_weights=True,
            class_weight_dict=class_weights
        )
        model = models[model_name]
        
        # Train
        trained_model, train_time = train_single_model(
            model=model,
            X_train=X_resampled,
            y_train=y_resampled,
            model_name=f"class_weighting_{model_name}",
            verbose=False
        )
        
        # Evaluate
        eval_results = evaluate_model_predictions(
            model=trained_model,
            X_test=X_test,
            y_test=y_test,
            model_name=f"class_weighting_{model_name}",
            verbose=False
        )
        
        # Metrics
        metrics = calculate_all_metrics(
            y_true=y_test,
            y_pred=eval_results['y_pred'],
            y_proba=eval_results['y_proba']
        )
        
        exp_time = time.time() - exp_start_time
        
        # Update the failed result in experiment_results
        for i, result in enumerate(experiment_results):
            if (result['resampling_method'] == 'class_weighting' and 
                result['model'] == model_name and 
                result.get('status') == 'failed'):
                
                # Replace failed result with successful one
                experiment_results[i] = {
                    'resampling_method': 'class_weighting',
                    'model': model_name,
                    'accuracy': metrics['accuracy'],
                    'precision': metrics['precision'],
                    'recall': metrics['recall'],
                    'f1_score': metrics['f1_score'],
                    'auc_roc': metrics['auc_roc'],
                    'auc_pr': metrics['auc_pr'],
                    'g_mean': metrics['g_mean'],
                    'mcc': metrics['mcc'],
                    'training_time': train_time,
                    'experiment_time': exp_time,
                    'dataset_size': len(X_resampled),
                    'status': 'success'
                }
                break
        
        print(f"Status: SUCCESS")
        print(f"F1-Score: {metrics['f1_score']:.4f}")
        print(f"Time: {exp_time:.2f}s")
        
    except Exception as e:
        print(f"Status: STILL FAILED - {str(e)}")

print(f"\n{'='*70}")
print("CLASS WEIGHTING EXPERIMENTS FIXED!")
print(f"{'='*70}")

RE-RUNNING FAILED CLASS WEIGHTING EXPERIMENTS

Class weights: {np.int64(0): np.float64(1.063089017758902), np.int64(1): np.float64(0.9439795345236915)}
Dataset: 800,000 samples

----------------------------------------------------------------------
Re-running: class_weighting + logistic_regression
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.6608
Time: 3.80s

----------------------------------------------------------------------
Re-running: class_weighting + random_forest
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.7430
Time: 71.76s

----------------------------------------------------------------------
Re-running: class_weighting + svm
----------------------------------------------------------------------
Status: SUCCESS
F1-Score: 0.0000
Time: 1137.79s

CLASS WEIGHTING EXPERIMENTS FIXED!
