# Evaluation & Comparison

Notebook này thực hiện:
1. Kết hợp kết quả từ tất cả models
2. Xuất bảng metrics ra file metrics.csv
3. Lưu confusion matrix và ROC curves vào thư mục results/
4. Review toàn bộ pipeline và dữ liệu


In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
import sys
import pickle
from pathlib import Path
# Suppress every warning for cleaner notebook output. Note that this is a
# global filter, so it also hides deprecation and convergence warnings.
warnings.filterwarnings('ignore')

# Plot defaults for the whole notebook: seaborn's white grid theme and a
# 12x6 inch default figure size.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

# Locate the project root: the first of (working directory, its parent) that
# contains a `src/` folder. If neither does, fall back to the working directory.
current_dir = Path.cwd()
project_root = next(
    (
        candidate
        for candidate in (current_dir, current_dir.parent)
        if (candidate / 'src').exists()
    ),
    current_dir,
)

# Put the project root at the front of sys.path (once) so that `src.*`
# modules can be imported from this notebook.
project_root_str = str(project_root.absolute())
if project_root_str not in sys.path:
    sys.path.insert(0, project_root_str)

# Import the project's evaluation helpers. This import resolves through the
# project root that was inserted into sys.path earlier in this cell.
from src.evaluate import (
    evaluate_model, get_metrics_dict, export_metrics_to_csv,
    plot_metrics_comparison, compare_models_roc, print_metrics
)

# Echo the resolved root so a wrong working directory is easy to spot.
print(f"Project root: {project_root.absolute()}")
print("Libraries imported successfully!")


## 1. Load Results từ tất cả Models

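Lấy predictions và metrics của tất cả models trên tập test. Lưu ý: trong notebook này các models được train lại để minh hoạ; trong thực tế nên load các models đã train trong notebook 02 từ thư mục models/.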


In [None]:
# Load test data for final evaluation
# Note: In practice, you would load preprocessed data from notebook 01
# For this notebook, we'll reload and preprocess

# Import both preprocessing helpers up front. `scale_features` is needed by
# BOTH branches below; importing it only in the `else` branch raised a
# NameError whenever models/scaler.pkl already existed.
from src.data_preprocessing import scale_features, split_data

data_path = project_root / 'data' / 'creditcard.csv'
df = pd.read_csv(data_path)

# Prepare features and target: columns V1..V28 plus Amount, target is Class
feature_cols = [f'V{i}' for i in range(1, 29)] + ['Amount']
X = df[feature_cols]
y = df['Class']

# Load scaler if available, otherwise scale again
scaler_path = project_root / 'models' / 'scaler.pkl'
if scaler_path.exists():
    # SECURITY NOTE: pickle.load can execute arbitrary code. Only load a
    # scaler file that this project produced itself.
    with open(scaler_path, 'rb') as f:
        scaler = pickle.load(f)
    X_scaled, _ = scale_features(X, feature_cols=feature_cols, scaler=scaler, fit=False)
    print("✓ Loaded scaler from file")
else:
    # NOTE(review): this fits the scaler on the full dataset before the
    # split below, so test rows presumably influence the scaling statistics.
    # Confirm this matches what notebook 01 does.
    X_scaled, scaler = scale_features(X, feature_cols=feature_cols, fit=True)
    print("✓ Created new scaler")

# Split data (same random_state as notebook 01)
X_train, X_val, X_test, y_train, y_val, y_test = split_data(
    X_scaled, y,
    test_size=0.15,
    val_size=0.15,
    random_state=42
)

print(f"\nTest set: {X_test.shape[0]:,} samples")
print(f"Test set class distribution: {y_test.value_counts().to_dict()}")


In [None]:
# Prepare what the training cell needs: estimator classes, class weights and
# the shared results container.
# Note: the models are trained inside this notebook for demonstration only.
# In a real scenario they would be loaded from saved pickle files such as
# models/random_forest.pkl, models/adaboost.pkl, etc.

from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from src.data_preprocessing import get_class_weights

# Class weights derived from the training labels
class_weights = get_class_weights(y_train)

# Single container for everything the later cells read:
#   y_true        - test-set labels used for the final evaluation
#   predictions   - model name -> predicted labels
#   probabilities - model name -> positive-class probabilities
#   metrics       - one metrics dict per model
all_results = dict(
    y_true=y_test,
    predictions={},
    probabilities={},
    metrics=[],
)

print("Training models for evaluation...")
print("(In practice, these would be loaded from saved files)\n")


In [None]:
# Build the three estimators first, then fit and score them in one loop.

# Model 1: Logistic Regression (class-weighted)
lr_model = LogisticRegression(class_weight=class_weights, random_state=42, max_iter=1000)

# Model 2: Random Forest (class-weighted, all CPU cores)
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=15,
    class_weight=class_weights,
    random_state=42,
    n_jobs=-1
)

# Model 3: AdaBoost
# NOTE(review): unlike the two models above, this one is built without any
# class weighting. Confirm that is intentional.
ada_model = AdaBoostClassifier(n_estimators=50, random_state=42)

for label, estimator in (
    ('Logistic Regression', lr_model),
    ('Random Forest', rf_model),
    ('AdaBoost', ada_model),
):
    print(f"Training {label}...")
    estimator.fit(X_train, y_train)
    # Hard labels, plus the probability of class 1 (second predict_proba column)
    all_results['predictions'][label] = estimator.predict(X_test)
    all_results['probabilities'][label] = estimator.predict_proba(X_test)[:, 1]

print("\n✓ All models trained and predictions generated")


## 2. Kết hợp và So sánh Metrics


In [None]:
# Calculate metrics for all models
y_true = all_results['y_true']

# Empty the list before filling it so that re-running this cell does not
# append a second copy of every model's row. Duplicates would otherwise show
# up in the comparison table and in everything later built from
# all_results['metrics'].
all_results['metrics'].clear()

for model_name, y_pred in all_results['predictions'].items():
    y_pred_proba = all_results['probabilities'][model_name]

    # Collect the metrics dict for the comparison table and print the
    # per-model report.
    metrics = get_metrics_dict(y_true, y_pred, y_pred_proba, model_name)
    all_results['metrics'].append(metrics)
    print_metrics(y_true, y_pred, y_pred_proba, model_name)

# Create comparison DataFrame (one row per model)
metrics_df = pd.DataFrame(all_results['metrics'])
print("\n" + "="*70)
print("Metrics Comparison - All Models")
print("="*70)
print(metrics_df.to_string(index=False))
print("="*70)


In [None]:
# Visualize metrics comparison.
# No save path is passed to plot_metrics_comparison here; the figure is only
# shown inline.
fig = plot_metrics_comparison(all_results['metrics'], figsize=(14, 7))
plt.show()


## 3. Xuất metrics.csv


In [None]:
# Export metrics to CSV
metrics_csv_path = project_root / 'results' / 'metrics.csv'

# Make sure results/ exists before writing. Nothing earlier in the notebook
# creates it, and this cell should not depend on the export helper doing so.
metrics_csv_path.parent.mkdir(parents=True, exist_ok=True)
export_metrics_to_csv(all_results['metrics'], metrics_csv_path)

# Display the saved CSV by reading it back from disk, which verifies the file
print("\nSaved metrics:")
print(pd.read_csv(metrics_csv_path).to_string(index=False))


## 4. Lưu Confusion Matrices


In [None]:
# Plot and save one confusion matrix per model under results/confusion_matrices/
from src.evaluate import plot_confusion_matrix

confusion_matrix_dir = project_root / 'results' / 'confusion_matrices'
confusion_matrix_dir.mkdir(parents=True, exist_ok=True)

for model_name, y_pred in all_results['predictions'].items():
    # File name is the model name with spaces replaced by underscores
    file_stem = model_name.replace(" ", "_")
    save_path = confusion_matrix_dir / f'{file_stem}_confusion_matrix.png'

    fig = plot_confusion_matrix(
        y_true,
        y_pred,
        model_name=model_name,
        save_path=save_path,
    )
    plt.show()
    # Close the figure after showing it so open figures do not accumulate
    plt.close(fig)

print(f"\n✓ All confusion matrices saved to {confusion_matrix_dir}")


## 5. Lưu ROC Curves


In [None]:
# compare_models_roc is given one y_true entry per model; every model was
# scored on the same test labels, so each key maps to the same y_true.
y_pred_proba_dict = all_results['probabilities']
y_true_dict = dict.fromkeys(y_pred_proba_dict, y_true)

# Draw all ROC curves on a single figure and save it under results/
roc_save_path = project_root / 'results' / 'roc_curves_comparison.png'
fig = compare_models_roc(
    y_true_dict,
    y_pred_proba_dict,
    roc_save_path,
    figsize=(12, 8),
)
plt.show()
plt.close(fig)

print(f"\n✓ ROC curves comparison saved to {roc_save_path}")


## 6. Tổng hợp và So sánh

Review toàn bộ pipeline và dữ liệu.


In [None]:
# Print the evaluation summary: dataset facts, best model per metric, and a
# checklist of the pipeline stages.
heavy_rule = "=" * 70
light_rule = "-" * 70

print(heavy_rule)
print("EVALUATION SUMMARY")
print(heavy_rule)

n_models = len(all_results['metrics'])
n_samples = len(y_true)
n_fraud = y_true.sum()

print(f"\nTotal models evaluated: {n_models}")
print(f"Test set size: {n_samples:,} samples")
print(f"Test set fraud cases: {n_fraud:,} ({n_fraud/n_samples*100:.2f}%)")

print("\n" + light_rule)
print("Best Model by Metric:")
print(light_rule)

metrics_df = pd.DataFrame(all_results['metrics'])

for metric in ['accuracy', 'precision', 'recall', 'f1', 'auc']:
    # Skip metrics that are missing or have no non-NaN values
    if metric not in metrics_df.columns:
        continue
    if not metrics_df[metric].notna().any():
        continue
    winner_idx = metrics_df[metric].idxmax()
    winner_name = metrics_df.loc[winner_idx, 'model']
    winner_score = metrics_df.loc[winner_idx, metric]
    print(f"{metric.capitalize():12s}: {winner_name:25s} ({winner_score:.4f})")

print("\n" + heavy_rule)
print("Pipeline Review:")
print(heavy_rule)
for checklist_line in (
    "✓ Data preprocessing: Scaling, train/val/test split completed",
    "✓ Class imbalance handling: Class weights applied",
    "✓ Models trained and evaluated on test set",
    "✓ Metrics calculated and exported to CSV",
    "✓ Confusion matrices saved",
    "✓ ROC curves comparison saved",
):
    print(checklist_line)
print(heavy_rule)


In [None]:
# Show the final metrics table once more
final_table = metrics_df.to_string(index=False)
print("\nFinal Metrics Table:")
print(final_table)

# Render the comparison chart again, this time passing a path under results/
# so the chart is saved as well as displayed.
comparison_chart_path = project_root / 'results' / 'metrics_comparison.png'
fig = plot_metrics_comparison(
    all_results['metrics'],
    comparison_chart_path,
    figsize=(14, 7),
)
plt.show()
plt.close(fig)

print(f"\n✓ Metrics comparison chart saved to {comparison_chart_path}")
print("\n✅ Evaluation and Comparison completed successfully!")
