In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import time
import joblib
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from sklearn.feature_selection import VarianceThreshold, SelectKBest, mutual_info_classif
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, roc_curve, auc
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings('ignore')

# Set the number of cores for parallel processing: half of the machine, but
# never fewer than one. os.cpu_count() returns None when the count cannot be
# determined, so fall back to 1 rather than failing on ``None // 2``.
n_cores = max(1, (os.cpu_count() or 1) // 2)
print(f"Using {n_cores} CPU cores for parallel processing")

# Create output directories (figures are kept separate from the saved models)
for _output_dir in (
    'models/session_models',
    'session_figures',
    'session_figures/confusion_matrices',
    'session_figures/roc_curves',
    'session_figures/feature_importance',
    'session_figures/summaries',
):
    os.makedirs(_output_dir, exist_ok=True)

# Define a function to train models for a specific session
def _evaluate_model(model, X_train, X_test, y_train, y_test, model_name,
                    session_id, feature_names):
    """
    Fit a model, print its test metrics and save its diagnostic figures

    Parameters:
    -----------
    model : estimator
        Classifier exposing fit/predict (optionally predict_proba and
        feature_importances_)
    X_train, X_test : array-like
        Preprocessed training and test feature matrices
    y_train, y_test : array-like
        Binary targets (0 = normal, 1 = attack)
    model_name : str
        Display name, also used to build the figure file names
    session_id : int
        Session the model belongs to (used in titles and file names)
    feature_names : list of str
        Names of the columns of X_train/X_test, in column order

    Returns:
    --------
    tuple
        (fitted model, accuracy, F1 score) measured on the test set
    """
    # Session-specific filename prefix
    prefix = f"ssn{session_id}_{model_name.replace(' ', '_').lower()}"

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, zero_division=0)

    print(f"\n{model_name} Results:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("Confusion Matrix:")
    # Pin the label order so the matrix is always 2x2 and matches the
    # Normal/Attack tick labels even when the test split holds one class only.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    print(cm)
    print("Classification Report:")
    print(classification_report(y_test, y_pred, zero_division=0))

    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Normal', 'Attack'],
                yticklabels=['Normal', 'Attack'])
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Session {session_id} - Confusion Matrix - {model_name}')
    plt.savefig(f'session_figures/confusion_matrices/{prefix}_confusion_matrix.png')
    plt.close()

    # ROC curve and AUC (only for models that expose class probabilities)
    if hasattr(model, "predict_proba"):
        try:
            y_proba = model.predict_proba(X_test)[:, 1]
            fpr, tpr, _ = roc_curve(y_test, y_proba)
            roc_auc = auc(fpr, tpr)
            print(f"ROC AUC: {roc_auc:.4f}")

            plt.figure()
            plt.plot(fpr, tpr, color='darkorange', lw=2,
                     label=f'ROC curve (area = {roc_auc:.2f})')
            plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
            plt.xlim([0.0, 1.0])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title(f'Session {session_id} - ROC Curve - {model_name}')
            plt.legend(loc="lower right")
            plt.savefig(f'session_figures/roc_curves/{prefix}_roc_curve.png')
            plt.close()
        except Exception as e:
            # Best effort: a missing ROC plot must not abort the training run.
            print(f"Could not calculate ROC AUC: {e}")

    # Feature importance (tree-based models only)
    if hasattr(model, 'feature_importances_'):
        importances = model.feature_importances_
        order = np.argsort(importances)[::-1]

        # Plot top 20 feature importances
        n_features = min(20, len(importances))
        top = order[:n_features]
        plt.figure(figsize=(12, 8))
        plt.title(f'Session {session_id} - Top 20 Feature Importances - {model_name}')
        plt.bar(range(n_features), importances[top], align='center')
        plt.xticks(range(n_features), [feature_names[i] for i in top], rotation=90)
        plt.tight_layout()
        plt.savefig(f'session_figures/feature_importance/{prefix}_feature_importance.png')
        plt.close()

        print("\nTop 10 most important features:")
        for i in order[:10]:
            print(f"{feature_names[i]}: {importances[i]:.6f}")

    return model, accuracy, f1


def train_session_model(df, session_id, test_size=0.1):
    """
    Train all five models on data from a specific session with RandomizedSearchCV

    The session rows are split into train/test sets, passed through variance
    thresholding, mutual-information feature selection and robust scaling, and
    then used to tune five classifiers. The three with the best test F1 are
    combined into a soft-voting ensemble. The best of the six models (by test
    F1) is saved with the fitted preprocessing steps under
    'models/session_models/'.

    Parameters:
    -----------
    df : pandas DataFrame
        The full dataset; must contain 'ssn' and either 'attack' or
        'type_of_attack'
    session_id : int
        The session ID to filter by
    test_size : float
        The proportion of data to use for testing (default: 0.1 for 90-10 split)

    Returns:
    --------
    dict
        The saved model package: fitted preprocessing objects, the best model
        and its name/metrics, plus every trained model ('all_models') and its
        (accuracy, f1) pair ('all_metrics')
    """
    start_time = time.time()

    print(f"\n{'='*80}")
    print(f"Training models for Session {session_id}")
    print(f"{'='*80}")

    # Filter data for this session
    session_df = df[df['ssn'] == session_id].copy()
    print(f"Session {session_id} dataset shape: {session_df.shape}")

    # Create binary target column if not already present
    if 'attack' not in session_df.columns:
        session_df['attack'] = (session_df['type_of_attack'] != 0).astype(int)  # 1 for attack, 0 for no attack

    # Display class distribution
    attack_count = session_df['attack'].sum()
    total_records = len(session_df)
    normal_count = total_records - attack_count
    print(f"Total records: {total_records}")
    print(f"Number of attacks: {attack_count}")
    print(f"Number of normal operations: {normal_count}")
    print(f"Attack percentage: {attack_count/total_records:.2%}")

    # If either class has too few samples, notify but continue
    if attack_count < 5 or normal_count < 5:
        print(f"WARNING: Session {session_id} has very few samples in one class.")

    # Define feature columns, excluding the target and direct identifiers
    excluded = ('attack', 'type_of_attack', 'gen_attacked')
    feature_columns = [col for col in session_df.columns if col not in excluded]

    X = session_df[feature_columns]
    y = session_df['attack']

    # Split dataset into training and testing sets (90-10 split by default).
    # The split is intentionally left unstratified, as in the original design.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42, stratify=None
    )

    print(f"Training set shape: {X_train.shape}")
    print(f"Test set shape: {X_test.shape}")

    # Feature selection and preprocessing pipeline
    # 1. Remove low variance features
    print("\nApplying variance threshold...")
    variance_threshold = 0.01
    selector = VarianceThreshold(threshold=variance_threshold)
    X_train_var = selector.fit_transform(X_train)
    X_test_var = selector.transform(X_test)
    print(f"Features after variance thresholding: {X_train_var.shape[1]}")

    # 2. Feature selection using mutual information
    print("\nSelecting most informative features...")
    select_k = SelectKBest(mutual_info_classif, k=min(100, X_train_var.shape[1]))
    X_train_selected = select_k.fit_transform(X_train_var, y_train)
    X_test_selected = select_k.transform(X_test_var)
    print(f"Features after selection: {X_train_selected.shape[1]}")

    # Map the surviving columns back to their original names.
    # selected_indices index into the variance-filtered matrix, whose columns
    # are in turn the original_indices of X.
    selected_indices = select_k.get_support(indices=True)
    original_indices = selector.get_support(indices=True)
    feature_names = [X.columns[original_indices[i]] for i in selected_indices]
    selected_scores = select_k.scores_[selected_indices]

    # Rank ALL selected features by score before truncating, so the listing
    # really shows the 20 best ones rather than the first 20 in column order.
    ranked = sorted(zip(feature_names, selected_scores), key=lambda pair: pair[1], reverse=True)
    print("\nTop 20 most informative features:")
    for name, score in ranked[:20]:
        print(f"{name}: {score:.6f}")

    # 3. Scale features
    print("\nScaling features...")
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train_selected)
    X_test_scaled = scaler.transform(X_test_selected)

    # Hyper-parameter search spaces
    forest_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2],
        'bootstrap': [True, False]
    }
    gb_param_grid = {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.05, 0.1],
        'max_depth': [3, 5, 7],
        'min_samples_split': [2, 5],
        'subsample': [0.8, 0.9, 1.0]
    }
    xgb_param_grid = {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.05, 0.1],
        'max_depth': [3, 5, 7],
        'min_child_weight': [1, 3],
        'gamma': [0, 0.1],
        'subsample': [0.8, 0.9, 1.0],
        'colsample_bytree': [0.8, 0.9, 1.0]
    }
    # One sub-grid per solver so that only penalty/solver pairs supported by
    # LogisticRegression are sampled (liblinear has no unpenalised mode and
    # lbfgs has no l1); invalid pairs would only produce failed fits.
    lr_common = {'C': np.logspace(-3, 3, 7), 'max_iter': [1000, 2000]}
    lr_param_grid = [
        dict(lr_common, solver=['liblinear'], penalty=['l1', 'l2']),
        dict(lr_common, solver=['saga'], penalty=['l1', 'l2', None]),
        dict(lr_common, solver=['lbfgs'], penalty=['l2', None]),
    ]

    # (display name, package key, heading, base estimator, search space)
    model_specs = [
        ('Extra Trees', 'ExtraTrees', 'Extra Trees Classifier',
         ExtraTreesClassifier(random_state=42), forest_grid),
        ('Gradient Boosting', 'GradientBoosting', 'Gradient Boosting Classifier',
         GradientBoostingClassifier(random_state=42), gb_param_grid),
        ('Random Forest', 'RandomForest', 'Random Forest Classifier',
         RandomForestClassifier(random_state=42), forest_grid),
        ('XGBoost', 'XGBoost', 'XGBoost Classifier',
         XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss'),
         xgb_param_grid),
        ('Logistic Regression', 'LogisticRegression', 'Logistic Regression',
         LogisticRegression(random_state=42), lr_param_grid),
    ]

    models_summary = {}  # display name -> (accuracy, f1)
    fitted_by_name = {}  # display name -> fitted model
    all_models = {}      # package key -> fitted model
    model_scores = {}    # package key -> f1, used to pick the ensemble members

    # ====================== MODELS 1-5: TUNED BASE CLASSIFIERS ======================
    for number, (display_name, key, heading, base_model, param_grid) in enumerate(model_specs, 1):
        print(f"\n{number}. Training {heading} for Session {session_id}")

        search = RandomizedSearchCV(
            base_model, param_grid, n_iter=100,
            cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=42),
            scoring='accuracy', n_jobs=n_cores, random_state=42
        )
        search.fit(X_train_scaled, y_train)
        print(f"Best parameters: {search.best_params_}")

        model, accuracy, f1 = _evaluate_model(
            search.best_estimator_, X_train_scaled, X_test_scaled, y_train, y_test,
            display_name, session_id, feature_names
        )
        models_summary[display_name] = (accuracy, f1)
        fitted_by_name[display_name] = model
        all_models[key] = model
        model_scores[key] = f1

    # ====================== ENSEMBLE MODEL: VOTING CLASSIFIER ======================
    print("\n6. Training Voting Classifier Ensemble for Session", session_id)

    # Sort by F1 score and select top 3 (stable sort: ties keep training order)
    top_models = sorted(model_scores.items(), key=lambda item: item[1], reverse=True)[:3]
    print(f"Top 3 models for ensemble: {[name for name, _ in top_models]}")

    estimators = [(name, all_models[name]) for name, _ in top_models]
    voting_clf = VotingClassifier(estimators=estimators, voting='soft')

    voting_model, voting_accuracy, voting_f1 = _evaluate_model(
        voting_clf, X_train_scaled, X_test_scaled, y_train, y_test,
        "Voting Classifier", session_id, feature_names
    )
    models_summary['Voting Classifier'] = (voting_accuracy, voting_f1)
    fitted_by_name['Voting Classifier'] = voting_model
    all_models['VotingClassifier'] = voting_model

    # ====================== SUMMARY ======================
    print("\n=== Model Performance Summary for Session", session_id, "===")

    # Sort by F1 score
    sorted_models = sorted(models_summary.items(), key=lambda item: item[1][1], reverse=True)

    print("\nModels ranked by F1 score:")
    for rank, (model_name, (acc, f1)) in enumerate(sorted_models, 1):
        print(f"{rank}. {model_name}: Accuracy = {acc:.4f}, F1 = {f1:.4f}")

    # Plot model comparison (both Accuracy and F1 Score)
    model_names = [name for name, _ in sorted_models]
    accuracies = [acc for _, (acc, _) in sorted_models]
    f1_scores = [f1 for _, (_, f1) in sorted_models]

    x = np.arange(len(model_names))
    width = 0.35

    # Only one figure is created here; an extra plt.figure() would never be
    # closed and would accumulate across sessions.
    fig, ax = plt.subplots(figsize=(16, 8))
    rects1 = ax.bar(x - width/2, accuracies, width, label='Accuracy', color='skyblue')
    rects2 = ax.bar(x + width/2, f1_scores, width, label='F1 Score', color='salmon')

    ax.set_xlabel('Models', fontsize=14)
    ax.set_ylabel('Scores', fontsize=14)
    ax.set_title(f'Session {session_id} - Model Performance Comparison', fontsize=16)
    ax.set_xticks(x)
    ax.set_xticklabels(model_names, rotation=45, ha='right', fontsize=12)
    ax.legend(fontsize=12)
    ax.set_ylim(0, 1.1)  # Ensure consistent y-axis scale

    # Add value labels on the bars
    for rect in [*rects1, *rects2]:
        height = rect.get_height()
        ax.annotate(f'{height:.3f}',
                    xy=(rect.get_x() + rect.get_width()/2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=10)

    # Add a grid for better readability
    ax.grid(True, linestyle='--', alpha=0.7)

    fig.tight_layout()
    fig.savefig(f'session_figures/summaries/ssn{session_id}_model_comparison.png')
    plt.close(fig)

    # Save the best model together with everything needed to reuse it
    best_model_name, (best_accuracy, best_f1) = sorted_models[0]

    model_package = {
        'variance_selector': selector,
        'feature_selector': select_k,
        'scaler': scaler,
        'model': fitted_by_name[best_model_name],
        'feature_columns': feature_columns,
        'selected_indices': selected_indices,
        'original_indices': original_indices,
        'best_model_name': best_model_name,
        'metrics': {'accuracy': best_accuracy, 'f1': best_f1},
        'all_models': all_models,
        'all_metrics': models_summary
    }

    model_path = f"models/session_models/ssn{session_id}_{best_model_name.replace(' ', '_').lower()}.pkl"
    joblib.dump(model_package, model_path)
    print(f"\nBest model for Session {session_id} ({best_model_name}) saved as '{model_path}'")

    print(f"\nSession {session_id} training completed in {time.time() - start_time:.2f} seconds")

    return model_package

def main():
    """
    Train per-session models and write the cross-session summary figures

    Loads the dataset, applies the advanced feature engineering, trains one
    model package per session with train_session_model, then prints a summary
    table and saves comparison plots under 'session_figures/summaries/'.

    The summaries are built from the packages returned by this run rather than
    from whatever pickle files happen to be in 'models/session_models/', so
    files left over from earlier runs cannot leak into the report.
    """
    start_time = time.time()

    print("\n=== Loading data ===")
    # Load dataset
    df = pd.read_csv("data/N300_G69_transposed.csv", dtype={'ssn':'Int64', 'type_of_attack': 'Int64', 'gen_attacked': 'Int64'})
    print(f"Dataset loaded with shape: {df.shape}")

    # Basic preprocessing
    if 'attack' not in df.columns:
        df['attack'] = (df['type_of_attack'] != 0).astype(int)  # 1 for attack, 0 for no attack

    # ====================== ADVANCED FEATURE ENGINEERING ======================
    # Apply the advanced feature engineering
    from advanced_features import engineer_advanced_features
    df = engineer_advanced_features(df)

    # Get unique session IDs; rows without a session id cannot be assigned to
    # any per-session model, so missing values are ignored.
    session_ids = df['ssn'].dropna().unique()
    print(f"Found {len(session_ids)} unique sessions: {session_ids}")

    if len(session_ids) == 0:
        print("No sessions found; nothing to train.")
        return

    # Train models for each session. Only the metrics are kept in memory; the
    # fitted models themselves are already persisted by train_session_model.
    session_summaries = {}
    for session_id in session_ids:
        model_package = train_session_model(df, session_id)
        session_summaries[session_id] = {
            'best_model_name': model_package['best_model_name'],
            'metrics': model_package['metrics'],
            'all_metrics': model_package['all_metrics'],
        }

    # Create a summary of results across sessions
    print("\n=== Session Models Summary ===")
    print("\nModel performance by session:")

    accuracy_by_session = {s: info['metrics']['accuracy'] for s, info in session_summaries.items()}
    f1_by_session = {s: info['metrics']['f1'] for s, info in session_summaries.items()}
    best_models_by_session = {s: info['best_model_name'] for s, info in session_summaries.items()}

    # Table format for terminal display (one row per session)
    headers = ["Session", "Best Model", "Accuracy", "F1 Score"]
    rows = [
        [str(s), str(best_models_by_session[s]),
         f"{accuracy_by_session[s]:.4f}", f"{f1_by_session[s]:.4f}"]
        for s in session_ids
    ]

    # Print table
    col_width = max(len(word) for row in [headers] + rows for word in row) + 2
    separator = "+" + "+".join("-" * col_width for _ in headers) + "+"
    print(separator)
    print("|" + "|".join(word.ljust(col_width) for word in headers) + "|")
    print(separator)
    for row in rows:
        print("|" + "|".join(word.ljust(col_width) for word in row) + "|")
    print(separator)

    # Identify best and worst performing sessions by both metrics
    best_acc_session = max(accuracy_by_session.items(), key=lambda item: item[1])
    worst_acc_session = min(accuracy_by_session.items(), key=lambda item: item[1])
    best_f1_session = max(f1_by_session.items(), key=lambda item: item[1])
    worst_f1_session = min(f1_by_session.items(), key=lambda item: item[1])

    print(f"\nBest performing session (Accuracy): Session {best_acc_session[0]} with {best_acc_session[1]:.4f}")
    print(f"Worst performing session (Accuracy): Session {worst_acc_session[0]} with {worst_acc_session[1]:.4f}")
    print(f"Best performing session (F1 Score): Session {best_f1_session[0]} with {best_f1_session[1]:.4f}")
    print(f"Worst performing session (F1 Score): Session {worst_f1_session[0]} with {worst_f1_session[1]:.4f}")

    # Plot summary of scores by session
    fig, ax = plt.subplots(figsize=(12, 8))

    sessions = sorted(accuracy_by_session)
    accuracies = [accuracy_by_session[s] for s in sessions]
    f1_scores = [f1_by_session[s] for s in sessions]

    x = np.arange(len(sessions))
    width = 0.35

    rects1 = ax.bar(x - width/2, accuracies, width, label='Accuracy', color='skyblue')
    rects2 = ax.bar(x + width/2, f1_scores, width, label='F1 Score', color='salmon')

    ax.set_xlabel('Session ID', fontsize=14)
    ax.set_ylabel('Score', fontsize=14)
    ax.set_title('Attack Detection Performance by Session', fontsize=16)
    ax.set_xticks(x)
    ax.set_xticklabels([f'Session {s}' for s in sessions], fontsize=12)
    ax.legend(fontsize=12)
    ax.set_ylim(0, 1.1)

    # Add grid for better readability
    ax.grid(True, linestyle='--', alpha=0.7)

    # Add value labels on top of each bar
    for rect in [*rects1, *rects2]:
        height = rect.get_height()
        ax.annotate(f'{height:.3f}',
                    xy=(rect.get_x() + rect.get_width()/2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=10)

    fig.tight_layout()
    fig.savefig('session_figures/summaries/session_performance_comparison.png')
    plt.close(fig)

    # Count how many sessions each model type won
    model_counts = {}
    for best_model in best_models_by_session.values():
        model_counts[best_model] = model_counts.get(best_model, 0) + 1

    # Create a bar chart of best model types
    plt.figure(figsize=(12, 8))
    models = list(model_counts)
    counts = [model_counts[m] for m in models]

    plt.bar(models, counts, color='lightgreen')
    plt.xlabel('Model Type', fontsize=14)
    plt.ylabel('Number of Sessions', fontsize=14)
    plt.title('Best Model Type by Number of Sessions', fontsize=16)

    # Add value labels on top of each bar
    for i, v in enumerate(counts):
        plt.text(i, v + 0.1, str(v), ha='center', fontsize=12)

    # Add grid for better readability
    plt.grid(True, linestyle='--', alpha=0.3, axis='y')

    plt.tight_layout()
    plt.savefig('session_figures/summaries/best_model_distribution.png')
    plt.close()

    # Create comparison table for all models across all sessions
    print("\n=== Creating cross-session model comparison ===")

    model_names = [
        'Extra Trees',
        'Gradient Boosting',
        'Random Forest',
        'XGBoost',
        'Logistic Regression',
        'Voting Classifier',
    ]
    session_labels = [f"Session {s}" for s in session_ids]

    # (index into the (accuracy, f1) pair, title, output file)
    heatmap_specs = [
        (0, 'Accuracy by Model and Session',
         'session_figures/summaries/model_session_accuracy_heatmap.png'),
        (1, 'F1 Score by Model and Session',
         'session_figures/summaries/model_session_f1_heatmap.png'),
    ]
    for metric_index, title, output_path in heatmap_specs:
        # Rows are models, columns are sessions; a missing entry is shown as 0
        heatmap_data = [
            [session_summaries[s]['all_metrics'].get(name, (0, 0))[metric_index]
             for s in session_ids]
            for name in model_names
        ]

        plt.figure(figsize=(14, 8))
        sns.heatmap(heatmap_data, annot=True, fmt=".3f", cmap="YlGnBu",
                    xticklabels=session_labels,
                    yticklabels=model_names,
                    vmin=0, vmax=1)

        plt.title(title, fontsize=16)
        plt.tight_layout()
        plt.savefig(output_path)
        plt.close()

    # Create attack type distribution visualization
    if 'type_of_attack' in df.columns:
        # Map attack types to descriptive names
        attack_type_names = {
            0: "Normal",
            1: "Ramp Rate Attack",
            2: "Upper Limit Attack",
            3: "Lower Limit Attack",
            4: "Generation Cost Attack"
        }

        # Attack-type counts per session (attack rows only)
        attack_by_session = {}
        for session_id in session_ids:
            session_attacks = df[(df['ssn'] == session_id) & (df['attack'] == 1)]['type_of_attack'].value_counts()
            attack_by_session[session_id] = dict(session_attacks.items())

        # Create stacked bar chart: one layer per attack type 1-4
        plt.figure(figsize=(14, 8))
        bottom = np.zeros(len(session_ids))

        colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99']

        for color, attack_type in zip(colors, range(1, 5)):
            attack_data = [attack_by_session[s].get(attack_type, 0) for s in session_ids]
            plt.bar(session_labels, attack_data, bottom=bottom,
                    label=attack_type_names.get(attack_type), color=color)
            bottom += attack_data

        plt.xlabel('Session', fontsize=14)
        plt.ylabel('Number of Attacks', fontsize=14)
        plt.title('Attack Type Distribution by Session', fontsize=16)
        plt.legend(fontsize=12)
        plt.xticks(rotation=0, fontsize=12)

        # Add grid for better readability
        plt.grid(True, linestyle='--', alpha=0.3, axis='y')

        plt.tight_layout()
        plt.savefig('session_figures/summaries/attack_type_distribution_by_session.png')
        plt.close()

        # Create pie charts for each session's attack distribution
        for session_id in session_ids:
            session_data = df[(df['ssn'] == session_id) & (df['attack'] == 1)]

            # Skip sessions without attacks before opening a figure, so no
            # empty figure is left open.
            if len(session_data) == 0:
                continue

            attack_counts = session_data['type_of_attack'].value_counts()
            labels = [attack_type_names.get(i, f"Type {i}") for i in attack_counts.index]

            plt.figure(figsize=(10, 8))
            plt.pie(attack_counts, labels=labels, autopct='%1.1f%%',
                    startangle=90, colors=colors, wedgeprops={'edgecolor': 'black'})
            plt.axis('equal')
            plt.title(f'Session {session_id} - Attack Type Distribution', fontsize=16)
            plt.savefig(f'session_figures/summaries/session{session_id}_attack_distribution_pie.png')
            plt.close()

    print(f"\nAll session-specific models trained in {time.time() - start_time:.2f} seconds")
    print("Results summary saved to 'session_figures/summaries/' directory")
    print("Individual session figures saved to:")
    print("  - 'session_figures/confusion_matrices/' for confusion matrices")
    print("  - 'session_figures/roc_curves/' for ROC curves")
    print("  - 'session_figures/feature_importance/' for feature importance plots")
    print("All models saved to 'models/session_models/' directory")

# Run the full training and reporting pipeline only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__":
    main()

Using 2 CPU cores for parallel processing

=== Loading data ===
