In [1]:
"""
HIGGS BOSON DETECTION - KAGGLE DATASET (FIXED VERSION)
======================================================

Works with the actual Kaggle dataset structure:
- training.csv (separate file)
- test.csv (separate file)  
- 33 columns (not 29)

Dataset: https://www.kaggle.com/datasets/knight079/higgsb
"""

import os
import warnings

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore')

# XGBoost is an optional dependency: remember whether the import worked so
# later code can check XGBOOST_AVAILABLE instead of failing on a missing module.
try:
    import xgboost as xgb
except ImportError:
    print("⚠️  XGBoost not available. Install with: pip install xgboost")
    XGBOOST_AVAILABLE = False
else:
    XGBOOST_AVAILABLE = True

# Project banner (one print call, one line per argument).
print(
    "🔬 HIGGS BOSON DETECTION PROJECT",
    "=" * 60,
    "Using actual Kaggle dataset structure",
    "=" * 60 + "\n",
    sep="\n",
)


🔬 HIGGS BOSON DETECTION PROJECT
Using actual Kaggle dataset structure



In [2]:
# ============================================================================
# PART 1: LOAD KAGGLE DATASET (FLEXIBLE VERSION)
# ============================================================================

def load_kaggle_higgs_data(filepath, n_samples=50000):
    """
    Load a Higgs CSV file and make sure the result has a 'Label' column.

    The file is read once with ``pandas.read_csv`` (first row used as header).
    Placeholder column names are replaced only when the first column name looks
    like a placeholder (an int, ``'0'`` or containing ``'Unnamed'``) AND no
    column is already called "label" (case-insensitive). A file that merely
    carries a saved index column (``'Unnamed: 0'``) next to real headers
    therefore keeps its headers.

    Args:
        filepath: Path to training.csv or test.csv
        n_samples: Maximum number of rows to load; ``None`` loads every row.

    Returns:
        pandas.DataFrame with at most ``n_samples`` rows and a column named
        'Label'. If no label-like column exists, the first column is renamed
        to 'Label' and a warning is printed.

    Raises:
        FileNotFoundError: re-raised after printing download instructions.
    """
    print(f"📊 Loading Higgs dataset from: {filepath}")
    requested = "all" if n_samples is None else n_samples
    print(f"   Loading {requested} collision events...")

    try:
        # A single read is enough: the column count is taken from the loaded
        # frame instead of re-opening the file just to peek at the header.
        df = pd.read_csv(filepath, nrows=n_samples)
    except FileNotFoundError:
        print(f"\n❌ ERROR: Could not find '{filepath}'")
        print("\n📥 Please download the dataset from:")
        print("   https://www.kaggle.com/datasets/knight079/higgsb")
        print("\n   You should have: training.csv and/or test.csv")
        raise

    n_cols = df.shape[1]
    print(f"   Detected {n_cols} columns in the file")
    print(f"✅ Successfully loaded {len(df)} events!\n")

    def _is_label(col):
        # str() keeps the comparison safe for non-string column labels.
        return str(col).lower() == 'label'

    # Detect if columns need renaming
    first_col = df.columns[0]
    looks_like_placeholder = (
        isinstance(first_col, int) or first_col == '0' or 'Unnamed' in str(first_col)
    )
    already_labelled = any(_is_label(col) for col in df.columns)

    if looks_like_placeholder and not already_labelled:
        print("   Adding feature names...")

        # Fallback names for header-less exports, by position:
        #   0      Label (1 = signal, 0 = background)
        #   1-21   low-level features
        #   22-28  high-level (derived) features
        #   29-32  bookkeeping columns
        # NOTE(review): files that ship their own header (e.g. EventId,
        # DER_*, PRI_* columns) never reach this branch.
        if n_cols in (32, 33):
            # The 32-column layout is the 33-column one without the trailing
            # 'KaggleWeight', so a single list sliced to n_cols covers both.
            feature_names = [
                'Label',  # 0
                'lepton_pT', 'lepton_eta', 'lepton_phi',  # 1-3
                'missing_energy_magnitude', 'missing_energy_phi',  # 4-5
                'jet1_pt', 'jet1_eta', 'jet1_phi', 'jet1_btag',  # 6-9
                'jet2_pt', 'jet2_eta', 'jet2_phi', 'jet2_btag',  # 10-13
                'jet3_pt', 'jet3_eta', 'jet3_phi', 'jet3_btag',  # 14-17
                'jet4_pt', 'jet4_eta', 'jet4_phi', 'jet4_btag',  # 18-21
                'm_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb',  # 22-28
                'EventId', 'Weight', 'KaggleSet', 'KaggleWeight',  # 29-32
            ]
        else:
            # Generic naming for any other format
            feature_names = ['Label'] + [f'feature_{i}' for i in range(1, n_cols)]

        df.columns = feature_names[:n_cols]  # Only use as many as needed

    # Ensure a 'Label' column exists (case insensitive)
    label_cols = [col for col in df.columns if _is_label(col)]
    if not label_cols:
        print("⚠️  Warning: Could not find label column. Using first column.")
        df = df.rename(columns={df.columns[0]: 'Label'})
    else:
        # Standardize the spelling to 'Label'
        df = df.rename(columns={label_cols[0]: 'Label'})

    return df

# Load data
print("=" * 60)
print("STEP 1: DATA LOADING")
print("=" * 60)

# USER: set the HIGGS_TRAINING_FILE environment variable to your training.csv
# location. The literal below is only the fallback used when the variable is
# unset, so the notebook no longer has to be edited to run on another machine.
TRAINING_FILE = os.environ.get(
    "HIGGS_TRAINING_FILE",
    r"C:\Users\HARISH K\Downloads\archive\training.csv",
)

df = load_kaggle_higgs_data(TRAINING_FILE, n_samples=50000)

# Display dataset info
print(f"\n📊 Dataset Overview:")
print(f"   Total events: {len(df)}")
print(f"   Total columns: {len(df.columns)}")
# Every column except 'Label' is counted here, including bookkeeping columns
# that are dropped later during data preparation.
print(f"   Features: {len(df.columns) - 1}")

# Check label distribution; labels may be numeric (1/0) or text ('s'/'b'),
# so each count tries the numeric key first and falls back to the text key.
label_counts = df['Label'].value_counts()
print(f"\n   ⚛️  Signal (Higgs): {label_counts.get(1, label_counts.get('s', 0))} events")
print(f"   🌌 Background: {label_counts.get(0, label_counts.get('b', 0))} events")

# Show column names (first ten only, to keep the output short)
print(f"\n📋 Columns in dataset:")
for i, col in enumerate(df.columns[:10]):
    print(f"   {i}: {col}")
if len(df.columns) > 10:
    print(f"   ... and {len(df.columns) - 10} more columns")

STEP 1: DATA LOADING
📊 Loading Higgs dataset from: C:\Users\HARISH K\Downloads\archive\training.csv
   Loading 50000 collision events...
   Detected 33 columns in the file
✅ Successfully loaded 50000 events!


📊 Dataset Overview:
   Total events: 50000
   Total columns: 33
   Features: 32

   ⚛️  Signal (Higgs): 17065 events
   🌌 Background: 32935 events

📋 Columns in dataset:
   0: EventId
   1: DER_mass_MMC
   2: DER_mass_transverse_met_lep
   3: DER_mass_vis
   4: DER_pt_h
   5: DER_deltaeta_jet_jet
   6: DER_mass_jet_jet
   7: DER_prodeta_jet_jet
   8: DER_deltar_tau_lep
   9: DER_pt_tot
   ... and 23 more columns


In [3]:
# ============================================================================
# PART 2: PREPARE DATA FOR ML
# ============================================================================

print("\n" + "=" * 60)
print("STEP 2: DATA PREPARATION")
print("=" * 60)

# Standardize label to binary (0/1).
# Any non-numeric dtype is treated as text labels; this also covers pandas'
# dedicated string dtype, which a plain `dtype == 'object'` check misses.
if not pd.api.types.is_numeric_dtype(df['Label']):
    label_map = {'s': 1, 'b': 0}
    encoded_labels = df['Label'].map(label_map)

    # .map() turns every value missing from label_map into NaN. Fail here with
    # a clear message rather than letting NaN labels reach the models.
    unmapped = encoded_labels.isna()
    if unmapped.any():
        unexpected = df.loc[unmapped, 'Label'].unique().tolist()
        raise ValueError(
            f"Unexpected label values (expected 's' or 'b'): {unexpected}"
        )

    df['Label'] = encoded_labels.astype('int64')
    print("   Converted 's'/'b' labels to 1/0")

# Keep only numeric physics features
# Drop EventId, Weight, KaggleSet, KaggleWeight if they exist
cols_to_drop = ['EventId', 'Weight', 'KaggleSet', 'KaggleWeight', 'Unnamed: 0']
cols_to_drop = [col for col in cols_to_drop if col in df.columns]

if cols_to_drop:
    print(f"   Dropping non-physics columns: {cols_to_drop}")
    df = df.drop(columns=cols_to_drop)

# Separate features and labels
X = df.drop('Label', axis=1)
y = df['Label']

print(f"\n✅ Data prepared:")
print(f"   Features: {X.shape[1]} physics measurements")
print(f"   Samples: {X.shape[0]} collision events")
print(f"   Signal class: {y.sum()} ({y.sum()/len(y)*100:.1f}%)")
print(f"   Background class: {(1-y).sum()} ({(1-y).sum()/len(y)*100:.1f}%)")


STEP 2: DATA PREPARATION
   Converted 's'/'b' labels to 1/0
   Dropping non-physics columns: ['EventId', 'Weight']

✅ Data prepared:
   Features: 30 physics measurements
   Samples: 50000 collision events
   Signal class: 17065 (34.1%)
   Background class: 32935 (65.9%)


In [4]:
# ============================================================================
# PART 3: 3D COLLISION VISUALIZATION
# ============================================================================

# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 60, "STEP 3: 3D COLLISION VISUALIZATION", "=" * 60, sep="\n")

def create_collision_3d(event_data, event_type, event_id):
    """Visualize one collision event as a 3D plotly figure.

    Each reconstructed object is drawn as a line from the origin to its
    momentum vector; the origin itself is marked as the collision point.

    Args:
        event_data: One event as a pandas Series (read via ``.index`` and
            ``[]``). Features are looked up under several candidate column
            names; a feature that is not present counts as 0 and its object
            is not drawn.
        event_type: ``'higgs'`` selects the red styling and the
            "HIGGS BOSON" title; any other value is styled as background.
        event_id: Identifier shown in the figure title.

    Returns:
        plotly.graph_objects.Figure with up to one lepton trace, four jet
        traces, one missing-energy trace, and the collision-point marker.
    """

    fig = go.Figure()

    def cylindrical_to_cartesian(pt, eta, phi):
        """Convert (pT, pseudorapidity, azimuth) to Cartesian components."""
        x = pt * np.cos(phi)
        y = pt * np.sin(phi)
        z = pt * np.sinh(eta)
        return x, y, z

    def get_feature(event, possible_names):
        """Return the first of ``possible_names`` present in ``event``, else 0."""
        for name in possible_names:
            if name in event.index:
                return event[name]
        return 0  # Default if not found

    # Candidate names are tried in order:
    #   1. descriptive names ('lepton_pT', 'jet1_pt', ...),
    #   2. PRI_* names used by files that ship their own header,
    #   3. positional 'feature_N' names.
    # NOTE(review): the PRI_* spellings follow the Higgs ML challenge CSV
    # header - confirm against your file. Undefined values in that format
    # appear to be large negative sentinels, which the "> 0" / "> 10" guards
    # below already skip.

    # 1. LEPTON
    pT = get_feature(event_data, ['lepton_pT', 'PRI_lep_pt', 'feature_1'])
    eta = get_feature(event_data, ['lepton_eta', 'PRI_lep_eta', 'feature_2'])
    phi = get_feature(event_data, ['lepton_phi', 'PRI_lep_phi', 'feature_3'])

    if pT > 0:
        x, y, z = cylindrical_to_cartesian(pT, eta, phi)
        fig.add_trace(go.Scatter3d(
            x=[0, x], y=[0, y], z=[0, z],
            mode='lines+markers',
            line=dict(color='yellow', width=6),
            marker=dict(size=[12, 18], color='yellow'),
            name='Lepton',
            hovertemplate=f'<b>Lepton</b><br>pT: {pT:.1f} GeV<extra></extra>'
        ))

    # 2. JETS
    jet_color = 'red' if event_type == 'higgs' else 'blue'

    # Only the two hardest jets have PRI_* columns.
    pri_jet_prefix = {1: 'PRI_jet_leading', 2: 'PRI_jet_subleading'}

    for jet_num in range(1, 5):
        # Positional layout: jet N occupies four columns starting at
        # 6 + 4*(N-1) -> pt, eta, phi, btag. (Columns 4-5 are the missing
        # energy, so starting at 5 would read missing_energy_phi as jet pT.)
        base = 6 + (jet_num - 1) * 4

        pt_names = [f'jet{jet_num}_pt']
        eta_names = [f'jet{jet_num}_eta']
        phi_names = [f'jet{jet_num}_phi']
        prefix = pri_jet_prefix.get(jet_num)
        if prefix is not None:
            pt_names.append(f'{prefix}_pt')
            eta_names.append(f'{prefix}_eta')
            phi_names.append(f'{prefix}_phi')
        pt_names.append(f'feature_{base}')
        eta_names.append(f'feature_{base + 1}')
        phi_names.append(f'feature_{base + 2}')

        pT = get_feature(event_data, pt_names)
        eta = get_feature(event_data, eta_names)
        phi = get_feature(event_data, phi_names)

        # Soft or absent jets (pT <= 10) are not drawn.
        if pT > 10:
            x, y, z = cylindrical_to_cartesian(pT, eta, phi)
            fig.add_trace(go.Scatter3d(
                x=[0, x], y=[0, y], z=[0, z],
                mode='lines+markers',
                line=dict(color=jet_color, width=4),
                marker=dict(size=[10, 12 + pT/15], color=jet_color),
                name=f'Jet {jet_num}',
                hovertemplate=f'<b>Jet {jet_num}</b><br>pT: {pT:.1f} GeV<extra></extra>'
            ))

    # 3. MISSING ENERGY (drawn in the transverse plane only, z = 0)
    missing_E = get_feature(event_data, ['missing_energy_magnitude', 'PRI_met', 'feature_4'])
    missing_phi = get_feature(event_data, ['missing_energy_phi', 'PRI_met_phi', 'feature_5'])

    if missing_E > 0:
        x_miss = missing_E * np.cos(missing_phi)
        y_miss = missing_E * np.sin(missing_phi)
        fig.add_trace(go.Scatter3d(
            x=[0, x_miss], y=[0, y_miss], z=[0, 0],
            mode='lines+markers',
            line=dict(color='rgba(255,255,255,0.7)', width=6, dash='dash'),
            marker=dict(size=[12, 22], color='rgba(255,255,255,0.4)', symbol='diamond'),
            name='Missing Energy',
            hovertemplate=f'<b>Missing E</b><br>{missing_E:.1f} GeV<extra></extra>'
        ))

    # 4. COLLISION POINT
    fig.add_trace(go.Scatter3d(
        x=[0], y=[0], z=[0],
        mode='markers',
        marker=dict(size=20, color='white', symbol='diamond',
                   line=dict(color='gold', width=3)),
        name='Collision',
        hovertemplate='<b>Collision Point</b><br>E = mc²<extra></extra>'
    ))

    # Layout: fixed ±200 axis ranges so every event is drawn at the same scale.
    title_color = 'red' if event_type == 'higgs' else 'cyan'
    title = f"🔬 {'HIGGS BOSON' if event_type == 'higgs' else 'BACKGROUND'} Event #{event_id}"

    fig.update_layout(
        title=dict(text=title, font=dict(size=18, color=title_color)),
        scene=dict(
            xaxis=dict(title='X (GeV/c)', backgroundcolor='rgb(20,20,20)', 
                      gridcolor='gray', range=[-200, 200]),
            yaxis=dict(title='Y (GeV/c)', backgroundcolor='rgb(20,20,20)',
                      gridcolor='gray', range=[-200, 200]),
            zaxis=dict(title='Z (GeV/c)', backgroundcolor='rgb(20,20,20)',
                      gridcolor='gray', range=[-200, 200]),
            bgcolor='black'
        ),
        paper_bgcolor='black',
        font=dict(color='white'),
        height=650,
        showlegend=True
    )

    return fig

# Create sample events
print("\n🎬 Creating 3D collision visualizations...")

# Draw up to three events per class with a fixed seed.
is_higgs = df['Label'] == 1
is_background = df['Label'] == 0
higgs_samples = df[is_higgs].sample(n=min(3, is_higgs.sum()), random_state=42)
background_samples = df[is_background].sample(n=min(3, is_background.sum()), random_state=42)

# (heading, event type, sampled rows): both classes go through the same
# render / save / preview steps.
event_groups = [
    ("\n🔴 Higgs Events:", 'higgs', higgs_samples),
    ("\n🔵 Background Events:", 'background', background_samples),
]

for heading, event_type, samples in event_groups:
    print(heading)
    for event_no, (_, event) in enumerate(samples.iterrows(), start=1):
        fig = create_collision_3d(event, event_type, event_no)
        filename = f'collision_{event_type}_{event_no}.html'
        fig.write_html(filename)
        print(f"   ✅ Saved: {filename}")
        # Only the first figure of each class is shown inline.
        if event_no == 1:
            fig.show()



STEP 3: 3D COLLISION VISUALIZATION

🎬 Creating 3D collision visualizations...

🔴 Higgs Events:
   ✅ Saved: collision_higgs_1.html


   ✅ Saved: collision_higgs_2.html
   ✅ Saved: collision_higgs_3.html

🔵 Background Events:
   ✅ Saved: collision_background_1.html


   ✅ Saved: collision_background_2.html
   ✅ Saved: collision_background_3.html


In [5]:
# ============================================================================
# PART 4: FEATURE DISTRIBUTIONS
# ============================================================================

print("\n" + "=" * 60, "STEP 4: FEATURE ANALYSIS", "=" * 60, sep="\n")

print("\n📊 Creating feature distributions...")

# First six feature columns, each paired with a human-readable subplot title.
features_to_plot = [
    (column, column.replace('_', ' ').title()) for column in X.columns[:6]
]

fig = make_subplots(
    rows=2,
    cols=3,
    subplot_titles=[title for _, title in features_to_plot],
)

# (label value, legend name, fill colour); Higgs is added before Background
# in every panel.
class_styles = [
    (1, 'Higgs', 'rgba(255,0,0,0.6)'),
    (0, 'Background', 'rgba(0,100,255,0.6)'),
]

for position, (feature, _) in enumerate(features_to_plot):
    # Fill the 2x3 grid row by row; plotly's grid is 1-based.
    grid_row, grid_col = divmod(position, 3)
    for label_value, legend_name, fill_color in class_styles:
        histogram = go.Histogram(
            x=df.loc[df['Label'] == label_value, feature],
            name=legend_name,
            marker_color=fill_color,
            nbinsx=50,
            showlegend=(position == 0),  # one legend entry per class
        )
        fig.add_trace(histogram, row=grid_row + 1, col=grid_col + 1)

fig.update_layout(
    title='🔍 Higgs vs Background: Feature Signatures',
    height=800,
    barmode='overlay',
    template='plotly_dark'
)

fig.write_html('feature_distributions.html')
print("✅ Saved: feature_distributions.html")
fig.show()


STEP 4: FEATURE ANALYSIS

📊 Creating feature distributions...
✅ Saved: feature_distributions.html


In [6]:
# ============================================================================
# PART 5: 3D FEATURE SPACE
# ============================================================================

print("\n🌌 Creating 3D feature space...")

# PCA picks the directions of largest variance, so the features are
# standardized first; otherwise the components mostly reflect whichever
# columns have the largest numeric scale.
X_standardized = StandardScaler().fit_transform(X)

# random_state keeps the projection reproducible if a randomized solver is used.
pca = PCA(n_components=3, random_state=42)
X_pca = pca.fit_transform(X_standardized)

# Reuse X's index: the 'Type' assignment below aligns on index labels, so a
# fresh 0..n-1 index would silently mis-assign classes if X had been filtered.
df_pca = pd.DataFrame(X_pca, columns=['PC1', 'PC2', 'PC3'], index=X.index)
df_pca['Type'] = y.map({1: 'Higgs', 0: 'Background'})

fig_3d = px.scatter_3d(
    df_pca, x='PC1', y='PC2', z='PC3',
    color='Type',
    color_discrete_map={'Higgs': 'red', 'Background': 'blue'},
    title=f'🌌 3D Feature Space ({pca.explained_variance_ratio_.sum()*100:.1f}% variance)',
    opacity=0.5
)

fig_3d.update_traces(marker=dict(size=2.5))
fig_3d.update_layout(height=700, template='plotly_dark')

fig_3d.write_html('feature_space_3d.html')
print("✅ Saved: feature_space_3d.html")
fig_3d.show()


🌌 Creating 3D feature space...
✅ Saved: feature_space_3d.html


In [7]:
# ============================================================================
# PART 6: MACHINE LEARNING
# ============================================================================

print("\n" + "=" * 60, "STEP 5: MACHINE LEARNING", "=" * 60, sep="\n")

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print(f"\n   Training: {len(X_train)} events")
print(f"   Test: {len(X_test)} events")

# Scale for logistic regression; the scaler is fitted on the training split
# only and then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


def _predict_with_scores(model, features):
    """Return (hard class predictions, predicted probability of class 1)."""
    hard_labels = model.predict(features)
    positive_scores = model.predict_proba(features)[:, 1]
    return hard_labels, positive_scores


# model name -> (predicted labels, positive-class probabilities); insertion
# order is the order in which results are reported.
predictions = {}

# Logistic Regression (trained and evaluated on the scaled features)
print("\n1️⃣  Logistic Regression...")
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(X_train_scaled, y_train)
predictions['Logistic Regression'] = _predict_with_scores(lr, X_test_scaled)
print("   ✅ Done!")

# Random Forest (raw features)
print("\n2️⃣  Random Forest...")
rf_params = dict(n_estimators=100, max_depth=15, random_state=42, n_jobs=-1)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train, y_train)
predictions['Random Forest'] = _predict_with_scores(rf, X_test)
print("   ✅ Done!")

# XGBoost (raw features; only when the optional import succeeded)
if XGBOOST_AVAILABLE:
    print("\n3️⃣  XGBoost...")
    xgb_params = dict(
        n_estimators=100,
        max_depth=7,
        learning_rate=0.1,
        random_state=42,
        eval_metric='logloss',
        verbosity=0,
    )
    xgb_model = xgb.XGBClassifier(**xgb_params)
    xgb_model.fit(X_train, y_train)
    predictions['XGBoost'] = _predict_with_scores(xgb_model, X_test)
    print("   ✅ Done!")


STEP 5: MACHINE LEARNING

   Training: 40000 events
   Test: 10000 events

1️⃣  Logistic Regression...
   ✅ Done!

2️⃣  Random Forest...
   ✅ Done!

3️⃣  XGBoost...
   ✅ Done!


In [8]:












# ============================================================================
# PART 7: RESULTS
# ============================================================================

print("\n" + "=" * 60, "STEP 6: RESULTS", "=" * 60, sep="\n")

print("\n📊 Performance Metrics:\n")

# Per model: a ruled heading, the per-class report, then the ROC-AUC.
class_names = ['Background', 'Higgs']
for model_name, (y_pred, y_prob) in predictions.items():
    print(f"\n{'='*60}", f"{model_name}", f"{'='*60}", sep="\n")
    print(classification_report(y_test, y_pred, target_names=class_names))

    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, y_prob)
    print(f"ROC-AUC: {auc(false_pos_rate, true_pos_rate):.4f}")

# ROC Curves
print("\n📈 Creating ROC curves...")

# Line colour per model; models without an entry fall back to white.
colors = {'Logistic Regression': 'cyan', 'Random Forest': 'lime', 'XGBoost': 'magenta'}

fig_roc = go.Figure()

# One curve per model, labelled with its AUC.
for model_name, (_, y_prob) in predictions.items():
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, y_prob)
    area = auc(false_pos_rate, true_pos_rate)
    curve_style = dict(color=colors.get(model_name, 'white'), width=3)
    fig_roc.add_trace(
        go.Scatter(
            x=false_pos_rate,
            y=true_pos_rate,
            name=f'{model_name} (AUC={area:.3f})',
            line=curve_style,
        )
    )

# Diagonal reference line.
chance_line = go.Scatter(
    x=[0, 1],
    y=[0, 1],
    name='Random',
    line=dict(color='gray', dash='dash'),
)
fig_roc.add_trace(chance_line)

fig_roc.update_layout(
    title='🎯 ROC Curves',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    template='plotly_dark',
    height=600
)

fig_roc.write_html('roc_curves.html')
print("✅ Saved: roc_curves.html")
fig_roc.show()

# Confusion Matrices
print("\n📊 Creating confusion matrices...")

model_names = list(predictions.keys())
n_models = len(model_names)

# One heatmap subplot per model, laid out in a single row.
fig_cm = make_subplots(
    rows=1,
    cols=n_models,
    subplot_titles=model_names,
    specs=[[{'type': 'heatmap'} for _ in model_names]],
)

class_names = ['Background', 'Higgs']

for column, model_name in enumerate(model_names, start=1):
    y_pred, _ = predictions[model_name]
    cm = confusion_matrix(y_test, y_pred)
    # Cell annotations: the four counts of the 2x2 matrix as strings.
    z_text = [[str(cm[r][c]) for c in (0, 1)] for r in (0, 1)]

    heatmap = go.Heatmap(
        z=cm,
        x=class_names,
        y=class_names,
        colorscale='Blues',
        text=z_text,
        texttemplate='%{text}',
        textfont=dict(size=16),
        showscale=(column == n_models),  # colour bar on the last panel only
    )
    fig_cm.add_trace(heatmap, row=1, col=column)

fig_cm.update_layout(
    title='📊 Confusion Matrices',
    height=400,
    template='plotly_dark'
)

fig_cm.write_html('confusion_matrices.html')
print("✅ Saved: confusion_matrices.html")
fig_cm.show()

# Feature Importance
print("\n🔑 Feature importance...")

# Random-forest importances, sorted ascending so the most important feature
# ends up last (top bar of the horizontal chart).
importance_df = (
    pd.DataFrame({'Feature': X.columns, 'Importance': rf.feature_importances_})
    .sort_values('Importance', ascending=True)
)

importance_bars = go.Bar(
    x=importance_df['Importance'],
    y=importance_df['Feature'],
    orientation='h',
    marker=dict(color=importance_df['Importance'], colorscale='Viridis'),
)
fig_imp = go.Figure(importance_bars)

fig_imp.update_layout(
    title='🔑 Feature Importance',
    xaxis_title='Importance',
    height=800,
    template='plotly_dark'
)

fig_imp.write_html('feature_importance.html')
print("✅ Saved: feature_importance.html")
fig_imp.show()

# The last five rows of the ascending frame are the five largest importances.
print("\n💡 Top 5 features:")
top_five = importance_df.tail(5)
for feature_name, importance in zip(top_five['Feature'], top_five['Importance']):
    print(f"   {feature_name}: {importance:.4f}")




STEP 6: RESULTS

📊 Performance Metrics:


Logistic Regression
              precision    recall  f1-score   support

  Background       0.77      0.86      0.82      6587
       Higgs       0.66      0.51      0.58      3413

    accuracy                           0.74     10000
   macro avg       0.72      0.69      0.70     10000
weighted avg       0.73      0.74      0.73     10000

ROC-AUC: 0.8133

Random Forest
              precision    recall  f1-score   support

  Background       0.85      0.90      0.88      6587
       Higgs       0.78      0.69      0.74      3413

    accuracy                           0.83     10000
   macro avg       0.82      0.80      0.81     10000
weighted avg       0.83      0.83      0.83     10000

ROC-AUC: 0.9011

XGBoost
              precision    recall  f1-score   support

  Background       0.86      0.90      0.88      6587
       Higgs       0.79      0.72      0.75      3413

    accuracy                           0.84     10000
   macro 


📊 Creating confusion matrices...
✅ Saved: confusion_matrices.html



🔑 Feature importance...
✅ Saved: feature_importance.html



💡 Top 5 features:
   DER_met_phi_centrality: 0.0451
   PRI_tau_pt: 0.0623
   DER_mass_vis: 0.0809
   DER_mass_transverse_met_lep: 0.1218
   DER_mass_MMC: 0.1987


In [9]:
# ============================================================================
# FINAL SUMMARY
# ============================================================================

print("\n" + "=" * 60, "🎉 PROJECT COMPLETE!", "=" * 60, sep="\n")

# List of generated artefacts.
print(f"\n📁 Generated {6 + 5} HTML files:")
generated_files = (
    "   • 6 collision visualizations (3D, rotate with mouse!)",
    "   • feature_distributions.html",
    "   • feature_space_3d.html",
    "   • roc_curves.html",
    "   • confusion_matrices.html",
    "   • feature_importance.html",
)
for entry in generated_files:
    print(entry)

# One table row per model: ROC-AUC and plain accuracy on the test split.
print("\n🎯 Performance Summary:")
print(f"\n{'Model':<25} {'ROC-AUC':<10} {'Accuracy':<10}")
print("-" * 45)
for model_name, (y_pred, y_prob) in predictions.items():
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, y_prob)
    area = auc(false_pos_rate, true_pos_rate)
    hit_rate = (y_pred == y_test).mean()
    print(f"{model_name:<25} {area:.4f}    {hit_rate*100:.2f}%")

print("\n✨ All visualizations ready for your report!")
print("=" * 60)


🎉 PROJECT COMPLETE!

📁 Generated 11 HTML files:
   • 6 collision visualizations (3D, rotate with mouse!)
   • feature_distributions.html
   • feature_space_3d.html
   • roc_curves.html
   • confusion_matrices.html
   • feature_importance.html

🎯 Performance Summary:

Model                     ROC-AUC    Accuracy  
---------------------------------------------
Logistic Regression       0.8133    74.30%
Random Forest             0.9011    83.04%
XGBoost                   0.9050    83.83%

✨ All visualizations ready for your report!
