# Model Comparison: Phase-wise Test Results

Compare test results across different model configurations.


In [0]:
import os
import glob
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set style
# Global plot appearance for every figure drawn later in the notebook:
# first apply the matplotlib style sheet, then make 'husl' the seaborn palette.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')


In [0]:
# List the entries of the inference_v2_1sec_viz results directory.
# The listing is only displayed as the cell output; it is not stored anywhere.
os.listdir('/Volumes/main_dev/dld_ml_anticheat_test/anticheat_test_volume/pgc_wwcd/pgc_results/inference_v2_1sec_viz/')

In [0]:
# Configuration
# Directory scanned for one sub-folder per experiment (each expected to hold a
# test_results.csv and, optionally, a config.json). The comparison summary CSV
# is written back into this same directory at the end of the notebook.
CHECKPOINT_DIR = "/Volumes/main_dev/dld_ml_anticheat_test/anticheat_test_volume/pgc_wwcd/pgc_results/checkpoints"
# Number of phase columns (Phase_1 .. Phase_<NUM_PHASES>) read from each results table.
NUM_PHASES = 10


In [0]:
# Row with index label 0 of the LightGBM (leaves31_lr0.1) test results, kept as a Series.
# NOTE(review): presumably this is the accuracy row, since it is later compared
# against the models' accuracy rows -- confirm against the CSV.
lgbm = pd.read_csv('/Volumes/main_dev/dld_ml_anticheat_test/anticheat_test_volume/pgc_wwcd/pgc_results/lgbm/leaves31_lr0.1/test_results.csv').loc[0]

## 1. Load All Results


In [0]:
def load_all_results(checkpoint_dir: str) -> pd.DataFrame:
    """
    Load ``test_results.csv`` from every experiment folder in *checkpoint_dir*.

    Each immediate sub-directory is treated as one experiment and its folder
    name becomes the ``model_name``. Folders whose own name contains ``'toy'``
    and folders without a ``test_results.csv`` are skipped. If a
    ``config.json`` sits next to the results, selected hyper-parameters are
    copied onto every row of that experiment; missing keys fall back to
    ``'N/A'`` (``'mse'`` for ``loss_type``).

    Args:
        checkpoint_dir: Directory containing one sub-directory per experiment.

    Returns:
        All experiment tables concatenated row-wise: the CSV's own columns
        (the rest of the notebook expects ``metric``, ``Phase_1`` ..
        ``Phase_N`` and ``Average``) plus ``model_name``, ``embed_dim``,
        ``num_heads``, ``num_layers``, ``loss_type`` and ``lr``.
        An empty, column-less DataFrame when no results were found.
    """
    all_results = []

    for exp_dir in sorted(glob.glob(os.path.join(checkpoint_dir, "*"))):
        model_name = os.path.basename(exp_dir)

        # Match 'toy' against the experiment's own folder name only. Testing
        # the full path would silently drop every experiment as soon as some
        # parent directory happens to contain "toy".
        if 'toy' in model_name:
            continue
        if not os.path.isdir(exp_dir):
            continue

        results_path = os.path.join(exp_dir, "test_results.csv")
        if not os.path.exists(results_path):
            continue

        df = pd.read_csv(results_path)

        # The config is optional; without it the defaults below are used.
        config = {}
        config_path = os.path.join(exp_dir, "config.json")
        if os.path.exists(config_path):
            with open(config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)

        # Tag every row with the experiment name and its hyper-parameters
        df['model_name'] = model_name
        df['embed_dim'] = config.get('embed_dim', 'N/A')
        df['num_heads'] = config.get('num_heads', 'N/A')
        df['num_layers'] = config.get('num_layers', 'N/A')
        df['loss_type'] = config.get('loss_type', 'mse')
        df['lr'] = config.get('lr', 'N/A')

        all_results.append(df)

    if not all_results:
        print("No results found!")
        return pd.DataFrame()

    return pd.concat(all_results, ignore_index=True)

# Load every experiment's results once; the cells below all read from results_df
results_df = load_all_results(CHECKPOINT_DIR)
# load_all_results() hands back a column-less frame when nothing was found, so
# only count models when the 'model_name' column actually exists.
n_models = results_df['model_name'].nunique() if 'model_name' in results_df.columns else 0
print(f"Loaded {len(results_df)} rows from {n_models} models")
results_df.shape


## 2. Accuracy Summary Table


In [0]:
# Columns shown in the table: model description, per-phase values, overall average
phase_cols = [f'Phase_{i}' for i in range(1, NUM_PHASES + 1)]
display_cols = ['model_name', 'embed_dim', 'num_heads', 'num_layers', 'loss_type', *phase_cols, 'Average']

# Accuracy rows only, best average first, restricted to the 'mse' loss
accuracy_df = (
    results_df[results_df['metric'] == 'accuracy']
    .copy()
    .sort_values('Average', ascending=False)
    .loc[lambda frame: frame['loss_type'] == 'mse']
)

# Scale the per-phase values and the average by 100 (shown as percentages)
accuracy_df[phase_cols] = accuracy_df[phase_cols].mul(100)
accuracy_df['Average'] = accuracy_df['Average'].mul(100)

# Best 'mse' run, kept for the cross-loss comparison table
mse = accuracy_df.iloc[0]
print("Accuracy Results (sorted by Average):")
accuracy_df[display_cols].round(2)

In [0]:
# Columns shown in the table: model description, per-phase values, overall average
phase_cols = [f'Phase_{i}' for i in range(1, NUM_PHASES + 1)]
display_cols = ['model_name', 'embed_dim', 'num_heads', 'num_layers', 'loss_type', *phase_cols, 'Average']

# Accuracy rows only, best average first, restricted to the 'cox' loss
accuracy_df = (
    results_df[results_df['metric'] == 'accuracy']
    .copy()
    .sort_values('Average', ascending=False)
    .loc[lambda frame: frame['loss_type'] == 'cox']
)

# Scale the per-phase values and the average by 100 (shown as percentages)
accuracy_df[phase_cols] = accuracy_df[phase_cols].mul(100)
accuracy_df['Average'] = accuracy_df['Average'].mul(100)

# Best 'cox' run, kept for the cross-loss comparison table
cox = accuracy_df.iloc[0]
print("Accuracy Results (sorted by Average):")
accuracy_df[display_cols].round(2)

In [0]:
# Columns shown in the table: model description, per-phase values, overall average
phase_cols = [f'Phase_{i}' for i in range(1, NUM_PHASES + 1)]
display_cols = ['model_name', 'embed_dim', 'num_heads', 'num_layers', 'loss_type', *phase_cols, 'Average']

# Accuracy rows only, best average first, restricted to the 'rank_cox' loss
accuracy_df = (
    results_df[results_df['metric'] == 'accuracy']
    .copy()
    .sort_values('Average', ascending=False)
    .loc[lambda frame: frame['loss_type'] == 'rank_cox']
)

# Scale the per-phase values and the average by 100 (shown as percentages)
accuracy_df[phase_cols] = accuracy_df[phase_cols].mul(100)
accuracy_df['Average'] = accuracy_df['Average'].mul(100)

# Best 'rank_cox' run, kept for the cross-loss comparison table
rank_cox = accuracy_df.iloc[0]
print("Accuracy Results (sorted by Average):")
accuracy_df[display_cols].round(2)

In [0]:
# Columns shown in the table: model description, per-phase values, overall average
phase_cols = [f'Phase_{i}' for i in range(1, NUM_PHASES + 1)]
display_cols = ['model_name', 'embed_dim', 'num_heads', 'num_layers', 'loss_type', *phase_cols, 'Average']

# Accuracy rows only, best average first, restricted to 'weighted_cox' runs
# whose folder name contains 'v11'
accuracy_df = (
    results_df[results_df['metric'] == 'accuracy']
    .copy()
    .sort_values('Average', ascending=False)
    .loc[lambda frame: (frame['loss_type'] == 'weighted_cox') & (frame['model_name'].str.contains('v11'))]
)

# Scale the per-phase values and the average by 100 (shown as percentages)
accuracy_df[phase_cols] = accuracy_df[phase_cols].mul(100)
accuracy_df['Average'] = accuracy_df['Average'].mul(100)

# Best matching 'weighted_cox' run, kept for the cross-loss comparison table
weighted_cox = accuracy_df.iloc[0]
print("Accuracy Results (sorted by Average):")
accuracy_df[display_cols].round(2)

In [0]:
# Folder name of the top row of whichever accuracy_df was built most recently
# (the result depends on which filtering cell ran last).
accuracy_df.iloc[0]['model_name']

In [0]:
# Columns shown in the table: model description, per-phase values, overall average
phase_cols = [f'Phase_{i}' for i in range(1, NUM_PHASES + 1)]
display_cols = ['model_name', 'embed_dim', 'num_heads', 'num_layers', 'loss_type', *phase_cols, 'Average']

# Accuracy rows only, best average first, restricted to the 'concordance' loss
accuracy_df = (
    results_df[results_df['metric'] == 'accuracy']
    .copy()
    .sort_values('Average', ascending=False)
    .loc[lambda frame: frame['loss_type'] == 'concordance']
)

# Scale the per-phase values and the average by 100 (shown as percentages)
accuracy_df[phase_cols] = accuracy_df[phase_cols].mul(100)
accuracy_df['Average'] = accuracy_df['Average'].mul(100)

# Best 'concordance' run
concordance = accuracy_df.iloc[0]
print("Accuracy Results (sorted by Average):")
accuracy_df[display_cols].round(2)

In [0]:
# Columns shown in the table: model description, per-phase values, overall average
phase_cols = [f'Phase_{i}' for i in range(1, NUM_PHASES + 1)]
display_cols = ['model_name', 'embed_dim', 'num_heads', 'num_layers', 'loss_type', *phase_cols, 'Average']

# Accuracy rows only, best average first, restricted to the 'survival_ce' loss
accuracy_df = (
    results_df[results_df['metric'] == 'accuracy']
    .copy()
    .sort_values('Average', ascending=False)
    .loc[lambda frame: frame['loss_type'] == 'survival_ce']
)

# Scale the per-phase values and the average by 100 (shown as percentages)
accuracy_df[phase_cols] = accuracy_df[phase_cols].mul(100)
accuracy_df['Average'] = accuracy_df['Average'].mul(100)

# Best 'survival_ce' run, kept for the cross-loss comparison table
survival_ce = accuracy_df.iloc[0]
print("Accuracy Results (sorted by Average):")
accuracy_df[display_cols].round(2)

In [0]:
# Stack the best accuracy row of each loss type into one table (one row per model).
results = pd.concat([mse,cox,rank_cox,weighted_cox, survival_ce],axis=1).T
# Keep the columns at positions 1..11 -- presumably Phase_1..Phase_10 and Average,
# with 'metric' at position 0; TODO confirm against the CSV column order.
results = results.iloc[:,1:12]
# LGBM baseline: drop its first value, multiply by 100 (the rows above were
# already scaled by 100), and reuse the table's column labels. The alignment is
# purely positional, so the baseline must supply exactly one value per column.
tmp = pd.DataFrame(lgbm.values[1:]).T
tmp = tmp*100
tmp.columns = results.columns
results = pd.concat([results,tmp],axis=0)
# Row labels follow the stacking order above, with the LGBM row last.
# NOTE(review): the second label is 'Ours-fullall' although that row comes from
# `cox`, and the `concordance` row computed earlier is not part of this table --
# confirm both are intended.
results.index = ['Ours-mse','Ours-fullall', 'Ours-rank_cox','Ours-weighted_cox','Ours-CE', 'LGBM']
# The transposed rows are not guaranteed to be numeric, so cast before sorting.
results = results.astype('float')
results = results.sort_values('Average', ascending=False)
results.round(2)

In [0]:
import matplotlib.pyplot as plt
import numpy as np

# Phase labels (x axis)
phases = ["Phase_1", "Phase_2", "Phase_3", "Phase_4", "Phase_5",
          "Phase_6", "Phase_7", "Phase_8", "Phase_9", "Phase_10"]

# Per-phase scores, hard-coded for the figure.
# NOTE(review): presumably copied from the rounded `results` table -- confirm
# they are still current whenever the experiments are re-run.
ours_mse = [10.29, 15.22, 22.18, 28.96, 38.99, 53.01, 68.09, 80.54, 94.19, 100.00]
ours_rank_cox = [10.20, 16.18, 21.38, 28.96, 37.42, 51.96, 67.20, 81.03, 94.23, 100.00]
ours_weighted_cox = [8.46, 14.21, 18.10, 25.86, 36.74, 49.45, 67.01, 80.61, 94.00, 99.77]
# Deliberately NOT named `lgbm`: that name holds the baseline Series loaded
# from CSV, which other cells still access through `lgbm.values`. Rebinding it
# to a plain list made those cells fail with AttributeError.
lgbm_scores = [10.29, 15.78, 21.92, 25.21, 31.32, 44.03, 59.58, 74.72, 91.35, 99.70]

plt.figure(figsize=(10, 5))

# One line per model, all sharing the same phase axis
plt.plot(phases, ours_mse, marker="o", label="Ours-mse")
plt.plot(phases, ours_rank_cox, marker="o", label="Ours-rank_cox")
plt.plot(phases, ours_weighted_cox, marker="o", label="Ours-weighted_cox")
plt.plot(phases, lgbm_scores, marker="o", label="LGBM")

plt.xticks(rotation=45)
plt.xlabel("Phase")
plt.ylabel("Score")
plt.title("Model Performance by Phase")
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

In [0]:
# Show only the four models drawn in the line plot.
# `results` holds six labelled rows at this point, so assigning a four-item
# index raised "ValueError: Length mismatch". Select the four rows by label
# instead; this also leaves the full `results` table untouched.
plotted_models = ['Ours-mse', 'Ours-rank_cox', 'Ours-weighted_cox', 'LGBM']
results.loc[plotted_models].astype('float').round(2)

In [0]:
# Peek at the baseline values with the first entry dropped.
# Expects `lgbm` to be the pandas Series read from the LGBM test_results.csv;
# a plain list has no `.values` attribute.
lgbm.values[1:]

## 3. Phase-wise Accuracy Trends (Line Plot)


In [0]:
def plot_phase_trends(df: pd.DataFrame, metric: str = 'accuracy', top_n: int = 5):
    """Draw one line per model showing how ``metric`` develops across phases.

    Only the ``top_n`` rows with the highest ``Average`` for the chosen metric
    are drawn. The figure is shown immediately; nothing is returned.
    """
    phase_numbers = range(1, NUM_PHASES + 1)
    phase_cols = [f'Phase_{i}' for i in phase_numbers]
    metric_label = metric.upper()

    # Rows of the requested metric, ranked by their average, best first
    selected = df[df['metric'] == metric].copy()
    top_rows = selected.sort_values('Average', ascending=False).head(top_n)

    fig, ax = plt.subplots(figsize=(12, 6))

    for _, row in top_rows.iterrows():
        # Slicing caps the legend label at 40 characters and leaves shorter
        # names unchanged.
        ax.plot(
            phase_numbers,
            [row[col] for col in phase_cols],
            marker='o',
            linewidth=2,
            markersize=6,
            label=row['model_name'][:40],
        )

    ax.set_xlabel('Phase', fontsize=12)
    ax.set_ylabel(metric_label, fontsize=12)
    ax.set_title(f'Phase-wise {metric_label} Trends (Top {top_n} Models)', fontsize=14)
    ax.set_xticks(phase_numbers)
    # Legend sits outside the axes, to the right, so it never covers the lines
    ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left', fontsize=9)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Accuracy trend lines for the five best models (skipped when nothing was loaded)
if len(results_df):
    plot_phase_trends(results_df, metric='accuracy', top_n=5)


## 4. Heatmap: Phase-wise Accuracy


In [0]:
def plot_accuracy_heatmap(df: pd.DataFrame, top_n: int = 10):
    """Show a model-by-phase heatmap of accuracy for the ``top_n`` best models.

    Models are ranked by their ``Average`` accuracy; the heatmap has one column
    per phase plus a final ``Average`` column. Nothing is returned.
    """
    value_cols = [f'Phase_{i}' for i in range(1, NUM_PHASES + 1)]
    value_cols.append('Average')

    # Accuracy rows only, best average first, capped at top_n models
    best_rows = (
        df[df['metric'] == 'accuracy']
        .copy()
        .sort_values('Average', ascending=False)
        .head(top_n)
    )

    # Rows = models, columns = phases followed by the average
    heatmap_data = best_rows.set_index('model_name')[value_cols]

    # Long folder names are cut to 35 characters plus an ellipsis for the axis
    heatmap_data.index = [
        name if len(name) <= 35 else name[:35] + '...'
        for name in heatmap_data.index
    ]

    # Figure grows by half an inch per model, but is never shorter than 6 inches
    fig_height = max(6, len(heatmap_data) * 0.5)
    fig, ax = plt.subplots(figsize=(14, fig_height))

    sns.heatmap(
        heatmap_data,
        ax=ax,
        annot=True,
        fmt='.3f',
        cmap='RdYlGn',
        vmin=0,
        vmax=1,
        cbar_kws={'label': 'Accuracy'},
    )

    ax.set_title('Phase-wise Accuracy Heatmap', fontsize=14)
    ax.set_xlabel('Phase', fontsize=12)
    ax.set_ylabel('Model', fontsize=12)

    plt.tight_layout()
    plt.show()

# Accuracy heatmap for the ten best models (skipped when nothing was loaded)
if len(results_df):
    plot_accuracy_heatmap(results_df, top_n=10)


## 5. All Metrics Summary


In [0]:
def create_summary_table(df: pd.DataFrame) -> pd.DataFrame:
    """Create a one-row-per-model summary of configuration and metric averages.

    Args:
        df: Long-format results with at least ``model_name``, ``metric`` and
            ``Average`` columns; the configuration columns (``embed_dim``,
            ``num_heads``, ``num_layers``, ``loss_type``) are optional.

    Returns:
        DataFrame with ``model_name``, the four configuration columns and
        ``accuracy_avg`` / ``log_loss_avg`` / ``ece_avg``, sorted by
        ``accuracy_avg`` descending. A metric that a model does not report is
        NaN. An empty *df* yields an empty table with these same columns.
    """
    metrics = ['accuracy', 'log_loss', 'ece']
    config_cols = ['embed_dim', 'num_heads', 'num_layers', 'loss_type']
    summary_cols = ['model_name'] + config_cols + [f'{metric}_avg' for metric in metrics]

    # An empty input (e.g. the column-less frame produced when no experiment
    # results were found) used to fail with KeyError on 'model_name'. Return an
    # empty table with the expected columns so callers can check len() or export it.
    if df.empty:
        return pd.DataFrame(columns=summary_cols)

    summary_data = []

    for model_name in df['model_name'].unique():
        model_df = df[df['model_name'] == model_name]

        row = {'model_name': model_name}

        # Configuration values are repeated on every row of a model, so the
        # first row is representative.
        first_row = model_df.iloc[0]
        row['embed_dim'] = first_row.get('embed_dim', 'N/A')
        row['num_heads'] = first_row.get('num_heads', 'N/A')
        row['num_layers'] = first_row.get('num_layers', 'N/A')
        row['loss_type'] = first_row.get('loss_type', 'mse')

        for metric in metrics:
            metric_rows = model_df[model_df['metric'] == metric]
            # Take the first matching row; NaN marks a metric that is absent
            if len(metric_rows) > 0:
                row[f'{metric}_avg'] = metric_rows['Average'].values[0]
            else:
                row[f'{metric}_avg'] = np.nan

        summary_data.append(row)

    summary_df = pd.DataFrame(summary_data)
    return summary_df.sort_values('accuracy_avg', ascending=False)

# Create and display summary
# One row per model: its configuration plus the 'Average' value of each metric.
# summary_df is reused by the best-model report and the CSV export below.
summary_df = create_summary_table(results_df)
print("Model Summary (sorted by Average Accuracy):")
summary_df


## 6. Best Model


In [0]:
# Report the first row of the summary table (skipped when the table is empty)
if len(summary_df):
    best_model = summary_df.iloc[0]
    # A single print call with a newline separator emits one line per item
    print(
        "Best Model:",
        "=" * 50,
        f"Name: {best_model['model_name']}",
        f"Embed Dim: {best_model['embed_dim']}",
        f"Num Heads: {best_model['num_heads']}",
        f"Num Layers: {best_model['num_layers']}",
        f"Loss Type: {best_model['loss_type']}",
        "-" * 50,
        f"Average Accuracy: {best_model['accuracy_avg']:.4f}",
        f"Average Log Loss: {best_model['log_loss_avg']:.4f}",
        f"Average ECE: {best_model['ece_avg']:.4f}",
        "=" * 50,
        sep="\n",
    )


## 7. Export Summary


In [0]:
# Save summary to CSV
# The file is written into CHECKPOINT_DIR itself, next to the experiment
# folders, and without the DataFrame index column.
output_path = os.path.join(CHECKPOINT_DIR, "model_comparison_summary.csv")
summary_df.to_csv(output_path, index=False)
print(f"Summary saved to: {output_path}")
