# scikit-learn Trees Benchmark Results Visualization

This notebook visualizes the benchmark results from the orchestration runs.

In [None]:
import json
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plotting defaults: seaborn "whitegrid" theme and a 12x6 default figure
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

## Load Results

Load the most recent summary JSON file (the last `*_summary.json` by filename) from the `results` directory.

In [None]:
# Directory that is searched for summary files
results_dir = Path("results")

# Collect every summary file. The list is sorted by filename, so treating the
# last entry as the "latest" run assumes the names sort chronologically
# (e.g. a timestamp prefix) -- TODO confirm against the orchestration script.
summary_files = sorted(results_dir.glob("*_summary.json"))
if not summary_files:
    raise FileNotFoundError("No summary files found in results directory")

latest_summary = summary_files[-1]
print(f"Loading results from: {latest_summary}")

# Decode explicitly as UTF-8: without an encoding argument, open() falls back
# to the platform locale, which can mis-decode non-ASCII content on some systems.
with open(latest_summary, "r", encoding="utf-8") as f:
    results = json.load(f)

# Build the DataFrame that every later cell works on
# (presumably one row per benchmark record -- verify against the summary schema).
df = pd.DataFrame(results)
print(f"\nLoaded {len(df)} benchmark results")
df.head()

## Data Overview

In [None]:
# Row counts per model, per branch, and per (n_samples, n_features) combination
model_counts = df['model'].value_counts()
branch_counts = df['branch'].value_counts()
size_counts = df[['n_samples', 'n_features']].value_counts().sort_index()

print("Models tested:")
print(model_counts)
print("\nBranches tested:")
print(branch_counts)
print("\nDataset sizes:")
print(size_counts)

## Training Time Analysis

In [None]:
# Side-by-side box plots of the train_time_mean and predict_time_mean columns,
# grouped by model.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (column to plot, panel title, y-axis label) for the left and right panel
panels = (
    ('train_time_mean', 'Training Time by Model', 'Training Time (seconds)'),
    ('predict_time_mean', 'Prediction Time by Model', 'Prediction Time (seconds)'),
)

for ax, (metric, title, ylabel) in zip(axes, panels):
    sns.boxplot(data=df, x='model', y=metric, ax=ax)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Model')
    # Tilt the model names so long labels stay readable
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## Performance by Dataset Size

In [None]:
# Create a combined "<n_samples>x<n_features>" label column on df
df['dataset_size'] = df['n_samples'].astype(str) + 'x' + df['n_features'].astype(str)

# Log-log line plots of timing against sample count, one line per model
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (column to plot, y-axis label, panel title) for the left and right panel
panels = (
    ('train_time_mean', 'Training Time (seconds)', 'Training Time vs Dataset Size'),
    ('predict_time_mean', 'Prediction Time (seconds)', 'Prediction Time vs Dataset Size'),
)

for ax, (metric, ylabel, title) in zip(axes, panels):
    # sort=False keeps the models in order of first appearance in df
    for model, model_data in df.groupby('model', sort=False):
        # A model can have several rows for the same n_samples (other columns
        # such as branch or n_features may differ) and the rows are not
        # guaranteed to be ordered. Connecting raw rows with a line would
        # zig-zag, so average per sample count; groupby returns the result
        # sorted by n_samples.
        curve = model_data.groupby('n_samples')[metric].mean()
        ax.plot(curve.index, curve.values, 'o-', label=model, alpha=0.7)

    ax.set_xlabel('Number of Samples')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    # Place the legend outside the plotting area, to the right of the axes
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Branch Comparison

Compare performance across different scikit-learn branches.

In [None]:
# Box plots of both timing columns per branch, split by model via hue.
# Skipped when the data contains at most one distinct branch.
if df['branch'].nunique() <= 1:
    print("Only one branch tested, skipping branch comparison")
else:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # (column to plot, panel title, y-axis label) for the left and right panel
    branch_panels = (
        ('train_time_mean', 'Training Time by Branch', 'Training Time (seconds)'),
        ('predict_time_mean', 'Prediction Time by Branch', 'Prediction Time (seconds)'),
    )

    for ax, (metric, title, ylabel) in zip(axes, branch_panels):
        sns.boxplot(data=df, x='branch', y=metric, hue='model', ax=ax)
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Branch')
        # Place the legend outside the plotting area, to the right of the axes
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.tight_layout()
    plt.show()

## Parameter Impact Analysis

Analyze the impact of different model parameters on performance.

In [None]:
# Expand the per-row model_params dicts into one column per parameter
params_df = pd.json_normalize(df['model_params'])
# The concat below aligns rows by index label; pin params_df to df's index so
# each parameter row lands next to the benchmark row it came from even if df
# does not carry a default 0..n-1 index.
params_df.index = df.index
df_with_params = pd.concat([df, params_df], axis=1)

# Show available parameters
print("Available parameters:")
print(params_df.columns.tolist())

# Example: Plot impact of max_depth if available
if 'max_depth' in params_df.columns:
    # Rows without a max_depth value cannot be placed on the x-axis
    depth_rows = df_with_params[df_with_params['max_depth'].notna()]

    if depth_rows.empty:
        # Avoid drawing an empty figure (and an empty-legend warning)
        print("No rows with a max_depth value, skipping max_depth plot")
    else:
        fig, ax = plt.subplots(figsize=(10, 6))

        # sort=False keeps the models in order of first appearance
        for model, model_data in depth_rows.groupby('model', sort=False):
            # Several rows can share the same max_depth and the rows are not
            # guaranteed to be ordered, so a line through the raw rows would
            # zig-zag. Average per depth; groupby returns the result sorted
            # by max_depth.
            curve = model_data.groupby('max_depth')['train_time_mean'].mean()
            ax.plot(curve.index, curve.values, 'o-', label=model, alpha=0.7)

        ax.set_xlabel('Max Depth')
        ax.set_ylabel('Training Time (seconds)')
        ax.set_title('Impact of max_depth on Training Time')
        ax.legend()
        ax.grid(True, alpha=0.3)
        plt.show()

## Summary Statistics

In [None]:
# Per-model mean / std / min / max of both timing columns, rounded to 4 decimals
stat_names = ['mean', 'std', 'min', 'max']
per_model = df.groupby('model')
summary = per_model.agg({
    'train_time_mean': stat_names,
    'predict_time_mean': stat_names,
}).round(4)

print("Summary Statistics by Model:")
print(summary)

## Export Results

Export processed results to CSV for further analysis.

In [None]:
# Write df next to the summary files as "<summary stem>_processed.csv",
# without the index column.
csv_name = latest_summary.stem + "_processed.csv"
csv_file = results_dir / csv_name
df.to_csv(csv_file, index=False)
print(f"Results exported to: {csv_file}")