# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Measurements to analyse. Later sections filter this frame on its
# 'Benchmark' and 'Scheduler' columns and read one column per metric.
df = pd.read_csv('scheduler_metrics.csv')

# Global look-and-feel: seaborn grid theme and a larger default canvas
# for any figure created without an explicit figsize.
plt.rcParams['figure.figsize'] = (14, 10)
sns.set_style("whitegrid")

# Metric column name -> axis/title text (unit included where there is one).
# Insertion order is the order in which the overview panels are drawn.
metrics = dict(
    Turnaround_Time='Turnaround Time (k-ticks)',
    Waiting_Time='Waiting Time (k-ticks)',
    Response_Time='Response Time (k-ticks)',
    Context_Switches='Context Switches',
    CPU_Share='CPU Share (%)',
)

# Benchmark identifier (as matched against the 'Benchmark' column) -> label
# shown on the x axis. The plotting order is the key order of this mapping.
benchmark_labels = {
    'short_io': 'Short I/O',
    'long_io': 'Long I/O',
    'short_cpu': 'Short CPU',
    'long_cpu_child': 'Long CPU (4 procs avg)',
}
benchmarks = list(benchmark_labels)

# Schedulers under comparison, each paired with a fixed bar colour so a
# scheduler looks the same in every figure.
schedulers = ['RR', 'FIFO', 'EEVDF']
colors = dict(zip(schedulers, ('#3498db', '#e74c3c', '#2ecc71')))

# Overview figure: one grouped-bar panel per metric on a 3x2 grid.
# Within a panel, each benchmark gets a cluster of bars, one bar per scheduler.
fig, axes = plt.subplots(3, 2, figsize=(16, 14))
fig.suptitle('Scheduler Performance Comparison Across Benchmarks', fontsize=16, fontweight='bold')

panels = axes.ravel()  # row-major: left-to-right, then top-to-bottom

# Bar geometry and tick text are the same for every panel, so build them once.
x = np.arange(len(benchmarks))
width = 0.25
tick_labels = [benchmark_labels[b] for b in benchmarks]

# zip() hands each metric the next free panel, which replaces the manual
# row/column arithmetic and the hard-coded metric count. Metrics beyond the
# number of panels (6) are not drawn.
for ax, (metric_key, metric_label) in zip(panels, metrics.items()):
    for i, scheduler in enumerate(schedulers):
        values = []
        for benchmark in benchmarks:
            match = df[(df['Benchmark'] == benchmark) & (df['Scheduler'] == scheduler)]
            # The first matching row wins; a combination with no row at all is
            # drawn as a zero-height bar instead of raising.
            values.append(match[metric_key].iloc[0] if not match.empty else 0)

        ax.bar(x + i * width, values, width, label=scheduler, color=colors[scheduler], alpha=0.8)

    ax.set_xlabel('Benchmark Type', fontsize=11, fontweight='bold')
    ax.set_ylabel(metric_label, fontsize=11, fontweight='bold')
    ax.set_title(metric_label, fontsize=12, fontweight='bold')
    # With three bars per cluster, x + width is the centre of the middle bar.
    ax.set_xticks(x + width)
    ax.set_xticklabels(tick_labels, rotation=15, ha='right')
    ax.legend()
    ax.grid(axis='y', alpha=0.3)

# Remove every panel that did not receive a metric, however many there are.
for unused in panels[len(metrics):]:
    fig.delaxes(unused)

plt.tight_layout()
plt.savefig('scheduler_comparison_all_metrics.png', dpi=300, bbox_inches='tight')
plt.show()

# Detailed per-metric figures
# 1. Response time (the metric that matters most for interactivity)
fig, ax = plt.subplots(figsize=(12, 6))

# Wide table: one row per benchmark (in plotting order), one column per scheduler.
response_data = (
    df.pivot(index='Benchmark', columns='Scheduler', values='Response_Time')
      .reindex(benchmarks)
)
response_data.index = [benchmark_labels[name] for name in benchmarks]

x = np.arange(response_data.shape[0])
width = 0.25  # also read by the later detail plots

# One bar series per scheduler, shifted sideways so the bars sit next to each other.
for slot, scheduler in enumerate(schedulers):
    ax.bar(
        x + slot * width,
        response_data[scheduler],
        width,
        label=scheduler,
        color=colors[scheduler],
        alpha=0.8,
    )

ax.set_title('Response Time Comparison: Lower is Better for Interactivity',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Benchmark Type', fontsize=12, fontweight='bold')
ax.set_ylabel('Response Time (k-ticks)', fontsize=12, fontweight='bold')
ax.set_xticks(x + width)
ax.set_xticklabels(response_data.index)
ax.legend(fontsize=11)
ax.grid(axis='y', alpha=0.3)

# Print the numeric value on top of each bar; bars that are not strictly
# positive (zero or missing) get no label.
for slot, scheduler in enumerate(schedulers):
    for position, height in zip(x, response_data[scheduler]):
        if not height > 0:
            continue
        ax.text(position + slot * width, height, f'{height:.1f}',
                ha='center', va='bottom', fontsize=9)

plt.tight_layout()
plt.savefig('response_time_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# 2. Context Switches Comparison (Overhead Analysis)
fig, ax = plt.subplots(figsize=(12, 6))

# Wide table: one row per benchmark (in plotting order), one column per scheduler.
ctx_data = df.pivot(index='Benchmark', columns='Scheduler', values='Context_Switches')
ctx_data = ctx_data.reindex(benchmarks)
ctx_data.index = [benchmark_labels[b] for b in benchmarks]

x = np.arange(len(ctx_data.index))
# Set the bar width here instead of inheriting it from an earlier section, so
# this plot still runs when executed on its own or after reordering.
width = 0.25

for i, scheduler in enumerate(schedulers):
    ax.bar(x + i * width, ctx_data[scheduler], width,
           label=scheduler, color=colors[scheduler], alpha=0.8)

ax.set_xlabel('Benchmark Type', fontsize=12, fontweight='bold')
ax.set_ylabel('Context Switches', fontsize=12, fontweight='bold')
ax.set_title('Context Switches: Lower Indicates Less Overhead',
             fontsize=14, fontweight='bold')
# With three bars per cluster, x + width is the centre of the middle bar.
ax.set_xticks(x + width)
ax.set_xticklabels(ctx_data.index)
ax.legend(fontsize=11)
ax.grid(axis='y', alpha=0.3)
ax.set_yscale('log')  # Log scale for better visualization

plt.tight_layout()
plt.savefig('context_switches_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# 3. CPU Share Comparison (Fairness)
fig, ax = plt.subplots(figsize=(12, 6))

# Wide table: one row per benchmark (in plotting order), one column per scheduler.
cpu_data = df.pivot(index='Benchmark', columns='Scheduler', values='CPU_Share')
cpu_data = cpu_data.reindex(benchmarks)
cpu_data.index = [benchmark_labels[b] for b in benchmarks]

x = np.arange(len(cpu_data.index))
# Set the bar width here instead of inheriting it from an earlier section, so
# this plot still runs when executed on its own or after reordering.
width = 0.25

for i, scheduler in enumerate(schedulers):
    ax.bar(x + i * width, cpu_data[scheduler], width,
           label=scheduler, color=colors[scheduler], alpha=0.8)

# Reference line at 100%. It must be drawn before ax.legend() is called,
# because the legend only lists artists that exist at that moment.
ax.axhline(y=100, color='red', linestyle='--', alpha=0.5, label='Max (100%)')

ax.set_xlabel('Benchmark Type', fontsize=12, fontweight='bold')
ax.set_ylabel('CPU Share (%)', fontsize=12, fontweight='bold')
ax.set_title('CPU Utilization: Higher is Better',
             fontsize=14, fontweight='bold')
# With three bars per cluster, x + width is the centre of the middle bar.
ax.set_xticks(x + width)
ax.set_xticklabels(cpu_data.index)
ax.legend(fontsize=11)
ax.grid(axis='y', alpha=0.3)
# Leave a little headroom above the 100% line.
ax.set_ylim([0, 105])

plt.tight_layout()
plt.savefig('cpu_share_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# 4. Summary Statistics Table
# For every benchmark, print the per-scheduler numbers followed by the
# scheduler that wins on each of the three headline metrics.
print("\n" + "="*80)
print("SUMMARY STATISTICS")
print("="*80)

for benchmark in benchmarks:
    print(f"\n{benchmark_labels[benchmark]}:")
    print("-" * 80)
    bench_df = df.loc[df['Benchmark'] == benchmark,
                      ['Scheduler', 'Response_Time', 'Context_Switches', 'CPU_Share']]

    if bench_df.empty:
        # idxmin()/idxmax() raise ValueError on an empty selection, so a
        # benchmark that is absent from the data is reported and skipped.
        print("  (no data for this benchmark)")
        continue

    print(bench_df.to_string(index=False))

    # Find best scheduler for each metric: lowest response time, fewest
    # context switches, highest CPU share.
    best_response = bench_df.loc[bench_df['Response_Time'].idxmin(), 'Scheduler']
    best_ctx = bench_df.loc[bench_df['Context_Switches'].idxmin(), 'Scheduler']
    best_cpu = bench_df.loc[bench_df['CPU_Share'].idxmax(), 'Scheduler']

    print(f"\n  Best Response Time: {best_response}")
    print(f"  Fewest Context Switches: {best_ctx}")
    print(f"  Best CPU Share: {best_cpu}")

# 5. Fairness Analysis for Long CPU (detailed breakdown)
print("\n" + "="*80)
print("LONG CPU FAIRNESS ANALYSIS (4 Concurrent Processes)")
print("="*80)

# Turnaround times in k-ticks, four values per scheduler
# (presumably one per concurrent process -- confirm against the raw logs).
turnaround_samples = {
    'RR': [23239.67, 24835.67, 25331, 26465.67],
    'FIFO': [18500.33, 18579.67, 18701, 37102],  # Note: huge variance!
    'EEVDF': [25373.33, 25564.33, 25899.67, 26095],
}

# Coefficient of variation (std / mean, in percent) of turnaround time.
# Lower CV = more fair.
# Mean and standard deviation are both derived from the same samples so the
# ratio is internally consistent. np.std uses its default ddof=0, i.e. the
# population standard deviation.
fairness_data = {}
for scheduler, samples in turnaround_samples.items():
    mean_turnaround = np.mean(samples)
    std_turnaround = np.std(samples)
    cv = (std_turnaround / mean_turnaround) * 100
    fairness_data[scheduler] = {
        'mean_turnaround': mean_turnaround,
        'std_turnaround': std_turnaround,
        'cv': cv,
    }

    print(f"\n{scheduler}:")
    print(f"  Mean Turnaround: {mean_turnaround:.2f} k-ticks")
    print(f"  Std Dev: {std_turnaround:.2f} k-ticks")
    print(f"  Coefficient of Variation: {cv:.2f}% (lower = more fair)")

print("\n" + "="*80)
print(f"Most Fair: {min(fairness_data, key=lambda x: fairness_data[x]['cv'])}")
print("="*80)