# So Sánh Kết Quả Benchmark

Notebook này so sánh kết quả giữa:
- **Thuật toán của dự án** (Tabu Search): `benchmark_results*.csv`
- **Baseline (PINAP)**: `pinap_*.csv`

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re

## 1. Load Data

In [None]:
# Read the result tables: our benchmark output and the PINAP baseline, first
# for the 10-instance data and then for the combined 15/20-instance data.
# The PINAP files are decoded with 'utf-8-sig'.
benchmark_10 = pd.read_csv('10_instances/benchmark_results.csv')
pinap_10 = pd.read_csv('10_instances/pinap_10.csv', encoding='utf-8-sig')
benchmark_15_20 = pd.read_csv('15_20_instances/benchmark_results_15_20.csv')
pinap_15_20 = pd.read_csv('15_20_instances/pinap_15_20.csv', encoding='utf-8-sig')

# Report the row count of every table, one heading per data set.
for heading, ours, baseline in (
    ("=== 10 Instances ===", benchmark_10, pinap_10),
    ("\n=== 15/20 Instances ===", benchmark_15_20, pinap_15_20),
):
    print(heading)
    print(f"Benchmark: {len(ours)} rows")
    print(f"PINAP: {len(baseline)} rows")

## 2. Xử Lý và Merge Data - 10 Instances

In [None]:
def parse_instance_name(name):
    """Split an instance file name such as ``U_10_0.5_Num_1.txt`` into fields.

    Args:
        name: File name expected to start with ``U_<int>_<number>_Num_<int>.txt``.

    Returns:
        A dict with keys 'Customers' (int), 'Beta' (float) and
        'Instance_Num' (int), or None when the name does not match the pattern.
    """
    found = re.match(r'U_(\d+)_([\d.]+)_Num_(\d+)\.txt', name)
    if not found:
        return None

    customers, beta, instance_num = found.groups()
    return {
        'Customers': int(customers),
        'Beta': float(beta),
        'Instance_Num': int(instance_num),
    }

# Parse benchmark instance names into Customers / Beta / Instance_Num columns.
benchmark_10_parsed = benchmark_10.copy()
benchmark_10_parsed = benchmark_10_parsed.rename(columns={'Instance': 'Instance_Name'})
# Build the parsed columns from a list of dicts rather than `.apply(pd.Series)`:
# a per-row Series holds the float Beta next to the integer fields, which
# upcasts Customers and Instance_Num to float64 (shown as 10.0 / 1.0 later on).
# Names that do not parse (None) become all-NaN rows, as they did before.
parsed_rows = [parse_instance_name(name) or {} for name in benchmark_10_parsed['Instance_Name']]
parsed_info = pd.DataFrame(parsed_rows, index=benchmark_10_parsed.index)
benchmark_10_parsed = pd.concat([benchmark_10_parsed, parsed_info], axis=1)

# Filter PINAP for Center depot only (matching our benchmark)
pinap_10_center = pinap_10[pinap_10['Depot location'] == 'Center'].copy()
pinap_10_center = pinap_10_center.rename(columns={'Instance': 'Instance_Num'})

# Merge on Customers, Beta, Instance_Num.
# NOTE: pd.merge defaults to an inner join, so benchmark rows without a
# matching PINAP row are dropped from the comparison.
merged_10 = pd.merge(
    benchmark_10_parsed,
    pinap_10_center[['Customers', 'Instance_Num', 'Beta', 'Objective value', 'TB (s)']],
    on=['Customers', 'Beta', 'Instance_Num'],
    suffixes=('_ours', '_pinap')
)

# Rename columns for clarity
merged_10 = merged_10.rename(columns={
    'Best Makespan': 'Our_Makespan',
    'Objective value': 'PINAP_Makespan',
    'Time (s)': 'Our_Time',
    'TB (s)': 'PINAP_Time'
})

# Relative gap in percent: negative means our makespan is below PINAP's.
merged_10['Gap_vs_PINAP (%)'] = ((merged_10['Our_Makespan'] - merged_10['PINAP_Makespan']) / merged_10['PINAP_Makespan'] * 100).round(2)

print("=== Merged 10 Instances (Center Depot) ===")
display_cols = ['Instance_Name', 'Beta', 'Init Makespan', 'Our_Makespan', 'PINAP_Makespan', 'Gap_vs_PINAP (%)', 'Our_Time', 'PINAP_Time']
merged_10[display_cols]

## 3. Xử Lý và Merge Data - 15/20 Instances

In [None]:
# Parse benchmark instance names into Customers / Beta / Instance_Num columns.
benchmark_15_20_parsed = benchmark_15_20.copy()
benchmark_15_20_parsed = benchmark_15_20_parsed.rename(columns={'Instance': 'Instance_Name'})
# Build the parsed columns from a list of dicts rather than `.apply(pd.Series)`:
# a per-row Series holds the float Beta next to the integer fields, which
# upcasts Customers and Instance_Num to float64 (shown as 15.0 / 1.0 later on).
# Names that do not parse (None) become all-NaN rows, as they did before.
parsed_rows = [parse_instance_name(name) or {} for name in benchmark_15_20_parsed['Instance_Name']]
parsed_info = pd.DataFrame(parsed_rows, index=benchmark_15_20_parsed.index)
benchmark_15_20_parsed = pd.concat([benchmark_15_20_parsed, parsed_info], axis=1)

# Filter PINAP for Center depot only
pinap_15_20_center = pinap_15_20[pinap_15_20['Depot location'] == 'Center'].copy()
pinap_15_20_center = pinap_15_20_center.rename(columns={'Instance': 'Instance_Num'})

# Merge on Customers, Beta, Instance_Num.
# NOTE: pd.merge defaults to an inner join, so benchmark rows without a
# matching PINAP row are dropped from the comparison.
merged_15_20 = pd.merge(
    benchmark_15_20_parsed,
    pinap_15_20_center[['Customers', 'Instance_Num', 'Beta', 'Objective value', 'TB (s)']],
    on=['Customers', 'Beta', 'Instance_Num'],
    suffixes=('_ours', '_pinap')
)

# Rename columns for clarity
merged_15_20 = merged_15_20.rename(columns={
    'Best Makespan': 'Our_Makespan',
    'Objective value': 'PINAP_Makespan',
    'Time (s)': 'Our_Time',
    'TB (s)': 'PINAP_Time'
})

# Relative gap in percent: negative means our makespan is below PINAP's.
merged_15_20['Gap_vs_PINAP (%)'] = ((merged_15_20['Our_Makespan'] - merged_15_20['PINAP_Makespan']) / merged_15_20['PINAP_Makespan'] * 100).round(2)

print("=== Merged 15/20 Instances (Center Depot) ===")
display_cols = ['Instance_Name', 'Customers', 'Beta', 'Init Makespan', 'Our_Makespan', 'PINAP_Makespan', 'Gap_vs_PINAP (%)', 'Our_Time', 'PINAP_Time']
merged_15_20[display_cols]

## 4. Thống Kê Tổng Hợp

In [None]:
def compute_stats(df, name):
    """Summarise one merged comparison table as a flat dict.

    Args:
        df: Frame with 'Gap_vs_PINAP (%)', 'Our_Time' and 'PINAP_Time' columns.
        name: Label stored under the 'Dataset' key.

    Returns:
        A dict holding the row count, the mean / min / max gap, how many gaps
        are negative, zero and positive, and the mean of both time columns.
        The three means are rounded to 2 decimals.
    """
    gap = df['Gap_vs_PINAP (%)']
    mean_gap = gap.mean().round(2)
    lowest_gap = gap.min()
    highest_gap = gap.max()

    # Classify rows by the sign of the gap.
    n_better = (gap < 0).sum()
    n_equal = (gap == 0).sum()
    n_worse = (gap > 0).sum()

    mean_our_time = df['Our_Time'].mean().round(2)
    mean_pinap_time = df['PINAP_Time'].mean().round(2)

    return {
        'Dataset': name,
        'Num Instances': len(df),
        'Avg Gap vs PINAP (%)': mean_gap,
        'Min Gap (%)': lowest_gap,
        'Max Gap (%)': highest_gap,
        'Better than PINAP': n_better,
        'Equal to PINAP': n_equal,
        'Worse than PINAP': n_worse,
        'Avg Our Time (s)': mean_our_time,
        'Avg PINAP Time (s)': mean_pinap_time,
    }

# One summary row per customer count. The 15- and 20-customer rows both come
# from merged_15_20, split on its 'Customers' column.
stats_10 = compute_stats(merged_10, '10 Customers')
stats_15, stats_20 = (
    compute_stats(merged_15_20[merged_15_20['Customers'] == size], label)
    for size, label in ((15, '15 Customers'), (20, '20 Customers'))
)

summary_df = pd.DataFrame([stats_10, stats_15, stats_20])
print("=== Summary Statistics ===")
summary_df

## 5. Visualization

In [None]:
# Combine all data
all_merged = pd.concat([merged_10, merged_15_20], ignore_index=True)

# One panel per customer count, each with paired bars (ours vs PINAP) per instance.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for idx, n_customers in enumerate([10, 15, 20]):
    ax = axes[idx]
    # Sort by Beta and then Instance_Num so bars within one Beta value appear
    # in a deterministic order.
    data = all_merged[all_merged['Customers'] == n_customers].sort_values(['Beta', 'Instance_Num'])

    x = range(len(data))
    width = 0.35

    # Offset the two series by half a bar width around each tick position.
    ax.bar([i - width/2 for i in x], data['Our_Makespan'], width, label='Our Method', color='steelblue')
    ax.bar([i + width/2 for i in x], data['PINAP_Makespan'], width, label='PINAP', color='coral')

    ax.set_xlabel('Instance')
    ax.set_ylabel('Makespan (min)')
    ax.set_title(f'{n_customers} Customers')
    ax.set_xticks(x)
    # Instance_Num may be stored as float64 (e.g. when the parsed columns were
    # built row by row); cast to int so the label reads "#1" instead of "#1.0".
    ax.set_xticklabels([f"β={b}\n#{int(i)}" for b, i in zip(data['Beta'], data['Instance_Num'])], rotation=45, fontsize=8)
    ax.legend()

plt.suptitle('So Sánh Makespan: Our Method vs PINAP', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Gap distribution by customer size
def _gap_color(gap):
    """Pick the bar colour for one gap value: green below 0, orange up to 10, else red."""
    if gap < 0:
        return 'green'
    if gap <= 10:
        return 'orange'
    return 'red'


fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for idx, n_customers in enumerate([10, 15, 20]):
    ax = axes[idx]
    data = all_merged[all_merged['Customers'] == n_customers]
    gaps = data['Gap_vs_PINAP (%)']

    # One bar per instance, coloured by how the gap compares with 0 and 10.
    colors = [_gap_color(g) for g in gaps]
    positions = range(len(data))
    bars = ax.bar(positions, gaps, color=colors)

    # Zero line separates better-than-PINAP bars from worse ones.
    ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
    ax.set_xlabel('Instance Index')
    ax.set_ylabel('Gap vs PINAP (%)')
    avg_gap = gaps.mean()
    ax.set_title(f'{n_customers} Customers\nAvg Gap: {avg_gap:.2f}%')

plt.suptitle('Gap (%) so với PINAP (Xanh: Tốt hơn, Cam: ≤10%, Đỏ: >10%)', fontsize=12, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Time comparison
fig, ax = plt.subplots(figsize=(10, 6))

categories = ['10 Customers', '15 Customers', '20 Customers']
# One frame per category, in the same order as `categories`.
frames = [
    merged_10,
    merged_15_20[merged_15_20['Customers'] == 15],
    merged_15_20[merged_15_20['Customers'] == 20],
]
our_times = [frame['Our_Time'].mean() for frame in frames]
pinap_times = [frame['PINAP_Time'].mean() for frame in frames]

x = np.arange(len(categories))
width = 0.35
half_width = width / 2

# Paired bars: ours to the left of each tick, PINAP to the right.
bars1 = ax.bar(x - half_width, our_times, width, label='Our Method', color='steelblue')
bars2 = ax.bar(x + half_width, pinap_times, width, label='PINAP', color='coral')

ax.set_ylabel('Average Time (seconds)')
ax.set_title('So Sánh Thời Gian Chạy Trung Bình')
ax.set_xticks(x)
ax.set_xticklabels(categories)
ax.legend()

# Add value labels just above the top of every bar, our bars first.
for container in (bars1, bars2):
    for bar in container:
        height = bar.get_height()
        center = bar.get_x() + bar.get_width() / 2
        ax.annotate(f'{height:.1f}s', xy=(center, height),
                    xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=9)

plt.tight_layout()
plt.show()

In [None]:
# Gap by Beta value
fig, ax = plt.subplots(figsize=(10, 6))

# Aggregate the gap per Beta; reset_index turns Beta back into a column and
# leaves a 0..n-1 row index.
gap_by_beta = (
    all_merged.groupby('Beta')['Gap_vs_PINAP (%)']
    .agg(['mean', 'std', 'min', 'max'])
    .reset_index()
)
mean_gap = gap_by_beta['mean']

# Bars show the mean gap, error bars the standard deviation.
x = range(len(gap_by_beta))
ax.bar(x, mean_gap, yerr=gap_by_beta['std'], capsize=5, color='steelblue', alpha=0.7)
ax.axhline(y=0, color='red', linestyle='--', linewidth=1)

ax.set_xlabel('Beta (Tỷ lệ C2/C1)')
ax.set_ylabel('Gap vs PINAP (%)')
ax.set_title('Gap Trung Bình theo Beta')
ax.set_xticks(x)
ax.set_xticklabels([f'β={b}' for b in gap_by_beta['Beta']])

# Label each bar with its mean gap.
for pos, value in enumerate(mean_gap):
    ax.annotate(f'{value:.1f}%', xy=(pos, value),
                xytext=(0, 5), textcoords="offset points", ha='center', fontsize=10)

plt.tight_layout()
plt.show()