## Step 1: Setup

In [None]:
import subprocess
import os
import glob
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Configuration (both paths are relative, so they resolve against the
# notebook's current working directory).
BENCHMARK_SOURCE = "benchmark_comprehensive.cpp"  # C++ source handed to g++ in the compile step
BENCHMARK_EXECUTABLE = "./benchmark_comprehensive"  # binary launched in the benchmark step
# Passed as `timeout=` to the single subprocess.run() call that launches the
# benchmark executable, so it caps the whole run, not each algorithm.
TIMEOUT_SECONDS = 300  # 5 minutes

print("‚úì Libraries loaded")

## Step 2: Compile Benchmark (Run Once)

In [None]:
# Compile the benchmark executable.
#
# A build is needed when the binary is missing, or when the source file is
# present and newer than the binary. If the binary exists but the source does
# not, there is nothing to compare against (and nothing to rebuild from), so
# the existing binary is reused instead of crashing in os.path.getmtime().
needs_build = (
    not os.path.exists(BENCHMARK_EXECUTABLE)
    or (
        os.path.exists(BENCHMARK_SOURCE)
        and os.path.getmtime(BENCHMARK_SOURCE) > os.path.getmtime(BENCHMARK_EXECUTABLE)
    )
)

if needs_build:
    print("Compiling benchmark...")
    try:
        result = subprocess.run(
            # Write the binary to the configured path so the compile step and
            # the run step can never disagree about where it lives.
            ["g++", "-o", BENCHMARK_EXECUTABLE, BENCHMARK_SOURCE,
             "-O3", "-std=c++17", "-march=native", "-fopenmp"],
            capture_output=True, text=True
        )
    except OSError as exc:
        # Raised when the compiler itself cannot be launched (e.g. g++ is not
        # installed or not on PATH); report it like any other build failure.
        print(f"‚úó Compilation failed:\n{exc}")
    else:
        if result.returncode == 0:
            print("‚úì Compilation successful!")
        else:
            print(f"‚úó Compilation failed:\n{result.stderr}")
else:
    print("‚úì Benchmark already compiled")

## Step 3: Select Dataset

**Run this cell to see available datasets, then set your choice below.**

In [None]:
# Discover all available datasets
def get_graph_info(filepath):
    """Extract basic graph statistics from a DIMACS-style file.

    The file is scanned for the first line starting with ``"p "``; the third
    and fourth whitespace-separated fields of that line are read as the
    vertex count and the edge count.

    Args:
        filepath: Path of the graph file to inspect.

    Returns:
        A ``(vertices, edges, density)`` tuple. ``density`` is the percentage
        ``2 * edges / (vertices * (vertices - 1)) * 100`` and is ``0`` when
        there are fewer than two vertices. Counts that could not be read
        (missing/unreadable file, no ``p`` line, malformed numbers) stay 0.
    """
    vertices, edges = 0, 0
    try:
        with open(filepath, 'r') as f:
            for line in f:
                if line.startswith('p '):
                    parts = line.split()
                    vertices = int(parts[2])
                    edges = int(parts[3])
                    break
    except (OSError, ValueError, IndexError):
        # Best-effort by design: an unreadable file (OSError), undecodable or
        # non-numeric content (ValueError) or a truncated header (IndexError)
        # leaves the counts at whatever was parsed so far. Anything else,
        # notably KeyboardInterrupt, is allowed to propagate.
        pass
    density = (2 * edges) / (vertices * (vertices - 1)) * 100 if vertices > 1 else 0
    return vertices, edges, density

# Find all datasets: every *.txt / *.clq file directly inside the known
# dataset folders becomes one entry of `all_datasets`.
dataset_folders = ["datasets/real_world", "datasets/synthetic", "datasets/benchmark"]
all_datasets = []

for folder in dataset_folders:
    if not os.path.exists(folder):
        continue
    # The folder's last path component doubles as the category label.
    category = folder.split('/')[-1]
    for path in glob.glob(f"{folder}/*.txt") + glob.glob(f"{folder}/*.clq"):
        v, e, d = get_graph_info(path)
        all_datasets.append({
            'path': path,
            'name': os.path.basename(path),
            'category': category,
            'vertices': v,
            'edges': e,
            'density': d,
        })

# Sort by vertices (smallest first for faster demos). glob returns files in
# filesystem order, so the path is used as a tie-breaker: datasets with equal
# vertex counts then keep the same index number on every run, which matters
# because the dataset is later selected by that number.
all_datasets.sort(key=lambda x: (x['vertices'], x['path']))

print(f"{'#':<3} {'Dataset':<35} {'Category':<12} {'V':>8} {'E':>10} {'Density':>8}")
print("="*80)
for i, ds in enumerate(all_datasets):
    print(f"{i:<3} {ds['name']:<35} {ds['category']:<12} {ds['vertices']:>8} {ds['edges']:>10} {ds['density']:>7.2f}%")

print(f"\nüìä Total: {len(all_datasets)} datasets available")
print("\nüí° Tip: Smaller graphs (low V) run faster. For presentations, try datasets with V < 500")

## Step 4: Choose Your Dataset

**Change the number below to select a dataset from the list above.**

In [None]:
# ---------------------------------------------------------------------------
#  CHANGE THE NUMBER BELOW TO SELECT YOUR DATASET
# ---------------------------------------------------------------------------

SELECTED_DATASET = 0  # <-- Enter the dataset number from the list above

# ---------------------------------------------------------------------------

# The index is usable when it is neither negative nor past the end of the list.
if not (SELECTED_DATASET < 0 or SELECTED_DATASET >= len(all_datasets)):
    selected = all_datasets[SELECTED_DATASET]

    # Summary card for the chosen dataset, emitted in one go.
    rule = "="*60
    print("\n".join([
        "\n" + rule,
        "üìÅ SELECTED DATASET",
        rule,
        f"  Name:     {selected['name']}",
        f"  Category: {selected['category']}",
        f"  Path:     {selected['path']}",
        f"  Vertices: {selected['vertices']:,}",
        f"  Edges:    {selected['edges']:,}",
        f"  Density:  {selected['density']:.2f}%",
        rule,
    ]))

    # Estimate runtime: (exclusive vertex ceiling, label) pairs in ascending
    # order; the first ceiling above the vertex count wins, otherwise the
    # fallback label for large graphs is used.
    runtime_bands = (
        (200, "< 30 seconds"),
        (500, "30 seconds - 2 minutes"),
        (1000, "2-5 minutes"),
    )
    est = next(
        (label for ceiling, label in runtime_bands if selected['vertices'] < ceiling),
        "5+ minutes (some algorithms may timeout)",
    )
    print(f"\n‚è±Ô∏è  Estimated Runtime: {est}")
    print("\n‚úì Ready to run! Execute the next cell.")
else:
    print(f"‚ùå Invalid selection! Please choose a number between 0 and {len(all_datasets)-1}")

## Step 5: Run All 11 Algorithms

**This cell runs all algorithms on the selected dataset.**

In [None]:
# Run benchmark on selected dataset.
#
# Validate the selection first: a negative number would otherwise be accepted
# by Python's negative indexing and silently run a different dataset than the
# one shown by the selection step.
if not 0 <= SELECTED_DATASET < len(all_datasets):
    if all_datasets:
        valid_range = f"choose a number between 0 and {len(all_datasets) - 1}"
    else:
        valid_range = "no datasets were discovered"
    raise IndexError(f"SELECTED_DATASET={SELECTED_DATASET} is invalid: {valid_range}")

dataset_path = all_datasets[SELECTED_DATASET]['path']
dataset_name = all_datasets[SELECTED_DATASET]['name']

print(f"\n{'='*70}")
print(f"üöÄ RUNNING BENCHMARK: {dataset_name}")
print(f"{'='*70}\n")

benchmark_command = [BENCHMARK_EXECUTABLE, dataset_path]

# Start from a placeholder with a failing return code and empty output. If the
# run below times out or cannot be launched, `result` then describes a failed
# benchmark instead of still pointing at an earlier run (or the compiler's
# result), so later cells never parse stale output.
result = subprocess.CompletedProcess(benchmark_command, returncode=-1, stdout="", stderr="")

start_time = datetime.now()

try:
    # TIMEOUT_SECONDS bounds this whole invocation of the executable.
    result = subprocess.run(
        benchmark_command,
        capture_output=True, text=True,
        timeout=TIMEOUT_SECONDS
    )

    # Print output
    print(result.stdout)
    if result.stderr:
        print(f"Warnings: {result.stderr}")
    if result.returncode != 0:
        # Later cells only parse output from a run that exited with status 0.
        print(f"Benchmark exited with non-zero status {result.returncode}")

except subprocess.TimeoutExpired:
    print(f"‚è∞ Benchmark timed out after {TIMEOUT_SECONDS} seconds")
except Exception as e:
    print(f"‚ùå Error: {e}")

end_time = datetime.now()
total_time = (end_time - start_time).total_seconds()

print(f"\n{'='*70}")
print(f"‚úì Total execution time: {total_time:.2f} seconds")
print(f"{'='*70}")

## Step 6: Parse and Visualize Results

In [None]:
# Parse results from stdout
import re

# A finished algorithm is reported on one line shaped like:
#   [1/11] Greedy Heuristic...    <mark> Size:   8, Time:   0.000023 s
# The status mark is matched as "any run of non-space characters" rather than
# a fixed set of symbols, so parsing does not depend on how the mark was
# encoded. The time may optionally carry an exponent (e.g. 2.3e-05).
result_pattern = re.compile(
    r'\[(\d+)/\d+\]\s+(.+?)\.{3}\s+\S+\s+Size:\s*(\d+),\s*Time:\s*([\d.]+(?:[eE][-+]?\d+)?)'
)
# Algorithms that did not produce a result are reported as SKIP or TIMEOUT.
skipped_pattern = re.compile(r'\[(\d+)/\d+\]\s+(.+?)\.{3}\s+(?:SKIP|TIMEOUT)')

# Only output from a successful run is parsed. `result` may not exist at all
# if the benchmark cell has not produced one in this session.
try:
    benchmark_stdout = (result.stdout or "") if result.returncode == 0 else ""
except NameError:
    benchmark_stdout = ""

results = []
lines = benchmark_stdout.split('\n')

# Parse algorithm results
for line in lines:
    match = result_pattern.search(line)
    if match:
        results.append({
            'Algorithm': match.group(2).strip(),
            'Clique Size': int(match.group(3)),
            'Time (s)': float(match.group(4))
        })
        continue
    skip_match = skipped_pattern.search(line)
    if skip_match:
        # No measurement exists: size 0 and a NaN time mark the row as
        # "did not finish".
        results.append({
            'Algorithm': skip_match.group(2).strip(),
            'Clique Size': 0,
            'Time (s)': float('nan')
        })

if results:
    df = pd.DataFrame(results)
    print("\nüìä RESULTS SUMMARY")
    print("="*60)
    print(df.to_string(index=False))

    # Find best results among algorithms that actually finished; rows with a
    # NaN time carry a placeholder size of 0 and must not count as winners.
    finished = df[df['Time (s)'].notna()]
    if not finished.empty:
        max_clique = finished['Clique Size'].max()
        winners = finished[finished['Clique Size'] == max_clique]
        best_algos = winners['Algorithm'].tolist()
        fastest = winners.nsmallest(1, 'Time (s)')

        print(f"\nüèÜ Maximum Clique Size: {max_clique}")
        print(f"   Found by: {', '.join(best_algos)}")
        if not fastest.empty:
            print(f"   Fastest optimal: {fastest.iloc[0]['Algorithm']} ({fastest.iloc[0]['Time (s)']:.6f}s)")
else:
    print("No results parsed. Check the benchmark output above.")

In [None]:
# Visualization: two side-by-side horizontal bar charts (time, clique size).
if results and len(df) > 0:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Rows containing NaN (skipped algorithms) are left out of both charts.
    df_valid = df.dropna()

    # Red for algorithms whose name contains a heuristic keyword, blue otherwise.
    heuristics = ['Greedy', 'Randomized', 'Simulated Annealing']
    colors = []
    for algo in df_valid['Algorithm']:
        is_heuristic = any(h in algo for h in heuristics)
        colors.append('#e74c3c' if is_heuristic else '#3498db')

    # One spec per panel:
    # (axis, data column, x label, title, bar-label format, bar-label text style)
    panels = [
        (axes[0], 'Time (s)', 'Time (seconds)',
         f'Execution Time - {dataset_name}', ' {:.4f}s', {'fontsize': 9}),
        (axes[1], 'Clique Size', 'Clique Size',
         f'Clique Size Found - {dataset_name}', ' {}', {'fontsize': 10, 'fontweight': 'bold'}),
    ]
    for ax, column, xlabel, title, label_fmt, label_style in panels:
        bars = ax.barh(df_valid['Algorithm'], df_valid[column], color=colors, alpha=0.8)
        ax.set_xlabel(xlabel, fontsize=12)
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.invert_yaxis()
        ax.grid(axis='x', alpha=0.3)

        # Value label at the tip of each bar, vertically centred on it.
        for bar, value in zip(bars, df_valid[column]):
            ax.text(bar.get_width(), bar.get_y() + bar.get_height()/2,
                    label_fmt.format(value), va='center', **label_style)

    # Legend shared by both panels, placed above them.
    from matplotlib.patches import Patch
    legend_elements = [
        Patch(facecolor=face, alpha=0.8, label=text)
        for face, text in (
            ('#e74c3c', 'Heuristic (Approximate)'),
            ('#3498db', 'Exact (Optimal)'),
        )
    ]
    fig.legend(handles=legend_elements, loc='upper center', ncol=2,
               bbox_to_anchor=(0.5, 1.02), fontsize=11)

    plt.tight_layout()
    plt.subplots_adjust(top=0.88)

    # Dots in the dataset name become underscores in the output file name.
    output_png = f'demo_results_{dataset_name.replace(".", "_")}.png'
    plt.savefig(output_png, dpi=150, bbox_inches='tight')
    plt.show()

    print(f"\n‚úì Visualization saved as '{output_png}'")
else:
    print("No valid results to visualize.")

---

## 📝 Quick Reference

### Recommended Datasets for Presentations

| Speed | Datasets | Use Case |
|-------|----------|----------|
| ⚡ Fast (< 30s) | Small R-MAT, p_hat300-1 | Quick demos |
| 🔄 Medium (1-2 min) | email-Eu-core, brock200_* | Balanced |
| 🐢 Slow (5+ min) | facebook_combined, twitter | Full capability demo |

### Key Insights to Mention

1. **Heuristics** (red bars) are fast but may find suboptimal cliques
2. **Exact algorithms** (blue bars) guarantee optimal solutions
3. **BBMC** is typically the fastest exact algorithm
4. **Density matters**: High-density graphs are harder for exact algorithms