# Comprehensive ANN-SNN Conversion Experiments

This notebook runs a grid of ANN-SNN conversion experiments — each combination is trained and then, if training succeeds, calibrated — over the following configurations:
- **Datasets**: imdb, ag_news
- **Models**: bert-base-uncased, distilbert-base-uncased
- **Timesteps**: 2, 4
- **Neuron Types**: ParaInfNeuron_Text (parallel), IFNeuron_Text (sequential)

In [None]:
# Setup and Dependencies
# Remove any earlier checkout so the clone below starts from a clean directory.
# NOTE(review): the clone lands in the current working directory; it matches
# the path removed here only when the notebook runs from /content — confirm.
!rm -rf /content/Parallel_Conversion
# Clone the `add-calib` branch; later cells invoke Parallel_Conversion/main.py
# through a path relative to the working directory.
!git clone -b add-calib https://github.com/TuanMaiz/Parallel_Conversion.git

# Install required packages
!pip install transformers datasets torch tqdm fvcore psutil pandas matplotlib seaborn

import os
import json
import pandas as pd
import time
import subprocess
from datetime import datetime
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import clear_output

# Configuration Setup

In [None]:
# Experiment Configuration
# The four list-valued keys are the grid axes; the scalar keys are fixed
# settings that run_single_experiment forwards to main.py on every run.
EXPERIMENT_CONFIG = {
    "datasets": ["imdb", "ag_news"],  # forwarded as --text_dataset
    "models": ["bert_base_qcfs", "distilbert_base_qcfs"],  # forwarded as --net_arch
    "timesteps": [2, 4],  # forwarded as --time_step
    "neuron_types": ["ParaInfNeuron_Text", "IFNeuron_Text"],  # forwarded as --neuron_type
    "batch_size": 16,  # forwarded as --batchsize
    "learning_rate": 0.00001,  # forwarded as --lr; also interpolated into the checkpoint filename
    "epochs": 5,  # forwarded as --trainsnn_epochs; also interpolated into the checkpoint filename
    "text_max_len": 256,  # forwarded as --text_max_len
    "gpu_type": "A100"  # forwarded as --gpu_type
}

# Data Collection Framework
class ExperimentDataCollector:
    """Accumulate experiment results in memory and mirror them to a JSON file.

    Each stored record is a dict with ``timestamp``, ``config`` and
    ``metrics`` keys. The complete list is rewritten to ``experiment_log``
    after every ``add_result`` call.
    """

    def __init__(self):
        # In-memory list of result records (oldest first).
        self.results = []
        # Path of the JSON file the results are persisted to.
        self.experiment_log = "experiment_results.json"

    def add_result(self, config, metrics):
        """Append one timestamped record and persist the full list.

        Args:
            config: JSON-serialisable description of the experiment.
            metrics: JSON-serialisable metrics collected for it.
        """
        result = {
            "timestamp": datetime.now().isoformat(),
            "config": config,
            "metrics": metrics
        }
        self.results.append(result)
        self.save_results()

    def save_results(self):
        """Write all results to ``experiment_log`` without risking the old file.

        The data is dumped to a sibling temporary file first and moved into
        place only after the dump succeeded, so an interrupted or failed
        write cannot leave a truncated log behind.
        """
        tmp_path = f"{self.experiment_log}.tmp"
        try:
            with open(tmp_path, "w") as f:
                json.dump(self.results, f, indent=2)
            os.replace(tmp_path, self.experiment_log)
        finally:
            # Only still present when the dump or the rename failed.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def load_results(self):
        """Load previously saved results, if the log file exists.

        A log that cannot be parsed (or does not contain a list) is moved to
        ``<experiment_log>.corrupt`` and the in-memory results are left
        untouched, so one damaged file does not block the notebook or get
        silently overwritten by the next save.
        """
        if not os.path.exists(self.experiment_log):
            return

        try:
            with open(self.experiment_log, "r") as f:
                loaded = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as err:
            problem = str(err)
        else:
            if isinstance(loaded, list):
                self.results = loaded
                return
            problem = f"expected a list of results, found {type(loaded).__name__}"

        backup_path = f"{self.experiment_log}.corrupt"
        os.replace(self.experiment_log, backup_path)
        print(f"Warning: could not load {self.experiment_log} ({problem}); "
              f"moved it to {backup_path}")

    def get_dataframe(self):
        """Return the results as a flat DataFrame (nested keys become dotted columns)."""
        return pd.json_normalize(self.results)

# Initialize collector
# Notebook-level collector that run_complete_experiment and the analysis cells use.
collector = ExperimentDataCollector()
# Pick up results written by earlier runs, if the JSON log already exists.
collector.load_results()

print(f"Loaded {len(collector.results)} previous results")

# Experiment Execution Functions

In [None]:
def _build_experiment_command(dataset, model, timestep, neuron_type, phase, savedir):
    """Return the argv list that launches main.py for ``phase``, or None if the phase is unknown."""
    cfg = EXPERIMENT_CONFIG

    if phase == "training":
        return [
            "python", "Parallel_Conversion/main.py",
            "--dataset", "TextCLS",
            "--net_arch", str(model),
            "--savedir", savedir,
            "--neuron_type", str(neuron_type),
            "--text_dataset", str(dataset),
            "--text_max_len", str(cfg['text_max_len']),
            "--time_step", str(timestep),
            "--trainsnn_epochs", str(cfg['epochs']),
            "--batchsize", str(cfg['batch_size']),
            "--lr", str(cfg['learning_rate']),
            "--measure_efficiency",
            "--gpu_type", str(cfg['gpu_type']),
            "--dev", "0",
        ]

    if phase == "calibration":
        # NOTE(review): savedir is concatenated without a path separator, so
        # this yields "./checkpointsTextCLS-..."; the wd0.0005 / mixup_False
        # parts are hard-coded. Confirm both against how main.py names its
        # checkpoints.
        checkpoint_path = f"{savedir}TextCLS-{model}-T{timestep}/{neuron_type}_lr{EXPERIMENT_CONFIG['learning_rate']}_wd0.0005_epoch{EXPERIMENT_CONFIG['epochs']}_mixup_False_weights_epoch_{EXPERIMENT_CONFIG['epochs']-1}.pth"
        return [
            "python", "Parallel_Conversion/main.py",
            "--dataset", "TextCLS",
            "--net_arch", str(model),
            "--neuron_type", str(neuron_type),
            "--text_dataset", str(dataset),
            "--text_max_len", str(cfg['text_max_len']),
            "--time_step", str(timestep),
            "--batchsize", str(cfg['batch_size']),
            "--measure_efficiency",
            "--gpu_type", str(cfg['gpu_type']),
            "--dev", "0",
            "--calibrate_th",
            "--direct_inference",
            "--pretrained_model",
            "--checkpoint_path", checkpoint_path,
        ]

    return None


def run_single_experiment(dataset, model, timestep, neuron_type, phase="training"):
    """Run one phase of an experiment as a subprocess and capture its output.

    Args:
        dataset: Value forwarded as ``--text_dataset``.
        model: Value forwarded as ``--net_arch``.
        timestep: Value forwarded as ``--time_step``.
        neuron_type: Value forwarded as ``--neuron_type``.
        phase: ``"training"`` or ``"calibration"``.

    Returns:
        A ``(success, output, seconds)`` tuple. ``output`` is the captured
        stdout on success, the captured stderr on a non-zero exit code,
        ``"Timeout"`` when the phase exceeded its time limit, or the
        exception text (with ``seconds == 0``) when the run could not be
        carried out.
    """
    # Construct save directory
    savedir = "./checkpoints"

    cmd = _build_experiment_command(dataset, model, timestep, neuron_type, phase, savedir)
    if cmd is None:
        message = f"Unknown phase {phase!r}; expected 'training' or 'calibration'"
        print(f"❌ {phase} failed with exception: {message}")
        return False, message, 0

    print(f"\n=== Running {phase} for {dataset} + {model} + {neuron_type} + T{timestep} ===")

    # The bar is a time-based estimate against the phase's time limit
    # (2 hours for training, 30 minutes for calibration), not real progress.
    time_limit = 7200 if phase == "training" else 1800
    phase_pbar = tqdm(total=100, desc=f"{phase.capitalize()}", unit="%", leave=False)

    start_time = time.time()
    process = None

    try:
        # An argv list (no shell) means signals reach the Python process
        # itself instead of an intermediate shell.
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        # communicate() drains both pipes while waiting, so the child can
        # never block on a full pipe buffer; retrying after TimeoutExpired
        # does not lose output.
        while True:
            try:
                stdout, stderr = process.communicate(timeout=1)
                break
            except subprocess.TimeoutExpired:
                elapsed = time.time() - start_time
                if elapsed >= time_limit:
                    # Ask politely first, then force the process down.
                    process.terminate()
                    try:
                        process.communicate(timeout=10)
                    except subprocess.TimeoutExpired:
                        process.kill()
                        process.communicate()
                    print(f"❌ {phase} timed out after {time_limit // 60} minutes")
                    return False, "Timeout", elapsed
                progress = int(elapsed / time_limit * 100)
                phase_pbar.update(progress - phase_pbar.n)

        execution_time = time.time() - start_time

        # Final progress bar update
        phase_pbar.update(100 - phase_pbar.n)

        if process.returncode == 0:
            print(f"✅ {phase} completed successfully in {execution_time:.2f}s")
            return True, stdout, execution_time

        print(f"❌ {phase} failed with error:")
        print(stderr)
        return False, stderr, execution_time

    except Exception as e:
        # Boundary handler: one broken run must not abort a whole suite.
        print(f"❌ {phase} failed with exception: {e}")
        return False, str(e), 0
    finally:
        # Also reached on KeyboardInterrupt: never leave the child running.
        if process is not None and process.poll() is None:
            process.kill()
            process.wait()
        phase_pbar.close()

def parse_metrics_from_output(output_text):
    """Extract known metrics from the text printed by an experiment run.

    Args:
        output_text: Captured process output; empty or ``None`` yields ``{}``.

    Returns:
        A dict with any of the keys ``accuracy``, ``flops``, ``parameters``
        and ``memory_gb`` (all floats) that could be found in the text.
    """
    import re

    metrics = {}
    if not output_text:
        return metrics

    accuracy_patterns = [
        r'Accuracy\s*:?\s*(\d+\.\d+)',
        r'acc\s*:?\s*(\d+\.\d+)',
        r'test_accuracy\s*:?\s*(\d+\.\d+)',
        r'val_accuracy\s*:?\s*(\d+\.\d+)'
    ]

    # The first pattern with any hit wins; its last occurrence is taken so
    # the final reported value is used.
    for pattern in accuracy_patterns:
        matches = re.findall(pattern, output_text)
        if matches:
            metrics['accuracy'] = float(matches[-1])
            break

    # Strict number: digits, optional fraction, optional complete exponent.
    # Nothing after the number (whitespace, a following word starting with
    # "E", a stray sign) is captured, so float() cannot fail on the match.
    number = r'(\d+(?:\.\d*)?(?:[eE][+-]?\d+)?)'
    for label, key in (('FLOPs', 'flops'), ('Parameters', 'parameters')):
        found = re.search(label + r'\s*:?\s*' + number, output_text)
        if found:
            metrics[key] = float(found.group(1))

    memory_match = re.search(r'Memory\s*:?\s*(\d+\.\d+)\s*GB', output_text)
    if memory_match:
        metrics['memory_gb'] = float(memory_match.group(1))

    return metrics

def run_complete_experiment(dataset, model, timestep, neuron_type):
    """Train one configuration, calibrate it if training succeeded, and record the outcome.

    The metrics dict is stored through the notebook-level ``collector``
    together with the configuration, and is also returned to the caller.
    """
    run_config = dict(
        dataset=dataset,
        model=model,
        timestep=timestep,
        neuron_type=neuron_type,
        batch_size=EXPERIMENT_CONFIG['batch_size'],
        learning_rate=EXPERIMENT_CONFIG['learning_rate'],
        epochs=EXPERIMENT_CONFIG['epochs'],
    )
    collected = {}

    # Phase 1: Training
    trained, train_output, train_seconds = run_single_experiment(
        dataset, model, timestep, neuron_type, "training"
    )

    if not trained:
        collected['training_success'] = False
        collected['training_time'] = train_seconds
    else:
        collected.update(parse_metrics_from_output(train_output))
        collected['training_time'] = train_seconds
        collected['training_success'] = True

        # Phase 2: Calibration (only attempted after a successful training)
        calibrated, calib_output, calib_seconds = run_single_experiment(
            dataset, model, timestep, neuron_type, "calibration"
        )

        if calibrated:
            # Prefix calibration metrics so they do not overwrite training ones.
            for metric_name, metric_value in parse_metrics_from_output(calib_output).items():
                collected[f"calib_{metric_name}"] = metric_value
            collected['calibration_time'] = calib_seconds
            collected['calibration_success'] = True
        else:
            collected['calibration_success'] = False
            collected['calibration_time'] = calib_seconds

    # Store results
    collector.add_result(run_config, collected)

    return collected

# Quick Test Run

In [None]:
# Quick test run - one configuration as a smoke test before the full grid
test_config = dict(
    dataset="imdb",
    model="distilbert_base_qcfs",
    timestep=2,
    neuron_type="ParaInfNeuron_Text",
)

print("Running test experiment...")
test_metrics = run_complete_experiment(**test_config)

print("\nTest experiment results:")
for key in test_metrics:
    value = test_metrics[key]
    print(f"  {key}: {value}")

# Full Experiment Suite

In [None]:
# Generate all experiment combinations
from itertools import product

# Cartesian product of the four grid axes; the axis order here fixes the
# (dataset, model, timestep, neuron_type) layout of every tuple.
all_experiments = list(product(*(
    EXPERIMENT_CONFIG[axis]
    for axis in ('datasets', 'models', 'timesteps', 'neuron_types')
)))

total_experiments = len(all_experiments)
print(f"Total experiments to run: {total_experiments}")
print("Experiment combinations:")
for i, (dataset, model, timestep, neuron_type) in enumerate(all_experiments):
    line = f"{i+1:2d}. {dataset} + {model} + {neuron_type} + T{timestep}"
    print(line)

In [None]:
# Run all experiments (uncomment to execute)
def run_all_experiments():
    """Run every combination in ``all_experiments`` in order, reporting progress as it goes."""

    def mark(flag):
        # Check mark for a truthy flag, cross otherwise.
        return '✅' if flag else '❌'

    banner = '=' * 60

    # Overall tqdm progress bar across the whole grid
    experiment_pbar = tqdm(all_experiments, desc="Overall Progress", unit="experiment")

    for index, (dataset, model, timestep, neuron_type) in enumerate(experiment_pbar):
        number = index + 1

        experiment_pbar.set_description(f"Exp {number}/{total_experiments}: {dataset}+{model}+{neuron_type}+T{timestep}")

        print(f"\n{banner}")
        print(f"Experiment {number}/{total_experiments}: {dataset} + {model} + {neuron_type} + T{timestep}")
        print(f"{banner}")

        metrics = run_complete_experiment(dataset, model, timestep, neuron_type)
        trained = metrics.get('training_success', False)
        calibrated = metrics.get('calibration_success', False)

        print(f"\n✅ Experiment {number} completed")
        print(f"   Training: {mark(trained)}")
        print(f"   Calibration: {mark(calibrated)}")
        if 'accuracy' in metrics:
            print(f"   Accuracy: {metrics['accuracy']:.4f}")

        # Both phases must have succeeded for a fully green status
        status = "✅" if trained and calibrated else "⚠️"
        experiment_pbar.set_postfix_str(f"Status: {status}")

        # Wipe the cell output after every fourth experiment to keep it short
        if number % 4 == 0:
            clear_output(wait=True)

    experiment_pbar.close()
    print(f"\n🎉 All experiments completed! Results saved to {collector.experiment_log}")

# Uncomment to run all experiments
# run_all_experiments()

# Results Analysis

In [None]:
# Load and analyze results
# get_dataframe() flattens each record with pd.json_normalize, so nested
# fields show up as dotted columns such as 'metrics.accuracy' or 'config.dataset'.
df = collector.get_dataframe()

if len(df) > 0:
    print(f"Loaded {len(df)} experiment results")
    print("\nAvailable columns:")
    for col in df.columns:
        print(f"  {col}")
    
    # Basic statistics
    print("\n=== Experiment Summary ===")
    # Keep only the rows whose training phase reported success.
    successful_experiments = df[df['metrics.training_success'] == True]
    print(f"Successful training experiments: {len(successful_experiments)}")
    
    # The accuracy column exists only if at least one run produced a parsed accuracy.
    if 'metrics.accuracy' in df.columns:
        accuracy_data = df[df['metrics.accuracy'].notna()]
        print(f"Experiments with accuracy data: {len(accuracy_data)}")
        if len(accuracy_data) > 0:
            print(f"Accuracy range: {accuracy_data['metrics.accuracy'].min():.4f} - {accuracy_data['metrics.accuracy'].max():.4f}")
            print(f"Mean accuracy: {accuracy_data['metrics.accuracy'].mean():.4f}")
else:
    print("No experiment results found. Run some experiments first.")

In [None]:
# Export results
# Writes the flat per-run table and, when accuracy data exists, a summary
# grouped by dataset / model / neuron type / timestep.
if len(df) > 0:
    df.to_csv('experiment_results.csv', index=False)
    print("Results exported to 'experiment_results.csv'")
    
    # Create summary
    summary_df = df.copy()
    if 'metrics.accuracy' in summary_df.columns:
        # The mean of a success flag is the fraction of successful runs in the group.
        # NOTE(review): run_complete_experiment records calibration_success only
        # when training succeeded, so that column can hold missing values for
        # failed runs — confirm the aggregation copes with them.
        summary_table = summary_df.groupby(['config.dataset', 'config.model', 'config.neuron_type', 'config.timestep']).agg({
            'metrics.accuracy': ['mean', 'std', 'count'],
            'metrics.training_success': 'mean',
            'metrics.calibration_success': 'mean'
        }).round(4)
        
        print("\n=== Summary Statistics ===")
        print(summary_table)
        
        summary_table.to_csv('experiment_summary.csv')
        print("Summary saved to 'experiment_summary.csv'")
else:
    print("No results to export")


