In [None]:
# Report the installed PyTorch build and whether a CUDA GPU can be used
import torch

cuda_available = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_available}")
if cuda_available:
    # The name is shown for device 0 only; the count covers every visible device
    print(f"CUDA device name: {torch.cuda.get_device_name(0)}")
    print(f"CUDA device count: {torch.cuda.device_count()}")


In [None]:
# Clone the privacy_accountant branch of the project repository.
# NOTE: YOUR_USERNAME in the URL is a placeholder and must be replaced before running.
!git clone -b privacy_accountant https://github.com/YOUR_USERNAME/Comp430_Project.git
# Work from inside the checkout: later cells take the working directory as the project root.
%cd Comp430_Project


In [None]:
# Install the dependencies listed in the repository's requirements.txt
# (run after the %cd above so the relative path resolves inside the checkout)
%pip install -r requirements.txt


In [None]:
# Add repo path to Python path for imports
import sys
import os
import json
import time
import yaml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# The current working directory is treated as the project root and is put at the
# front of sys.path so that modules in the repository can be imported
project_root = os.getcwd()
sys.path.insert(0, project_root)
print(f"Project root: {project_root}")

# Folder that collects the notebook-level summaries and figures; created on demand
output_dir = Path(project_root, "experiments", "out", "colab_runs")
output_dir.mkdir(parents=True, exist_ok=True)
print(f"Output directory: {output_dir}")


In [None]:
# Show which YAML experiment configs exist in <project_root>/configs
config_files = list(Path(project_root, "configs").glob("*.yaml"))
print(f"Found {len(config_files)} configuration files:")
for config_file in sorted(config_files):
    print("- " + config_file.name)


In [None]:
# Run the baseline comparison experiments with different noise settings
import os
import time
import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
from pathlib import Path
from tqdm.notebook import tqdm

# Define the specific configs for our baseline comparison
baseline_configs = [
    "mnist_clients5_no_noise.yaml",
    "mnist_clients5_fixed_noise.yaml",
    "mnist_clients5_adaptive_dp_09.yaml",
    "mnist_clients5_adaptive_dp_08.yaml",
    "mnist_clients5_adaptive_dp_05.yaml"
]

# Initialize results storage
baseline_results = []

# Create progress bar
pbar = tqdm(total=len(baseline_configs), desc="Baseline Experiments")

# Run each config file
for config_file in baseline_configs:
    config_path = f"configs/{config_file}"
    run_id = config_file.replace('.yaml', '')
    
    # Generate a description from the filename
    if "no_noise" in run_id:
        desc = "No DP Noise"
    elif "fixed_noise" in run_id:
        desc = "Fixed DP Noise"
    elif "adaptive_dp_09" in run_id:
        desc = "Adaptive DP (0.9 decay)"
    elif "adaptive_dp_08" in run_id:
        desc = "Adaptive DP (0.8 decay)"
    elif "adaptive_dp_05" in run_id:
        desc = "Adaptive DP (0.5 decay)"
    else:
        desc = run_id
    
    print(f"\n{'='*80}")
    print(f"Running baseline experiment: {desc}")
    print(f"Config file: {config_file}")
    print(f"{'='*80}")
    
    # Measure execution time
    start_time = time.time()
    
    try:
        # Run the experiment
        cmd = f"python experiments/train_secure_sfl.py --config {config_path} --run_id {run_id}"
        print(f"Command: {cmd}")
        !{cmd}
        
        end_time = time.time()
        total_runtime = end_time - start_time
        
        # Record result
        result = {
            "config": config_file,
            "run_id": run_id,
            "description": desc,
            "start_time": start_time,
            "end_time": end_time,
            "total_runtime": total_runtime,
            "metrics_path": f"experiments/out/{run_id}/metrics.json",
            "status": "completed"
        }
        
        print(f"\nExperiment completed in {total_runtime:.2f} seconds ({total_runtime/60:.2f} minutes)")
        
    except Exception as e:
        # Handle errors
        print(f"\n{'!'*80}")
        print(f"Error running experiment with {config_file}: {str(e)}")
        print(f"{'!'*80}")
        
        end_time = time.time()
        total_runtime = end_time - start_time
        
        # Record failure
        result = {
            "config": config_file,
            "run_id": run_id,
            "description": desc,
            "start_time": start_time,
            "end_time": end_time,
            "total_runtime": total_runtime,
            "error": str(e),
            "status": "failed"
        }
    
    # Add to results
    baseline_results.append(result)
    
    # Update progress
    pbar.update(1)

# Close progress bar
pbar.close()

# Save experiment summary
baseline_summary_file = output_dir / "baseline_experiment_summary.json"
with open(baseline_summary_file, 'w') as f:
    json.dump(baseline_results, f, indent=2)
    
print(f"\nAll {len(baseline_configs)} baseline experiments completed")
print(f"Summary saved to {baseline_summary_file}")


In [None]:
# Run client comparison experiments (3, 5, 10, 20, 50 clients) with both Adaptive and Vanilla DP
import os
import time
import json
import re
import subprocess
import numpy as np
import pandas as pd
import io
from pathlib import Path
from tqdm.notebook import tqdm

# Define the specific configs for client comparison
client_configs = [
    # Adaptive DP configs
    "mnist_clients3_adaptive_dp.yaml",
    "mnist_clients5_adaptive_dp.yaml",
    "mnist_clients10_adaptive_dp.yaml",
    "mnist_clients20_adaptive_dp.yaml",
    "mnist_clients50_adaptive_dp.yaml",
    # Vanilla DP configs
    "mnist_clients3_vanilla_dp.yaml",
    "mnist_clients5_vanilla_dp.yaml",
    "mnist_clients10_vanilla_dp.yaml",
    "mnist_clients20_vanilla_dp.yaml",
    "mnist_clients50_vanilla_dp.yaml"
]

# Columns of the per-round accuracy table. Passing them explicitly keeps the CSV
# header in place even when no experiment has produced a single row.
accuracy_columns = ['client_count', 'dp_type', 'round', 'accuracy_type', 'accuracy']

# Patterns used to pull per-round accuracies out of the captured training log.
# The total round count after the slash is matched generically instead of being
# hard-coded to 50, so configs with another number of rounds are parsed as well.
round_pattern = re.compile(
    r"--- Round (\d+)/\d+ ---.*?Validation Metrics:\nComplete Model - Loss: [\d.]+ \| Accuracy: ([\d.]+)%",
    re.DOTALL,
)
test_acc_pattern = re.compile(r"Round (\d+): Test Accuracy: ([\d.]+)%")

# Initialize results storage
client_results = []
all_accuracy_data = []

# Create progress bar
pbar = tqdm(total=len(client_configs), desc="Client Comparison Experiments")

# Run each config file
for config_file in client_configs:
    config_path = f"configs/{config_file}"
    run_id = config_file.replace('.yaml', '')

    # Skip if config file doesn't exist
    if not os.path.exists(config_path):
        print(f"Config file {config_path} does not exist. Skipping.")
        pbar.update(1)
        continue

    # Derive the client count and DP variant from the filename
    client_count = int(re.search(r'clients(\d+)', config_file).group(1))
    dp_type = "Adaptive" if "adaptive" in config_file else "Vanilla"
    desc = f"{client_count} Clients - {dp_type} DP"

    print(f"\n{'='*80}")
    print(f"Running client comparison experiment: {desc}")
    print(f"Config file: {config_file}")
    print(f"{'='*80}")

    # Measure execution time
    start_time = time.time()
    error_message = None

    try:
        argv = ["python", "experiments/train_secure_sfl.py", "--config", config_path, "--run_id", run_id]
        cmd = " ".join(argv)
        print(f"Command: {cmd}")

        # subprocess is used instead of `output = !cmd` because the latter only returns
        # the captured lines and never signals a failed command. stderr is folded into
        # stdout so the saved log contains everything the script printed.
        proc = subprocess.run(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",
        )
        output_text = "\n".join(proc.stdout.splitlines())

        # Keep the raw log next to the notebook for later inspection
        output_file = f"result_{run_id}.txt"
        with open(output_file, "w") as f:
            f.write(output_text)

        # A crashed run is reported as failed and its partial curves are not recorded
        if proc.returncode != 0:
            raise RuntimeError(
                f"training script exited with code {proc.returncode}; see {output_file}"
            )

        # Extract (round, accuracy) pairs for validation and test
        val_matches = round_pattern.findall(output_text)
        test_matches = test_acc_pattern.findall(output_text)

        for accuracy_type, matches in (('Validation', val_matches), ('Test', test_matches)):
            for round_num, accuracy in matches:
                all_accuracy_data.append({
                    'client_count': client_count,
                    'dp_type': dp_type,
                    'round': int(round_num),
                    'accuracy_type': accuracy_type,
                    'accuracy': float(accuracy)
                })
    except Exception as e:
        error_message = str(e)

    end_time = time.time()
    total_runtime = end_time - start_time

    # Fields shared by successful and failed runs
    result = {
        "config": config_file,
        "run_id": run_id,
        "description": desc,
        "client_count": client_count,
        "dp_type": dp_type,
        "start_time": start_time,
        "end_time": end_time,
        "total_runtime": total_runtime,
    }

    if error_message is None:
        result["metrics_path"] = f"experiments/out/{run_id}/metrics.json"
        result["status"] = "completed"
        print(f"\nExperiment completed in {total_runtime:.2f} seconds ({total_runtime/60:.2f} minutes)")
    else:
        print(f"\n{'!'*80}")
        print(f"Error running experiment with {config_file}: {error_message}")
        print(f"{'!'*80}")
        result["error"] = error_message
        result["status"] = "failed"

    # Add to results
    client_results.append(result)

    # Update progress
    pbar.update(1)

    # Save intermediate results after each experiment
    accuracy_df = pd.DataFrame(all_accuracy_data, columns=accuracy_columns)
    accuracy_df.to_csv('client_comparison_accuracy.csv', index=False)

# Close progress bar
pbar.close()

# Save experiment summary
client_summary_file = output_dir / "client_comparison_summary.json"
with open(client_summary_file, 'w') as f:
    json.dump(client_results, f, indent=2)

# Create and save final accuracy DataFrame
accuracy_df = pd.DataFrame(all_accuracy_data, columns=accuracy_columns)
accuracy_df.to_csv('client_comparison_accuracy.csv', index=False)

print(f"\nAll {len(client_configs)} client comparison experiments completed")
failed_runs = [entry["run_id"] for entry in client_results if entry["status"] == "failed"]
if failed_runs:
    print(f"Failed runs ({len(failed_runs)}): {', '.join(failed_runs)}")
print(f"Summary saved to {client_summary_file}")
print(f"Accuracy data saved to client_comparison_accuracy.csv")


In [None]:
# Visualize client comparison results
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the per-round accuracies saved by the client comparison cell. A missing file,
# a file without any content, and a header-only file all count as "no data".
try:
    accuracy_df = pd.read_csv('client_comparison_accuracy.csv')
except (FileNotFoundError, pd.errors.EmptyDataError):
    accuracy_df = pd.DataFrame()
has_data = not accuracy_df.empty

# Define colors for different client numbers
colors = {
    3: '#8B0000',    # Dark Red
    5: '#FF0000',    # Red
    10: '#FF4500',   # Orange Red
    20: '#FF8C00',   # Dark Orange
    50: '#B22222',   # Firebrick
}

# Define line styles for different DP methods
line_styles = {
    'Adaptive': '-',      # Solid
    'Vanilla': '--'       # Dashed
}

# Define marker styles
markers = {
    3: 'o',    # Circle
    5: 's',    # Square
    10: '^',   # Triangle
    20: 'D',   # Diamond
    50: 'X'    # X
}


def plot_client_accuracy(curves_df, accuracy_label, filename, markersize, markevery=None):
    """Draw one accuracy-vs-round figure with a line per (client count, DP type) pair.

    Args:
        curves_df: Rows of the accuracy table for a single accuracy type.
        accuracy_label: "Test" or "Validation"; used in the title and the y-axis label.
        filename: Path of the PNG the figure is saved to.
        markersize: Marker size passed to ``plt.plot``.
        markevery: Marker stride passed to ``plt.plot``; ``None`` marks every point.
    """
    plt.figure(figsize=(12, 8))

    for client_count in sorted(curves_df['client_count'].unique()):
        for dp_type in sorted(curves_df['dp_type'].unique()):
            data = curves_df[(curves_df['client_count'] == client_count) & (curves_df['dp_type'] == dp_type)]
            if data.empty:
                continue
            plt.plot(data['round'], data['accuracy'],
                     color=colors.get(client_count, '#000000'),
                     linestyle=line_styles.get(dp_type, '-'),
                     linewidth=2.5,
                     marker=markers.get(client_count, 'o'),
                     markersize=markersize,
                     markevery=markevery,
                     label=f"{client_count} Clients - {dp_type}")

    plt.grid(True, linestyle='--', alpha=0.7)
    plt.title(f'{accuracy_label} Accuracy Comparison by Client Count', fontsize=18, fontweight='bold')
    plt.xlabel('Training Round', fontsize=16)
    plt.ylabel(f'{accuracy_label} Accuracy (%)', fontsize=16)
    plt.legend(fontsize=12, frameon=True, facecolor='white', edgecolor='#CCCCCC', loc='upper left')
    plt.tight_layout()

    plt.savefig(filename, dpi=300, bbox_inches='tight')
    plt.show()


if has_data:
    # Set the style for plots
    plt.style.use('seaborn-v0_8-whitegrid')  # Updated style name
    plt.rcParams['axes.facecolor'] = '#f8f8f8'  # Light gray background
    plt.rcParams['grid.color'] = '#cccccc'      # Lighter grid lines

    # Test accuracy: every point gets a marker
    test_df = accuracy_df[accuracy_df['accuracy_type'] == 'Test']
    plot_client_accuracy(test_df, 'Test', "client_comparison_test_accuracy.png", markersize=8)

    # Validation accuracy: markers every 5 points to reduce clutter
    val_df = accuracy_df[accuracy_df['accuracy_type'] == 'Validation']
    plot_client_accuracy(val_df, 'Validation', "client_comparison_validation_accuracy.png",
                         markersize=6, markevery=5)

    if test_df.empty:
        print("No test accuracy rows found; skipping the final accuracy table.")
    else:
        # For each (client count, DP type) keep the row of the highest round
        final_test_df = test_df.sort_values('round', ascending=False).drop_duplicates(['client_count', 'dp_type'])
        final_test_pivot = final_test_df.pivot_table(
            index='client_count',
            columns='dp_type',
            values='accuracy'
        ).reset_index()

        print("Final Test Accuracies:")
        display(final_test_pivot)

        # Save the pivot table to CSV
        final_test_pivot.to_csv('client_comparison_final_accuracy.csv')
        print("Final accuracies saved to client_comparison_final_accuracy.csv")
else:
    print("No accuracy data found. Please run the client comparison experiments first to generate data.")


In [None]:
# Run cut layer comparison experiments with 5 clients for both Adaptive and Vanilla DP
# (google.colab is imported lazily in the download step at the bottom, so the cell
# also runs outside Colab)
import os
import time
import json
import re
import subprocess
import numpy as np
import pandas as pd
import io
from pathlib import Path
from tqdm.notebook import tqdm

# Define the specific configs for cut layer comparison
cut_layer_configs = [
    # Adaptive DP configs with different cut layers
    "mnist_clients5_cut_layer2_adaptive_dp.yaml",
    "mnist_clients5_cut_layer4_adaptive_dp.yaml",
    "mnist_clients5_cut_layer6_adaptive_dp.yaml",
    "mnist_clients5_cut_layer8_adaptive_dp.yaml",
    # Vanilla DP configs with different cut layers
    "mnist_clients5_cut_layer2_vanilla_dp.yaml",
    "mnist_clients5_cut_layer4_vanilla_dp.yaml",
    "mnist_clients5_cut_layer6_vanilla_dp.yaml",
    "mnist_clients5_cut_layer8_vanilla_dp.yaml"
]

# Columns of the per-round accuracy table. Passing them explicitly keeps the CSV
# header in place even when no experiment has produced a single row.
accuracy_columns = ['cut_layer', 'dp_type', 'round', 'accuracy_type', 'accuracy', 'privacy_budget']

# Patterns used to pull per-round values out of the captured training log. Round
# totals and the delta value are matched generically instead of being hard-coded
# to 50 rounds / δ=1e-05.
round_pattern = re.compile(
    r"--- Round (\d+)/\d+ ---.*?Validation Metrics:\nComplete Model - Loss: [\d.]+ \| Accuracy: ([\d.]+)%",
    re.DOTALL,
)
test_acc_pattern = re.compile(r"Round (\d+): Test Accuracy: ([\d.]+)%")
# Captures the round number and the first element of the reported budget tuple
budget_pattern = re.compile(r"Round (\d+): Current Privacy Budget \(ε, δ=[^)]*\): \(([^,)\s]+)")

# Initialize results storage
cut_layer_results = []
all_accuracy_data = []

# Create progress bar
pbar = tqdm(total=len(cut_layer_configs), desc="Cut Layer Comparison Experiments")

# Run each config file
for config_file in cut_layer_configs:
    config_path = f"configs/{config_file}"
    run_id = config_file.replace('.yaml', '')

    # Skip if config file doesn't exist
    if not os.path.exists(config_path):
        print(f"Config file {config_path} does not exist. Skipping.")
        pbar.update(1)
        continue

    # Derive the cut layer and DP variant from the filename
    cut_layer = int(re.search(r'cut_layer(\d+)', config_file).group(1))
    dp_type = "Adaptive" if "adaptive" in config_file else "Vanilla"
    desc = f"Cut Layer {cut_layer} - {dp_type} DP"

    print(f"\n{'='*80}")
    print(f"Running cut layer comparison experiment: {desc}")
    print(f"Config file: {config_file}")
    print(f"{'='*80}")

    # Measure execution time
    start_time = time.time()
    error_message = None
    privacy_budget = "N/A"   # text form, stored in the JSON summary
    budget_round = None      # round the reported budget belongs to

    try:
        argv = ["python", "experiments/train_secure_sfl.py", "--config", config_path, "--run_id", run_id]
        cmd = " ".join(argv)
        print(f"Command: {cmd}")

        # subprocess is used instead of `output = !cmd` because the latter only returns
        # the captured lines and never signals a failed command. stderr is folded into
        # stdout so the saved log contains everything the script printed.
        proc = subprocess.run(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",
        )
        output_text = "\n".join(proc.stdout.splitlines())

        # Keep the raw log next to the notebook for later inspection
        output_file = f"result_{run_id}.txt"
        with open(output_file, "w") as f:
            f.write(output_text)

        # A crashed run is reported as failed and its partial curves are not recorded
        if proc.returncode != 0:
            raise RuntimeError(
                f"training script exited with code {proc.returncode}; see {output_file}"
            )

        # Extract (round, accuracy) pairs for validation and test
        val_matches = round_pattern.findall(output_text)
        test_matches = test_acc_pattern.findall(output_text)

        # Use the privacy budget reported for the highest round found in the log
        budget_matches = budget_pattern.findall(output_text)
        if budget_matches:
            budget_round_text, privacy_budget = max(budget_matches, key=lambda match: int(match[0]))
            budget_round = int(budget_round_text)

        # Numeric form for the table, so it can be aggregated; NaN when unavailable
        try:
            budget_value = float(privacy_budget)
        except ValueError:
            budget_value = float('nan')

        # Store validation accuracies (privacy_budget is left empty for these rows)
        for round_num, accuracy in val_matches:
            all_accuracy_data.append({
                'cut_layer': cut_layer,
                'dp_type': dp_type,
                'round': int(round_num),
                'accuracy_type': 'Validation',
                'accuracy': float(accuracy)
            })

        # Store test accuracies; only the row of the budget's round carries the budget
        for round_num, accuracy in test_matches:
            all_accuracy_data.append({
                'cut_layer': cut_layer,
                'dp_type': dp_type,
                'round': int(round_num),
                'accuracy_type': 'Test',
                'accuracy': float(accuracy),
                'privacy_budget': budget_value if int(round_num) == budget_round else float('nan')
            })
    except Exception as e:
        error_message = str(e)

    end_time = time.time()
    total_runtime = end_time - start_time

    # Fields shared by successful and failed runs
    result = {
        "config": config_file,
        "run_id": run_id,
        "description": desc,
        "cut_layer": cut_layer,
        "dp_type": dp_type,
        "start_time": start_time,
        "end_time": end_time,
        "total_runtime": total_runtime,
    }

    if error_message is None:
        result["privacy_budget"] = privacy_budget
        result["metrics_path"] = f"experiments/out/{run_id}/metrics.json"
        result["status"] = "completed"
        print(f"\nExperiment completed in {total_runtime:.2f} seconds ({total_runtime/60:.2f} minutes)")
    else:
        print(f"\n{'!'*80}")
        print(f"Error running experiment with {config_file}: {error_message}")
        print(f"{'!'*80}")
        result["error"] = error_message
        result["status"] = "failed"

    # Add to results
    cut_layer_results.append(result)

    # Update progress
    pbar.update(1)

    # Save intermediate results after each experiment
    accuracy_df = pd.DataFrame(all_accuracy_data, columns=accuracy_columns)
    accuracy_df.to_csv('cut_layer_comparison_accuracy.csv', index=False)

# Close progress bar
pbar.close()

# Save experiment summary
cut_layer_summary_file = output_dir / "cut_layer_comparison_summary.json"
with open(cut_layer_summary_file, 'w') as f:
    json.dump(cut_layer_results, f, indent=2)

# Create and save final accuracy DataFrame
accuracy_df = pd.DataFrame(all_accuracy_data, columns=accuracy_columns)
accuracy_df.to_csv('cut_layer_comparison_accuracy.csv', index=False)

print(f"\nAll {len(cut_layer_configs)} cut layer comparison experiments completed")
failed_runs = [entry["run_id"] for entry in cut_layer_results if entry["status"] == "failed"]
if failed_runs:
    print(f"Failed runs ({len(failed_runs)}): {', '.join(failed_runs)}")
print(f"Summary saved to {cut_layer_summary_file}")
print(f"Accuracy data saved to cut_layer_comparison_accuracy.csv")

# Create a summary table of final test accuracies
test_df = accuracy_df[accuracy_df['accuracy_type'] == 'Test']
if test_df.empty:
    print("No test accuracies were parsed; cut_layer_final_accuracy.csv was not written.")
else:
    # For each (cut layer, DP type) keep the row of the highest round
    final_test_df = test_df.sort_values('round', ascending=False).drop_duplicates(['cut_layer', 'dp_type'])
    final_test_pivot = final_test_df.pivot_table(
        index='cut_layer',
        columns='dp_type',
        values=['accuracy', 'privacy_budget']
    ).reset_index()

    # Save the pivot table to CSV
    final_test_pivot.to_csv('cut_layer_final_accuracy.csv')
    print(f"Final accuracies saved to cut_layer_final_accuracy.csv")

# Download the CSV files
try:
    from google.colab import files
    files.download('cut_layer_comparison_accuracy.csv')
    files.download('cut_layer_final_accuracy.csv')
    print("CSV files downloaded successfully")
except ImportError:
    print("Note: CSV download is only available in Google Colab. If you're running this locally, the files are saved in your working directory.")
except Exception as e:
    print(f"CSV download failed: {e}")


In [None]:
# Visualize cut layer comparison results
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the per-round accuracies saved by the cut layer comparison cell. A missing file,
# a file without any content, and a header-only file all count as "no data".
try:
    accuracy_df = pd.read_csv('cut_layer_comparison_accuracy.csv')
except (FileNotFoundError, pd.errors.EmptyDataError):
    accuracy_df = pd.DataFrame()
has_data = not accuracy_df.empty

# Define colors for different cut layers
colors = {
    2: '#8B0000',    # Dark Red
    4: '#FF0000',    # Red
    6: '#FF4500',    # Orange Red
    8: '#FF8C00',    # Dark Orange
}

# Define line styles for different DP methods
line_styles = {
    'Adaptive': '-',      # Solid
    'Vanilla': '--'       # Dashed
}

# Define marker styles
markers = {
    2: 'o',    # Circle
    4: 's',    # Square
    6: '^',    # Triangle
    8: 'D',    # Diamond
}


def plot_cut_layer_accuracy(curves_df, accuracy_label, filename, markersize, markevery=None):
    """Draw one accuracy-vs-round figure with a line per (cut layer, DP type) pair.

    Args:
        curves_df: Rows of the accuracy table for a single accuracy type.
        accuracy_label: "Test" or "Validation"; used in the title and the y-axis label.
        filename: Path of the PNG the figure is saved to.
        markersize: Marker size passed to ``plt.plot``.
        markevery: Marker stride passed to ``plt.plot``; ``None`` marks every point.
    """
    plt.figure(figsize=(12, 8))

    for cut_layer in sorted(curves_df['cut_layer'].unique()):
        for dp_type in sorted(curves_df['dp_type'].unique()):
            data = curves_df[(curves_df['cut_layer'] == cut_layer) & (curves_df['dp_type'] == dp_type)]
            if data.empty:
                continue
            plt.plot(data['round'], data['accuracy'],
                     color=colors.get(cut_layer, '#000000'),
                     linestyle=line_styles.get(dp_type, '-'),
                     linewidth=2.5,
                     marker=markers.get(cut_layer, 'o'),
                     markersize=markersize,
                     markevery=markevery,
                     label=f"Cut Layer {cut_layer} - {dp_type}")

    plt.grid(True, linestyle='--', alpha=0.7)
    plt.title(f'{accuracy_label} Accuracy Comparison by Cut Layer', fontsize=18, fontweight='bold')
    plt.xlabel('Training Round', fontsize=16)
    plt.ylabel(f'{accuracy_label} Accuracy (%)', fontsize=16)
    plt.legend(fontsize=12, frameon=True, facecolor='white', edgecolor='#CCCCCC', loc='upper left')
    plt.tight_layout()

    plt.savefig(filename, dpi=300, bbox_inches='tight')
    plt.show()


if has_data:
    # Set the style for plots
    plt.style.use('seaborn-v0_8-whitegrid')  # Updated style name
    plt.rcParams['axes.facecolor'] = '#f8f8f8'  # Light gray background
    plt.rcParams['grid.color'] = '#cccccc'      # Lighter grid lines

    # Test accuracy: every point gets a marker
    test_df = accuracy_df[accuracy_df['accuracy_type'] == 'Test']
    plot_cut_layer_accuracy(test_df, 'Test', "cut_layer_test_accuracy.png", markersize=8)

    # Validation accuracy: markers every 5 points to reduce clutter
    val_df = accuracy_df[accuracy_df['accuracy_type'] == 'Validation']
    plot_cut_layer_accuracy(val_df, 'Validation', "cut_layer_validation_accuracy.png",
                            markersize=6, markevery=5)

    # Try to load the final accuracy summary
    try:
        final_test_pivot = pd.read_csv('cut_layer_final_accuracy.csv')
        print("Final Test Accuracies:")
        display(final_test_pivot)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print("Final accuracy summary file not found.")
else:
    print("No accuracy data found. Please run the cut layer comparison experiments first to generate data.")


In [None]:
# Plot the baseline comparison results with Meta paper style (red coloring)
import matplotlib.pyplot as plt
import numpy as np
import json
from pathlib import Path
import seaborn as sns

# Set the Meta paper style. Matplotlib 3.6 renamed the bundled seaborn styles to
# "seaborn-v0_8-*" and the old names stopped working in later releases, so use
# whichever of the two names this installation provides.
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
red_colors = ['#FF0000', '#CC0000', '#990000', '#660000', '#330000']  # Different shades of red

# Load the results
results_data = []   # one summary row per experiment whose metrics could be read
histories = {}      # description -> per-round series used for the line plots

for exp in baseline_results:
    try:
        # Runs recorded as failed carry no metrics path
        metrics_rel_path = exp.get('metrics_path')
        if metrics_rel_path is None:
            print(f"Warning: {exp['run_id']} has no metrics (status: {exp.get('status')}); skipping")
            continue

        # Load metrics for this experiment
        metrics_path = Path(project_root) / metrics_rel_path
        if not metrics_path.exists():
            print(f"Warning: No metrics found for {exp['run_id']}")
            continue

        with open(metrics_path, 'r') as f:
            metrics = json.load(f)

        # Store the history for plotting
        if 'history' in metrics:
            histories[exp['description']] = {
                'rounds': metrics['history']['round'],
                'accuracy': metrics['history']['accuracy'],
                # Runs without privacy accounting get an all-zero series, which the
                # privacy plot below leaves out
                'privacy_epsilon': metrics['history'].get('privacy_epsilon', [0] * len(metrics['history']['round']))
            }

        # Extract key metrics
        results_data.append({
            'description': exp['description'],
            'final_accuracy': metrics.get('final_test_acc', None),
            'final_epsilon': metrics.get('privacy', {}).get('final_epsilon', None),
            'runtime_minutes': exp['total_runtime'] / 60
        })
    except Exception as e:
        print(f"Error processing {exp['run_id']}: {e}")

# Create a figure with Meta paper style
plt.figure(figsize=(10, 6))

# Plot accuracy over training rounds for each experiment
for i, (desc, data) in enumerate(histories.items()):
    plt.plot(data['rounds'], data['accuracy'],
             color=red_colors[i % len(red_colors)],
             linewidth=2.5,
             marker='o',
             markersize=6,
             label=desc)

# Set the style
plt.grid(True, linestyle='--', alpha=0.7)
plt.title('Accuracy Comparison of Different DP Noise Settings', fontsize=16, fontweight='bold')
plt.xlabel('Training Round', fontsize=14)
plt.ylabel('Test Accuracy', fontsize=14)
plt.legend(fontsize=12, frameon=True, facecolor='white', edgecolor='#CCCCCC')
plt.tight_layout()

# Add a text box with final accuracies
textbox = ""
for result in sorted(results_data, key=lambda x: x['description']):
    final_accuracy = result['final_accuracy']
    # The metrics file may lack 'final_test_acc'; formatting None with :.4f would raise
    accuracy_text = f"{final_accuracy:.4f}" if final_accuracy is not None else "n/a"
    textbox += f"{result['description']}: {accuracy_text}\n"

if textbox:
    props = dict(boxstyle='round', facecolor='white', alpha=0.7)
    plt.text(0.02, 0.02, textbox, transform=plt.gca().transAxes, fontsize=10,
             verticalalignment='bottom', bbox=props)

# Save the figure
output_file = output_dir / "baseline_accuracy_comparison.png"
plt.savefig(output_file, dpi=300, bbox_inches='tight')
plt.show()

print(f"Comparison plot saved to {output_file}")

# Create a second plot for privacy budget evolution
plt.figure(figsize=(10, 6))

# Plot privacy budget over training rounds for each experiment
for i, (desc, data) in enumerate(histories.items()):
    if 'privacy_epsilon' in data and any(data['privacy_epsilon']):  # Only if privacy data exists
        plt.plot(data['rounds'], data['privacy_epsilon'],
                 color=red_colors[i % len(red_colors)],
                 linewidth=2.5,
                 marker='o',
                 markersize=6,
                 label=desc)

# Set the style
plt.grid(True, linestyle='--', alpha=0.7)
plt.title('Privacy Budget Evolution of Different DP Noise Settings', fontsize=16, fontweight='bold')
plt.xlabel('Training Round', fontsize=14)
plt.ylabel('Privacy Budget (ε)', fontsize=14)
plt.legend(fontsize=12, frameon=True, facecolor='white', edgecolor='#CCCCCC')
plt.tight_layout()

# Save the figure
privacy_output_file = output_dir / "baseline_privacy_comparison.png"
plt.savefig(privacy_output_file, dpi=300, bbox_inches='tight')
plt.show()

print(f"Privacy comparison plot saved to {privacy_output_file}")

# Create a summary table (explicit columns keep the sort valid when nothing was loaded)
import pandas as pd
summary_df = pd.DataFrame(
    results_data,
    columns=['description', 'final_accuracy', 'final_epsilon', 'runtime_minutes'],
)
summary_df = summary_df.sort_values('description')
summary_df


OSError: 'seaborn-whitegrid' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)

In [None]:
# This cell is left empty intentionally to remove the experiment_configs list approach


In [None]:
# Function to run an experiment with progress tracking
def run_experiment(config_file, run_id, description):
    print(f"\n{'='*80}")
    print(f"Starting experiment: {description}")
    print(f"Config file: {config_file}")
    print(f"Run ID: {run_id}")
    print(f"{'='*80}\n")
    
    # Create timestamp for this run
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    run_id_with_timestamp = f"{run_id}_{timestamp}"
    
    # Run the experiment
    start_time = time.time()
    cmd = f"python experiments/train_secure_sfl.py --config configs/{config_file} --run_id {run_id_with_timestamp}"
    print(f"Command: {cmd}")
    !{cmd}
    end_time = time.time()
    
    # Calculate total runtime
    total_runtime = end_time - start_time
    print(f"\nExperiment completed in {total_runtime:.2f} seconds ({total_runtime/60:.2f} minutes)")
    
    # Return experiment metadata
    return {
        "config": config_file,
        "run_id": run_id_with_timestamp,
        "description": description,
        "start_time": start_time,
        "end_time": end_time,
        "total_runtime": total_runtime,
        "metrics_path": f"experiments/out/{run_id_with_timestamp}/metrics.json"
    }


In [None]:
# Run and record all experiments from configs directory
import os
import time
import glob
import json
from pathlib import Path
from tqdm.notebook import tqdm

# Get all config files directly
config_files = sorted(glob.glob("configs/*.yaml"))
print(f"Found {len(config_files)} configuration files to run")

# Initialize results storage
all_results = []

# Create progress bar
pbar = tqdm(total=len(config_files), desc="Experiments")

# Run each config file directly
for i, config_path in enumerate(config_files):
    # Extract filename without path and extension
    config_file = os.path.basename(config_path)
    run_id = config_file.replace('.yaml', '')
    
    # Generate a simple description from the filename
    desc = f"Experiment with {config_file}"
    
    print(f"\n{'='*80}")
    print(f"Running experiment {i+1}/{len(config_files)}")
    print(f"Config file: {config_file}")
    print(f"{'='*80}")
    
    # Measure execution time
    start_time = time.time()
    
    try:
        # Run the experiment
        cmd = f"python experiments/train_secure_sfl.py --config {config_path} --run_id {run_id}"
        print(f"Command: {cmd}")
        !{cmd}
        
        end_time = time.time()
        total_runtime = end_time - start_time
        
        # Record result
        result = {
            "config": config_file,
            "run_id": run_id,
            "description": desc,
            "start_time": start_time,
            "end_time": end_time,
            "total_runtime": total_runtime,
            "metrics_path": f"experiments/out/{run_id}/metrics.json",
            "status": "completed"
        }
        
        print(f"\nExperiment completed in {total_runtime:.2f} seconds ({total_runtime/60:.2f} minutes)")
        
    except Exception as e:
        # Handle errors
        print(f"\n{'!'*80}")
        print(f"Error running experiment with {config_file}: {str(e)}")
        print(f"{'!'*80}")
        
        end_time = time.time()
        total_runtime = end_time - start_time
        
        # Record failure
        result = {
            "config": config_file,
            "run_id": run_id,
            "description": desc,
            "start_time": start_time,
            "end_time": end_time,
            "total_runtime": total_runtime,
            "error": str(e),
            "status": "failed"
        }
    
    # Add to results
    all_results.append(result)
    
    # Update progress
    pbar.update(1)

# Close progress bar
pbar.close()

# Save experiment summary
summary_file = output_dir / "experiment_summary.json"
with open(summary_file, 'w') as f:
    json.dump(all_results, f, indent=2)
    
print(f"\nAll {len(config_files)} experiments completed")
print(f"Summary saved to {summary_file}")


In [None]:
# Load experiment summary
with open(summary_file, 'r') as f:
    experiment_summary = json.load(f)

# Columns of the results table. Passing them explicitly keeps the DataFrame
# schema stable even when no experiment produced metrics, so the plotting
# cells below do not fail with a KeyError on a column-less frame.
RESULT_COLUMNS = [
    'run_id', 'description', 'config', 'runtime_minutes', 'final_accuracy',
    'final_epsilon', 'dataset', 'num_clients', 'batch_size', 'cut_layer',
    'rounds', 'dp_mode'
]

# Create a DataFrame for easier analysis
results_data = []

for exp in experiment_summary:
    # Failed runs have no trustworthy metrics; skip them up front rather than
    # tripping over a missing 'metrics_path' key further down.
    if exp.get('status') == 'failed' or 'metrics_path' not in exp:
        print(f"Warning: Skipping {exp['run_id']} (status: {exp.get('status', 'unknown')})")
        continue

    # Load metrics for this experiment
    metrics_path = Path(project_root) / exp['metrics_path']
    if not metrics_path.exists():
        print(f"Warning: No metrics found for {exp['run_id']}")
        continue

    try:
        with open(metrics_path, 'r') as f:
            metrics = json.load(f)

        # `or {}` also covers sections that are present but null in the JSON,
        # which would otherwise raise AttributeError and drop the whole run.
        run_config = metrics.get('config') or {}
        privacy = metrics.get('privacy') or {}
        dp_noise = run_config.get('dp_noise') or {}

        # Extract key metrics
        results_data.append({
            'run_id': exp['run_id'],
            'description': exp['description'],
            'config': exp['config'],
            'runtime_minutes': exp['total_runtime'] / 60,
            'final_accuracy': metrics.get('final_test_acc', None),
            'final_epsilon': privacy.get('final_epsilon', None),
            'dataset': run_config.get('dataset', '').lower(),
            'num_clients': run_config.get('num_clients', 0),
            'batch_size': run_config.get('batch_size', 0),
            'cut_layer': run_config.get('cut_layer', 0),
            'rounds': run_config.get('num_rounds', 0),
            'dp_mode': dp_noise.get('mode', 'unknown')
        })
    except Exception as e:
        # Best effort: one unreadable or malformed metrics file must not
        # prevent the remaining experiments from being summarised.
        print(f"Error processing {exp['run_id']}: {e}")

# Create DataFrame
results_df = pd.DataFrame(results_data, columns=RESULT_COLUMNS)
results_df


In [None]:
# Plot accuracy vs privacy budget (epsilon)
# One marker per experiment: colour = dataset, marker shape = DP mode,
# marker size = number of clients.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=results_df,
    x='final_epsilon',
    y='final_accuracy',
    hue='dataset',
    style='dp_mode',
    size='num_clients',
    sizes=(50, 200),
    alpha=0.7,
    ax=ax,
)

ax.set_title('Accuracy vs. Privacy Trade-off')
ax.set_xlabel('Privacy Budget (ε)')
ax.set_ylabel('Test Accuracy')
ax.grid(True, linestyle='--', alpha=0.7)

# Persist the figure next to the other run artefacts, then render it inline.
fig.savefig(output_dir / 'accuracy_vs_privacy.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Filter for client scaling experiments (MNIST with vanilla DP)
config_names = results_df['config']
is_client_scaling = config_names.str.contains('mnist_clients') & config_names.str.contains('vanilla')
client_scaling_df = results_df[is_client_scaling].sort_values('num_clients')

# Create comparison plots: accuracy on the left, runtime on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# (axis, y column, title, y label) for each panel
panels = [
    (ax1, 'final_accuracy', 'Accuracy vs. Number of Clients', 'Test Accuracy'),
    (ax2, 'runtime_minutes', 'Runtime vs. Number of Clients', 'Runtime (minutes)'),
]
for panel_ax, y_column, panel_title, y_label in panels:
    sns.barplot(x='num_clients', y=y_column, data=client_scaling_df, ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Number of Clients')
    panel_ax.set_ylabel(y_label)
    panel_ax.grid(axis='y', linestyle='--', alpha=0.7)

fig.tight_layout()
fig.savefig(output_dir / 'client_scaling_analysis.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Filter for split strategy experiments
# .copy() makes this an independent frame, so adding a column below neither
# triggers SettingWithCopyWarning nor risks touching results_df.
split_strategy_df = results_df[results_df['config'].str.contains('mnist') &
                              (results_df['config'].str.contains('cut_layer') |
                               results_df['run_id'].str.contains('clients5_vanilla'))].copy()

# Add a human-readable split strategy column
def get_split_strategy(row):
    """Return a display label for one experiment row.

    Args:
        row: Mapping-like row providing the 'run_id' and 'cut_layer' fields.

    Returns:
        'Centralized' if the run id contains 'central', 'Federated Learning'
        if it contains 'fl_sim', otherwise 'Split Learning (cut=<cut_layer>)'.
    """
    if 'central' in row['run_id']:
        return 'Centralized'
    elif 'fl_sim' in row['run_id']:
        return 'Federated Learning'
    else:
        return f"Split Learning (cut={row['cut_layer']})"

# Built as a list rather than DataFrame.apply(axis=1): apply returns an empty
# DataFrame for an empty frame, which cannot be assigned to a single column.
split_strategy_df['split_strategy'] = [
    get_split_strategy(row) for _, row in split_strategy_df.iterrows()
]

# Metrics to compare and their axis labels. Named `metric_columns` so the list
# does not reuse the name `metrics`, which other cells use for loaded JSON.
metric_columns = ['final_accuracy', 'final_epsilon', 'runtime_minutes']
metric_names = ['Test Accuracy', 'Privacy Budget (ε)', 'Runtime (minutes)']

# Create plot for each metric (a single figure; the earlier extra
# plt.figure() call only produced a blank figure and was dropped)
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

for ax, metric, name in zip(axes, metric_columns, metric_names):
    sns.barplot(x='split_strategy', y=metric, data=split_strategy_df, ax=ax)
    ax.set_title(f'{name} by Split Strategy')
    ax.set_xlabel('Split Strategy')
    ax.set_ylabel(name)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    # Restyle the existing tick labels in place; re-setting them through
    # set_xticklabels() emits a FixedLocator warning on recent matplotlib.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

plt.tight_layout()
plt.savefig(output_dir / 'split_strategy_comparison.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Filter for DP comparison experiments: 5-client MNIST / BCW runs using either
# vanilla or adaptive DP. Non-capturing groups keep pandas from warning about
# match groups in str.contains.
dp_config_pattern = r'(?:mnist|bcw)_clients5_(?:vanilla|adaptive)'
# .copy() so the column added below goes into an independent frame.
dp_comparison_df = results_df[results_df['config'].str.contains(dp_config_pattern)].copy()

# Add a dataset-dp column for grouping
dp_comparison_df['dataset_dp'] = [
    f"{dataset}-{'adaptive' if 'adaptive' in run_id else 'vanilla'}"
    for dataset, run_id in zip(dp_comparison_df['dataset'], dp_comparison_df['run_id'])
]

# Plot accuracy evolution over time
plt.figure(figsize=(12, 8))

# For each experiment, load history and plot accuracy evolution
for _, exp in dp_comparison_df.iterrows():
    # results_df carries no 'metrics_path' column, so indexing it raised a
    # KeyError here. Rebuild the path from the run id using the same layout
    # the experiment runner records (experiments/out/<run_id>/metrics.json).
    metrics_path = Path(project_root) / "experiments" / "out" / exp['run_id'] / "metrics.json"
    try:
        with open(metrics_path, 'r') as f:
            metrics = json.load(f)

        if 'history' in metrics:
            rounds = metrics['history']['round']
            accuracy = metrics['history']['accuracy']

            # Plot line
            plt.plot(rounds, accuracy, marker='o', alpha=0.7,
                     label=f"{exp['dataset'].upper()} - {'Adaptive' if 'adaptive' in exp['run_id'] else 'Vanilla'} DP")
    except Exception as e:
        # Best effort: a missing or malformed metrics file skips only that run.
        print(f"Error processing history for {exp['run_id']}: {e}")

plt.title('Accuracy Evolution: Adaptive vs. Vanilla DP')
plt.xlabel('Training Round')
plt.ylabel('Test Accuracy')
plt.grid(True, linestyle='--', alpha=0.7)
# Only draw a legend when at least one curve was plotted; an empty legend
# call just emits a "no artists with labels" warning.
if plt.gca().get_legend_handles_labels()[0]:
    plt.legend()
plt.savefig(output_dir / 'dp_comparison.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Plot privacy budget evolution for a few selected experiments
# Each entry is a run-id prefix; the first usable run matching it is plotted.
selected_experiments = [
    'mnist_clients5_vanilla',
    'mnist_clients5_adaptive',
    'mnist_fl_sim',
    'bcw_clients5_vanilla'
]

plt.figure(figsize=(12, 6))

for exp_type in selected_experiments:
    # Take the first run with this prefix that is not marked failed and that
    # records a metrics path. Failure records may lack 'metrics_path', and
    # indexing it outside the try block would abort the whole cell.
    exp = next(
        (candidate for candidate in experiment_summary
         if candidate['run_id'].startswith(exp_type)
         and candidate.get('status') != 'failed'
         and 'metrics_path' in candidate),
        None
    )
    if exp is None:
        print(f"Warning: No usable run found for {exp_type}")
        continue

    metrics_path = Path(project_root) / exp['metrics_path']

    try:
        with open(metrics_path, 'r') as f:
            metrics = json.load(f)

        if 'history' in metrics:
            rounds = metrics['history']['round']
            privacy_epsilon = metrics['history']['privacy_epsilon']

            plt.plot(rounds, privacy_epsilon, marker='o', alpha=0.7, label=exp_type)
    except Exception as e:
        # Best effort: a missing file or missing history key skips this curve.
        print(f"Error processing privacy history for {exp_type}: {e}")

plt.title('Privacy Budget Evolution')
plt.xlabel('Training Round')
plt.ylabel('Privacy Budget (ε)')
plt.grid(True, linestyle='--', alpha=0.7)
# Skip the legend when nothing was plotted to avoid an empty-legend warning.
if plt.gca().get_legend_handles_labels()[0]:
    plt.legend()
plt.savefig(output_dir / 'privacy_budget_evolution.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Create a final summary table
# Maps each results_df column to its display header; the dict order is also
# the column order of the table.
column_labels = {
    'description': 'Experiment',
    'final_accuracy': 'Accuracy',
    'final_epsilon': 'Privacy Budget (ε)',
    'runtime_minutes': 'Runtime (min)',
    'num_clients': 'Clients',
    'cut_layer': 'Cut Layer',
    'dp_mode': 'DP Mode'
}
summary_table = results_df[list(column_labels)].rename(columns=column_labels)

# Format numbers: fixed decimals per column, "N/A" for missing values
number_formats = (
    ('Accuracy', '.4f'),
    ('Privacy Budget (ε)', '.4f'),
    ('Runtime (min)', '.2f'),
)
for column, spec in number_formats:
    # `spec=spec` binds the current format into the lambda at definition time.
    summary_table[column] = summary_table[column].map(
        lambda value, spec=spec: format(value, spec) if pd.notnull(value) else "N/A"
    )

# Display the table
summary_table


In [None]:
# Save summary as HTML and CSV
# Both exports go to the shared output directory under fixed file names.
html_path = output_dir / 'experiment_summary_table.html'
csv_path = output_dir / 'experiment_summary_table.csv'

summary_table.to_html(html_path)
summary_table.to_csv(csv_path)

print(f"Summary table saved to {output_dir}")
