# UAV A3C Model Ablation Analysis

This notebook provides comprehensive ablation analysis for UAV A3C models including:
1. Global vs Individual model comparison
2. Worker variability analysis
3. Model structure and parameter analysis
4. Temporal model performance evolution


In [1]:
# Import necessary libraries
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
from scipy import stats
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime
import glob

# Custom imports
from drl_framework.networks import ActorCritic, RecurrentActorCritic
from drl_framework.custom_env import CustomEnv
from drl_framework.params import ENV_PARAMS, device

warnings.filterwarnings('ignore')
plt.style.use('default')
sns.set_palette("husl")

print(f"Using device: {device}")
print(f"PyTorch version: {torch.__version__}")

Using device: cpu
PyTorch version: 2.2.1+cu121


In [9]:
# Configuration
class Config:
    def __init__(self):
        self.main_models_dir = "models"
        self.runs_dir = "runs"
        self.state_dim = 7  # Based on UAV environment
        self.action_dim = 3  # LOCAL, OFFLOAD, DISCARD (updated to match environment)
        self.hidden_dim = 128
        self.n_workers = 10  # Number of individual workers
        self.evaluation_episodes = 100
        
config = Config()
print(f"Configuration loaded: {vars(config)}")

Configuration loaded: {'main_models_dir': 'models', 'runs_dir': 'runs', 'state_dim': 7, 'action_dim': 3, 'hidden_dim': 128, 'n_workers': 10, 'evaluation_episodes': 100}


## 1. Model Discovery and Loading

In [10]:
def discover_models():
    """Discover all available models in the project"""
    models_info = {
        'main_models': {},
        'run_models': {}
    }
    
    # Main models directory
    main_dir = Path(config.main_models_dir)
    if main_dir.exists():
        for pth_file in main_dir.glob("*.pth"):
            models_info['main_models'][pth_file.stem] = str(pth_file)
    
    # Run directories
    runs_dir = Path(config.runs_dir)
    if runs_dir.exists():
        for run_dir in runs_dir.iterdir():
            if run_dir.is_dir():
                models_dir = run_dir / "models"
                if models_dir.exists():
                    run_models = {}
                    for pth_file in models_dir.glob("*.pth"):
                        run_models[pth_file.stem] = str(pth_file)
                    if run_models:
                        models_info['run_models'][run_dir.name] = run_models
    
    return models_info

def load_model(model_path, model_type='standard'):
    """Load a model from path"""
    try:
        # Determine model architecture
        if model_type == 'recurrent':
            model = RecurrentActorCritic(
                state_dim=config.state_dim,
                action_dim=config.action_dim,
                hidden_dim=config.hidden_dim
            )
        else:
            model = ActorCritic(
                state_dim=config.state_dim,
                action_dim=config.action_dim,
                hidden_dim=config.hidden_dim
            )
        
        # Load state dict
        checkpoint = torch.load(model_path, map_location=device)
        
        # Handle different checkpoint formats
        if isinstance(checkpoint, dict):
            if 'model_state_dict' in checkpoint:
                model.load_state_dict(checkpoint['model_state_dict'])
            elif 'state_dict' in checkpoint:
                model.load_state_dict(checkpoint['state_dict'])
            else:
                model.load_state_dict(checkpoint)
        else:
            model.load_state_dict(checkpoint)
        
        model.eval()
        return model
    except Exception as e:
        print(f"Error loading model from {model_path}: {e}")
        return None

# Discover all models
models_info = discover_models()
print("\n=== Available Models ===")
print(f"Main models: {list(models_info['main_models'].keys())}")
print(f"Run directories: {len(models_info['run_models'])}")
for run_name, run_models in list(models_info['run_models'].items())[:3]:  # Show first 3
    print(f"  {run_name}: {list(run_models.keys())[:5]}{'...' if len(run_models) > 5 else ''}")


=== Available Models ===
Main models: ['individual_worker_0_final', 'individual_worker_5_final', 'individual_worker_6_final', 'individual_worker_9_final', 'individual_worker_3_final', 'global_final', 'individual_worker_4_final', 'individual_worker_2_final', 'individual_worker_1_final', 'individual_worker_7_final', 'individual_worker_8_final']
Run directories: 23
  individual_20250815_151722: ['individual_worker_0_final', 'individual_worker_3_final', 'individual_worker_4_final', 'individual_worker_2_final', 'individual_worker_1_final']
  individual_20250819_175326: ['individual_worker_0_final', 'individual_worker_3_final', 'individual_worker_4_final', 'individual_worker_2_final', 'individual_worker_1_final']
  a3c_20250815_161957: ['global_final']


## 2. Model Evaluation Framework

In [12]:
def evaluate_model(model, env, episodes=100, deterministic=True):
    """Evaluate a model's performance"""
    total_rewards = []
    episode_lengths = []
    action_counts = {0: 0, 1: 0, 2: 0}  # LOCAL, OFFLOAD, DISCARD (3 actions based on environment)
    
    for episode in range(episodes):
        state = env.reset()
        episode_reward = 0
        episode_length = 0
        done = False
        
        # Initialize hidden state for RNN models
        if hasattr(model, 'init_hidden'):
            hidden = model.init_hidden(1, device)
        else:
            hidden = None
        
        while not done:
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
            
            with torch.no_grad():
                if hidden is not None:
                    # RNN model
                    if hasattr(model, 'act'):
                        action, _, _, hidden = model.act(state_tensor, hidden)
                        action = action.item()
                    else:
                        logits, _, hidden = model.step(state_tensor, hidden)
                        if deterministic:
                            action = torch.argmax(logits, dim=1).item()
                        else:
                            dist = torch.distributions.Categorical(logits=logits)
                            action = dist.sample().item()
                else:
                    # Standard model
                    logits, _ = model(state_tensor)
                    if deterministic:
                        action = torch.argmax(logits, dim=1).item()
                    else:
                        dist = torch.distributions.Categorical(logits=logits)
                        action = dist.sample().item()
            
            # Ensure action is within valid range
            action = min(action, env.action_space.n - 1)
            action_counts[action] = action_counts.get(action, 0) + 1
            
            state, reward, done, _ = env.step(action)
            episode_reward += reward
            episode_length += 1
            
            if episode_length > 1000:  # Prevent infinite loops
                break
        
        total_rewards.append(episode_reward)
        episode_lengths.append(episode_length)
    
    return {
        'mean_reward': np.mean(total_rewards),
        'std_reward': np.std(total_rewards),
        'min_reward': np.min(total_rewards),
        'max_reward': np.max(total_rewards),
        'mean_length': np.mean(episode_lengths),
        'std_length': np.std(episode_lengths),
        'action_distribution': action_counts,
        'all_rewards': total_rewards,
        'all_lengths': episode_lengths
    }

# Initialize environment
env = CustomEnv(
    max_comp_units=ENV_PARAMS['max_comp_units'],
    max_epoch_size=ENV_PARAMS['max_epoch_size'], 
    max_queue_size=ENV_PARAMS['max_queue_size'],
    max_comp_units_for_cloud=ENV_PARAMS['max_comp_units_for_cloud'],
    reward_weights=ENV_PARAMS['reward_weights'],
    agent_velocities=ENV_PARAMS['agent_velocities']
)
print(f"Environment initialized with state_dim: {env.observation_space.shape}, action_dim: {env.action_space.n}")

Environment initialized with state_dim: None, action_dim: 3


## 3. Global vs Individual Model Comparison

In [13]:
# Load and evaluate main models
print("=== Loading and Evaluating Main Models ===")
main_results = {}

# Load global model
if 'global_final' in models_info['main_models']:
    global_path = models_info['main_models']['global_final']
    print(f"Loading global model from: {global_path}")
    global_model = load_model(global_path)
    if global_model:
        print("Evaluating global model...")
        main_results['global'] = evaluate_model(global_model, env, config.evaluation_episodes)
        print(f"Global model - Mean reward: {main_results['global']['mean_reward']:.2f} ± {main_results['global']['std_reward']:.2f}")

# Load individual worker models
individual_results = {}
for model_name, model_path in models_info['main_models'].items():
    if model_name.startswith('individual_worker_'):
        worker_id = model_name.split('_')[-2]  # Extract worker ID
        print(f"Loading individual worker {worker_id} from: {model_path}")
        worker_model = load_model(model_path)
        if worker_model:
            print(f"Evaluating worker {worker_id}...")
            individual_results[f'worker_{worker_id}'] = evaluate_model(worker_model, env, config.evaluation_episodes)
            print(f"Worker {worker_id} - Mean reward: {individual_results[f'worker_{worker_id}']['mean_reward']:.2f}")

print(f"\nLoaded {len(individual_results)} individual worker models")

=== Loading and Evaluating Main Models ===
Loading global model from: models/global_final.pth
Error loading model from models/global_final.pth: Error(s) in loading state_dict for ActorCritic:
	size mismatch for shared.0.weight: copying a param with shape torch.Size([128, 45]) from checkpoint, the shape in current model is torch.Size([128, 7]).
	size mismatch for policy.weight: copying a param with shape torch.Size([2, 128]) from checkpoint, the shape in current model is torch.Size([3, 128]).
	size mismatch for policy.bias: copying a param with shape torch.Size([2]) from checkpoint, the shape in current model is torch.Size([3]).
Loading individual worker 0 from: models/individual_worker_0_final.pth
Error loading model from models/individual_worker_0_final.pth: Error(s) in loading state_dict for ActorCritic:
	size mismatch for shared.0.weight: copying a param with shape torch.Size([128, 45]) from checkpoint, the shape in current model is torch.Size([128, 7]).
	size mismatch for policy.we

In [14]:
# Statistical comparison between global and individual models
if 'global' in main_results and individual_results:
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    
    # 1. Reward comparison
    global_rewards = main_results['global']['all_rewards']
    individual_rewards_all = []
    worker_labels = []
    
    for worker, results in individual_results.items():
        individual_rewards_all.extend(results['all_rewards'])
        worker_labels.extend([worker] * len(results['all_rewards']))
    
    # Box plot comparison
    axes[0, 0].boxplot([global_rewards, individual_rewards_all], 
                       labels=['Global', 'Individual (All)'])
    axes[0, 0].set_title('Reward Distribution: Global vs Individual')
    axes[0, 0].set_ylabel('Episode Reward')
    axes[0, 0].grid(True, alpha=0.3)
    
    # 2. Individual worker comparison
    worker_rewards = [results['all_rewards'] for results in individual_results.values()]
    worker_names = list(individual_results.keys())
    
    bp = axes[0, 1].boxplot(worker_rewards, labels=worker_names)
    axes[0, 1].set_title('Reward Distribution Across Individual Workers')
    axes[0, 1].set_ylabel('Episode Reward')
    axes[0, 1].tick_params(axis='x', rotation=45)
    axes[0, 1].grid(True, alpha=0.3)
    
    # 3. Action distribution comparison
    action_names = ['LOCAL', 'OFFLOAD', 'DISCARD']  # Updated to match 3 actions
    global_actions = [main_results['global']['action_distribution'].get(i, 0) for i in range(3)]
    global_actions_norm = np.array(global_actions) / np.sum(global_actions) if np.sum(global_actions) > 0 else np.zeros(3)
    
    # Average individual action distribution
    individual_actions = np.zeros(3)
    for results in individual_results.values():
        for i in range(3):
            individual_actions[i] += results['action_distribution'].get(i, 0)
    individual_actions_norm = individual_actions / np.sum(individual_actions) if np.sum(individual_actions) > 0 else np.zeros(3)
    
    x = np.arange(len(action_names))
    width = 0.35
    axes[1, 0].bar(x - width/2, global_actions_norm, width, label='Global', alpha=0.8)
    axes[1, 0].bar(x + width/2, individual_actions_norm, width, label='Individual (Avg)', alpha=0.8)
    axes[1, 0].set_title('Action Distribution Comparison')
    axes[1, 0].set_ylabel('Probability')
    axes[1, 0].set_xticks(x)
    axes[1, 0].set_xticklabels(action_names)
    axes[1, 0].legend()
    axes[1, 0].grid(True, alpha=0.3)
    
    # 4. Performance summary
    summary_data = []
    summary_data.append(['Global', main_results['global']['mean_reward'], main_results['global']['std_reward']])
    
    for worker, results in individual_results.items():
        summary_data.append([worker, results['mean_reward'], results['std_reward']])
    
    df_summary = pd.DataFrame(summary_data, columns=['Model', 'Mean Reward', 'Std Reward'])
    axes[1, 1].axis('tight')
    axes[1, 1].axis('off')
    table = axes[1, 1].table(cellText=[[f"{row[0]}", f"{row[1]:.2f}", f"{row[2]:.2f}"] for row in summary_data],
                            colLabels=['Model', 'Mean Reward', 'Std Reward'],
                            cellLoc='center',
                            loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(9)
    table.scale(1.2, 1.5)
    axes[1, 1].set_title('Performance Summary')
    
    plt.tight_layout()
    plt.show()
    
    # Statistical tests
    print("\n=== Statistical Analysis ===")
    global_mean = main_results['global']['mean_reward']
    individual_means = [results['mean_reward'] for results in individual_results.values()]
    individual_mean = np.mean(individual_means)
    
    print(f"Global model mean reward: {global_mean:.3f} ± {main_results['global']['std_reward']:.3f}")
    print(f"Individual models mean reward: {individual_mean:.3f} ± {np.std(individual_means):.3f}")
    print(f"Performance difference: {global_mean - individual_mean:.3f}")
    
    # t-test between global and pooled individual rewards
    t_stat, p_value = stats.ttest_ind(global_rewards, individual_rewards_all)
    print(f"T-test (Global vs Individual): t={t_stat:.3f}, p={p_value:.3f}")
    
    if p_value < 0.05:
        print("Significant difference detected between global and individual models")
    else:
        print("No significant difference between global and individual models")
    
else:
    print("Unable to perform comparison - missing global or individual models")

Unable to perform comparison - missing global or individual models


## 4. Worker Variability Analysis

In [16]:
if individual_results:
    print("=== Worker Variability Analysis ===")
    
    # Create comprehensive worker analysis
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    
    # 1. Worker performance ranking
    worker_performance = [(worker, results['mean_reward'], results['std_reward']) 
                         for worker, results in individual_results.items()]
    worker_performance.sort(key=lambda x: x[1], reverse=True)
    
    workers, means, stds = zip(*worker_performance)
    x_pos = np.arange(len(workers))
    
    bars = axes[0, 0].bar(x_pos, means, yerr=stds, capsize=5, alpha=0.7)
    axes[0, 0].set_title('Worker Performance Ranking')
    axes[0, 0].set_ylabel('Mean Episode Reward')
    axes[0, 0].set_xticks(x_pos)
    axes[0, 0].set_xticklabels(workers, rotation=45)
    axes[0, 0].grid(True, alpha=0.3)
    
    # Color bars by performance
    colors = plt.cm.RdYlGn(np.linspace(0.3, 0.9, len(bars)))
    for bar, color in zip(bars, colors):
        bar.set_color(color)
    
    # 2. Worker consistency (coefficient of variation)
    cvs = [results['std_reward'] / abs(results['mean_reward']) if results['mean_reward'] != 0 else 0 
           for results in individual_results.values()]
    worker_names = list(individual_results.keys())
    
    axes[0, 1].bar(range(len(worker_names)), cvs, alpha=0.7)
    axes[0, 1].set_title('Worker Consistency (Lower = More Consistent)')
    axes[0, 1].set_ylabel('Coefficient of Variation')
    axes[0, 1].set_xticks(range(len(worker_names)))
    axes[0, 1].set_xticklabels(worker_names, rotation=45)
    axes[0, 1].grid(True, alpha=0.3)
    
    # 3. Action distribution heatmap
    action_matrix = []
    for worker in worker_names:
        actions = individual_results[worker]['action_distribution']
        total_actions = sum(actions.values())
        action_probs = [actions.get(i, 0) / total_actions if total_actions > 0 else 0 for i in range(3)]
        action_matrix.append(action_probs)
    
    im = axes[0, 2].imshow(action_matrix, cmap='YlOrRd', aspect='auto')
    axes[0, 2].set_title('Action Distribution Across Workers')
    axes[0, 2].set_xlabel('Actions')
    axes[0, 2].set_ylabel('Workers')
    axes[0, 2].set_xticks(range(3))
    axes[0, 2].set_xticklabels(['LOCAL', 'OFFLOAD', 'DISCARD'])
    axes[0, 2].set_yticks(range(len(worker_names)))
    axes[0, 2].set_yticklabels(worker_names)
    plt.colorbar(im, ax=axes[0, 2], fraction=0.046, pad=0.04)
    
    # 4. Reward distribution violin plot
    all_worker_rewards = [individual_results[worker]['all_rewards'] for worker in worker_names]
    parts = axes[1, 0].violinplot(all_worker_rewards, positions=range(len(worker_names)), showmeans=True)
    axes[1, 0].set_title('Reward Distribution Shape by Worker')
    axes[1, 0].set_ylabel('Episode Reward')
    axes[1, 0].set_xticks(range(len(worker_names)))
    axes[1, 0].set_xticklabels(worker_names, rotation=45)
    axes[1, 0].grid(True, alpha=0.3)
    
    # 5. Episode length analysis
    episode_lengths = [individual_results[worker]['mean_length'] for worker in worker_names]
    length_stds = [individual_results[worker]['std_length'] for worker in worker_names]
    
    axes[1, 1].bar(range(len(worker_names)), episode_lengths, yerr=length_stds, 
                   capsize=5, alpha=0.7, color='skyblue')
    axes[1, 1].set_title('Episode Length by Worker')
    axes[1, 1].set_ylabel('Mean Episode Length')
    axes[1, 1].set_xticks(range(len(worker_names)))
    axes[1, 1].set_xticklabels(worker_names, rotation=45)
    axes[1, 1].grid(True, alpha=0.3)
    
    # 6. Worker correlation matrix
    if len(individual_results) > 1:
        # Create correlation matrix based on episode rewards
        reward_matrix = np.array([individual_results[worker]['all_rewards'] for worker in worker_names])
        correlation_matrix = np.corrcoef(reward_matrix)
        
        im2 = axes[1, 2].imshow(correlation_matrix, cmap='coolwarm', vmin=-1, vmax=1)
        axes[1, 2].set_title('Worker Performance Correlation')
        axes[1, 2].set_xticks(range(len(worker_names)))
        axes[1, 2].set_xticklabels(worker_names, rotation=45)
        axes[1, 2].set_yticks(range(len(worker_names)))
        axes[1, 2].set_yticklabels(worker_names)
        plt.colorbar(im2, ax=axes[1, 2], fraction=0.046, pad=0.04)
        
        # Add correlation values to heatmap
        for i in range(len(worker_names)):
            for j in range(len(worker_names)):
                axes[1, 2].text(j, i, f'{correlation_matrix[i, j]:.2f}', 
                               ha='center', va='center', fontsize=8)
    
    plt.tight_layout()
    plt.show()
    
    # Statistical analysis of worker variability
    print("\n=== Worker Variability Statistics ===")
    worker_means = [individual_results[worker]['mean_reward'] for worker in worker_names]
    print(f"Inter-worker mean reward std: {np.std(worker_means):.3f}")
    print(f"Best worker: {workers[0]} ({means[0]:.3f})")
    print(f"Worst worker: {workers[-1]} ({means[-1]:.3f})")
    print(f"Performance gap: {means[0] - means[-1]:.3f}")
    
    # ANOVA test for significant differences between workers
    worker_reward_lists = [individual_results[worker]['all_rewards'] for worker in worker_names]
    f_stat, p_value = stats.f_oneway(*worker_reward_lists)
    print(f"ANOVA F-statistic: {f_stat:.3f}, p-value: {p_value:.3f}")
    
    if p_value < 0.05:
        print("Significant differences detected between workers")
    else:
        print("No significant differences between workers")

else:
    print("No individual worker models found for variability analysis")

No individual worker models found for variability analysis


## 5. Model Structure and Parameter Analysis

In [None]:
def analyze_model_parameters(model, model_name):
    """Analyze model parameters and structure"""
    param_info = {
        'total_params': 0,
        'trainable_params': 0,
        'layer_info': [],
        'param_distribution': {},
        'weight_stats': {}
    }
    
    for name, param in model.named_parameters():
        param_count = param.numel()
        param_info['total_params'] += param_count
        if param.requires_grad:
            param_info['trainable_params'] += param_count
        
        # Layer information
        param_info['layer_info'].append({
            'name': name,
            'shape': list(param.shape),
            'params': param_count,
            'requires_grad': param.requires_grad
        })
        
        # Parameter distribution
        param_info['param_distribution'][name] = param_count
        
        # Weight statistics
        with torch.no_grad():
            param_info['weight_stats'][name] = {
                'mean': param.mean().item(),
                'std': param.std().item(),
                'min': param.min().item(),
                'max': param.max().item(),
                'norm': param.norm().item()
            }
    
    return param_info

def compare_model_weights(model1, model2, model1_name, model2_name):
    """Compare weights between two models"""
    comparison = {
        'layer_similarities': {},
        'overall_similarity': 0,
        'weight_differences': {}
    }
    
    model1_params = dict(model1.named_parameters())
    model2_params = dict(model2.named_parameters())
    
    similarities = []
    
    for name in model1_params.keys():
        if name in model2_params:
            w1 = model1_params[name].detach().flatten().numpy()
            w2 = model2_params[name].detach().flatten().numpy()
            
            # Cosine similarity
            similarity = cosine_similarity([w1], [w2])[0, 0]
            comparison['layer_similarities'][name] = similarity
            similarities.append(similarity)
            
            # Weight differences
            diff = np.abs(w1 - w2)
            comparison['weight_differences'][name] = {
                'mean_diff': np.mean(diff),
                'max_diff': np.max(diff),
                'l2_distance': np.linalg.norm(w1 - w2)
            }
    
    comparison['overall_similarity'] = np.mean(similarities)
    return comparison

# Analyze model structures
print("=== Model Structure Analysis ===")

# Load a few models for analysis
models_to_analyze = {}

# Global model
if 'global_final' in models_info['main_models']:
    global_model = load_model(models_info['main_models']['global_final'])
    if global_model:
        models_to_analyze['Global'] = global_model

# A few individual workers
worker_count = 0
for model_name, model_path in models_info['main_models'].items():
    if model_name.startswith('individual_worker_') and worker_count < 3:
        worker_id = model_name.split('_')[-2]
        worker_model = load_model(model_path)
        if worker_model:
            models_to_analyze[f'Worker_{worker_id}'] = worker_model
            worker_count += 1

# Analyze each model
model_analyses = {}
for name, model in models_to_analyze.items():
    print(f"\nAnalyzing {name}...")
    analysis = analyze_model_parameters(model, name)
    model_analyses[name] = analysis
    print(f"  Total parameters: {analysis['total_params']:,}")
    print(f"  Trainable parameters: {analysis['trainable_params']:,}")

# Visualize parameter analysis
if model_analyses:
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    
    # 1. Parameter count comparison
    model_names = list(model_analyses.keys())
    param_counts = [model_analyses[name]['total_params'] for name in model_names]
    
    axes[0, 0].bar(model_names, param_counts, alpha=0.7)
    axes[0, 0].set_title('Total Parameters by Model')
    axes[0, 0].set_ylabel('Parameter Count')
    axes[0, 0].tick_params(axis='x', rotation=45)
    axes[0, 0].grid(True, alpha=0.3)
    
    # 2. Weight distribution for first model
    if model_names:
        first_model = model_names[0]
        weight_norms = [stats['norm'] for stats in model_analyses[first_model]['weight_stats'].values()]
        layer_names = list(model_analyses[first_model]['weight_stats'].keys())
        
        axes[0, 1].bar(range(len(layer_names)), weight_norms, alpha=0.7)
        axes[0, 1].set_title(f'Layer Weight Norms - {first_model}')
        axes[0, 1].set_ylabel('L2 Norm')
        axes[0, 1].set_xticks(range(len(layer_names)))
        axes[0, 1].set_xticklabels([name.split('.')[-1] for name in layer_names], rotation=45)
        axes[0, 1].grid(True, alpha=0.3)
    
    # 3. Model similarity heatmap (if multiple models)
    if len(models_to_analyze) > 1:
        similarity_matrix = np.zeros((len(model_names), len(model_names)))
        
        for i, name1 in enumerate(model_names):
            for j, name2 in enumerate(model_names):
                if i == j:
                    similarity_matrix[i, j] = 1.0
                elif i < j:
                    comparison = compare_model_weights(
                        models_to_analyze[name1], 
                        models_to_analyze[name2], 
                        name1, name2
                    )
                    similarity = comparison['overall_similarity']
                    similarity_matrix[i, j] = similarity
                    similarity_matrix[j, i] = similarity
        
        im = axes[1, 0].imshow(similarity_matrix, cmap='RdYlBu', vmin=0, vmax=1)
        axes[1, 0].set_title('Model Weight Similarity Matrix')
        axes[1, 0].set_xticks(range(len(model_names)))
        axes[1, 0].set_xticklabels(model_names, rotation=45)
        axes[1, 0].set_yticks(range(len(model_names)))
        axes[1, 0].set_yticklabels(model_names)
        plt.colorbar(im, ax=axes[1, 0], fraction=0.046, pad=0.04)
        
        # Add similarity values
        for i in range(len(model_names)):
            for j in range(len(model_names)):
                axes[1, 0].text(j, i, f'{similarity_matrix[i, j]:.3f}', 
                               ha='center', va='center', fontsize=8)
    
    # 4. Layer-wise parameter distribution
    if model_names:
        first_model = model_names[0]
        param_dist = model_analyses[first_model]['param_distribution']
        
        # Create pie chart for parameter distribution
        layer_params = list(param_dist.values())
        layer_labels = [name.split('.')[-1] for name in param_dist.keys()]
        
        axes[1, 1].pie(layer_params, labels=layer_labels, autopct='%1.1f%%', startangle=90)
        axes[1, 1].set_title(f'Parameter Distribution - {first_model}')
    
    plt.tight_layout()
    plt.show()
    
    # Print detailed comparison
    if len(models_to_analyze) > 1:
        print("\n=== Model Weight Similarity Analysis ===")
        for i, name1 in enumerate(model_names[:-1]):
            for name2 in model_names[i+1:]:
                comparison = compare_model_weights(
                    models_to_analyze[name1], 
                    models_to_analyze[name2], 
                    name1, name2
                )
                print(f"{name1} vs {name2}: {comparison['overall_similarity']:.3f} similarity")

else:
    print("No models available for structure analysis")

## 6. Temporal Model Performance Evolution

In [None]:
# Analyze models across different training runs (temporal evolution)
print("=== Temporal Model Performance Evolution ===")

def extract_timestamp(run_name):
    """Extract timestamp from run directory name"""
    try:
        timestamp_str = run_name.split('_')[-1]
        return datetime.strptime(timestamp_str, '%Y%m%d_%H%M%S')
    except:
        return None

# Collect temporal data
temporal_data = {'a3c': [], 'individual': []}

for run_name, run_models in models_info['run_models'].items():
    timestamp = extract_timestamp(run_name)
    if timestamp is None:
        continue
    
    run_type = 'a3c' if run_name.startswith('a3c_') else 'individual'
    
    # Load and evaluate models from this run
    run_results = {'timestamp': timestamp, 'models': {}}
    
    for model_name, model_path in run_models.items():
        if 'final' in model_name:  # Only analyze final models
            model = load_model(model_path)
            if model:
                print(f"Evaluating {run_name}/{model_name}...")
                # Use fewer episodes for temporal analysis to speed up
                results = evaluate_model(model, env, episodes=50)
                run_results['models'][model_name] = results
    
    if run_results['models']:
        temporal_data[run_type].append(run_results)

# Sort by timestamp
for run_type in temporal_data:
    temporal_data[run_type].sort(key=lambda x: x['timestamp'])

print(f"Found {len(temporal_data['a3c'])} A3C runs and {len(temporal_data['individual'])} individual runs")

In [None]:
# Visualize temporal evolution
if temporal_data['a3c'] or temporal_data['individual']:
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # 1. A3C Global model evolution
    if temporal_data['a3c']:
        a3c_times = []
        a3c_rewards = []
        a3c_stds = []
        
        for run_data in temporal_data['a3c']:
            if 'global_final' in run_data['models']:
                a3c_times.append(run_data['timestamp'])
                a3c_rewards.append(run_data['models']['global_final']['mean_reward'])
                a3c_stds.append(run_data['models']['global_final']['std_reward'])
        
        if a3c_times:
            axes[0, 0].errorbar(a3c_times, a3c_rewards, yerr=a3c_stds, 
                               marker='o', capsize=5, label='A3C Global')
            axes[0, 0].set_title('A3C Global Model Performance Over Time')
            axes[0, 0].set_ylabel('Mean Episode Reward')
            axes[0, 0].tick_params(axis='x', rotation=45)
            axes[0, 0].grid(True, alpha=0.3)
            axes[0, 0].legend()
    
    # 2. Individual worker model evolution (average)
    if temporal_data['individual']:
        ind_times = []
        ind_rewards_mean = []
        ind_rewards_std = []
        
        for run_data in temporal_data['individual']:
            worker_rewards = []
            for model_name, results in run_data['models'].items():
                if 'individual_worker_' in model_name and 'final' in model_name:
                    worker_rewards.append(results['mean_reward'])
            
            if worker_rewards:
                ind_times.append(run_data['timestamp'])
                ind_rewards_mean.append(np.mean(worker_rewards))
                ind_rewards_std.append(np.std(worker_rewards))
        
        if ind_times:
            axes[0, 1].errorbar(ind_times, ind_rewards_mean, yerr=ind_rewards_std, 
                               marker='s', capsize=5, label='Individual Workers (Avg)', color='orange')
            axes[0, 1].set_title('Individual Workers Average Performance Over Time')
            axes[0, 1].set_ylabel('Mean Episode Reward')
            axes[0, 1].tick_params(axis='x', rotation=45)
            axes[0, 1].grid(True, alpha=0.3)
            axes[0, 1].legend()
    
    # 3. Combined comparison
    if a3c_times and ind_times:
        axes[1, 0].errorbar(a3c_times, a3c_rewards, yerr=a3c_stds, 
                           marker='o', capsize=3, label='A3C Global', alpha=0.8)
        axes[1, 0].errorbar(ind_times, ind_rewards_mean, yerr=ind_rewards_std, 
                           marker='s', capsize=3, label='Individual (Avg)', alpha=0.8)
        axes[1, 0].set_title('Model Performance Comparison Over Time')
        axes[1, 0].set_ylabel('Mean Episode Reward')
        axes[1, 0].tick_params(axis='x', rotation=45)
        axes[1, 0].grid(True, alpha=0.3)
        axes[1, 0].legend()
    
    # 4. Performance improvement trends
    improvement_data = []
    
    if len(a3c_rewards) > 1:
        a3c_trend = np.polyfit(range(len(a3c_rewards)), a3c_rewards, 1)[0]
        improvement_data.append(['A3C Global', a3c_trend, len(a3c_rewards)])
    
    if len(ind_rewards_mean) > 1:
        ind_trend = np.polyfit(range(len(ind_rewards_mean)), ind_rewards_mean, 1)[0]
        improvement_data.append(['Individual Avg', ind_trend, len(ind_rewards_mean)])
    
    if improvement_data:
        df_trends = pd.DataFrame(improvement_data, columns=['Model Type', 'Trend (reward/run)', 'Runs'])
        
        bars = axes[1, 1].bar(df_trends['Model Type'], df_trends['Trend (reward/run)'], 
                             alpha=0.7, color=['blue', 'orange'][:len(df_trends)])
        axes[1, 1].set_title('Performance Improvement Trends')
        axes[1, 1].set_ylabel('Reward Improvement per Run')
        axes[1, 1].grid(True, alpha=0.3)
        axes[1, 1].axhline(y=0, color='red', linestyle='--', alpha=0.5)
        
        # Add value labels on bars
        for bar, value in zip(bars, df_trends['Trend (reward/run)']):
            axes[1, 1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.001, 
                            f'{value:.4f}', ha='center', va='bottom')
    
    plt.tight_layout()
    plt.show()
    
    # Print temporal analysis summary
    print("\n=== Temporal Analysis Summary ===")
    
    if a3c_times:
        print(f"A3C Global models: {len(a3c_times)} runs")
        print(f"  Performance range: {min(a3c_rewards):.3f} to {max(a3c_rewards):.3f}")
        if len(a3c_rewards) > 1:
            print(f"  Trend: {np.polyfit(range(len(a3c_rewards)), a3c_rewards, 1)[0]:.4f} reward/run")
    
    if ind_times:
        print(f"Individual worker models: {len(ind_times)} runs")
        print(f"  Performance range: {min(ind_rewards_mean):.3f} to {max(ind_rewards_mean):.3f}")
        if len(ind_rewards_mean) > 1:
            print(f"  Trend: {np.polyfit(range(len(ind_rewards_mean)), ind_rewards_mean, 1)[0]:.4f} reward/run")

else:
    print("No temporal data available for analysis")

## 7. Comprehensive Summary and Conclusions

In [None]:
# Generate comprehensive summary
print("\n" + "="*60)
print("COMPREHENSIVE ABLATION ANALYSIS SUMMARY")
print("="*60)

# Create summary dataframe
summary_results = []

# Main models summary
if 'global' in main_results:
    summary_results.append([
        'Global A3C',
        main_results['global']['mean_reward'],
        main_results['global']['std_reward'],
        len(main_results['global']['all_rewards']),
        'Main Models'
    ])

if individual_results:
    individual_means = [results['mean_reward'] for results in individual_results.values()]
    individual_stds = [results['std_reward'] for results in individual_results.values()]
    
    summary_results.append([
        'Individual Workers (Best)',
        max(individual_means),
        individual_stds[np.argmax(individual_means)],
        config.evaluation_episodes,
        'Main Models'
    ])
    
    summary_results.append([
        'Individual Workers (Avg)',
        np.mean(individual_means),
        np.mean(individual_stds),
        config.evaluation_episodes,
        'Main Models'
    ])
    
    summary_results.append([
        'Individual Workers (Worst)',
        min(individual_means),
        individual_stds[np.argmin(individual_means)],
        config.evaluation_episodes,
        'Main Models'
    ])

# Temporal data summary
if temporal_data['a3c'] and a3c_rewards:
    summary_results.append([
        'A3C (Best Run)',
        max(a3c_rewards),
        a3c_stds[np.argmax(a3c_rewards)],
        50,  # Episodes used for temporal analysis
        'Temporal Analysis'
    ])

if temporal_data['individual'] and ind_rewards_mean:
    summary_results.append([
        'Individual (Best Run)',
        max(ind_rewards_mean),
        ind_rewards_std[np.argmax(ind_rewards_mean)],
        50,
        'Temporal Analysis'
    ])

# Create and display summary table
if summary_results:
    df_summary = pd.DataFrame(summary_results, 
                             columns=['Model Type', 'Mean Reward', 'Std Reward', 'Episodes', 'Analysis Type'])
    
    print("\n=== PERFORMANCE SUMMARY ===")
    print(df_summary.to_string(index=False, float_format='%.3f'))

# Key findings
print("\n=== KEY FINDINGS ===")

findings = []

# Global vs Individual comparison
if 'global' in main_results and individual_results:
    global_mean = main_results['global']['mean_reward']
    individual_mean = np.mean([results['mean_reward'] for results in individual_results.values()])
    
    if global_mean > individual_mean:
        findings.append(f"🏆 Global A3C model outperforms individual workers by {global_mean - individual_mean:.3f} reward points")
    else:
        findings.append(f"🔄 Individual workers outperform global model by {individual_mean - global_mean:.3f} reward points")

# Worker variability
if individual_results and len(individual_results) > 1:
    individual_means = [results['mean_reward'] for results in individual_results.values()]
    variability = np.std(individual_means)
    performance_gap = max(individual_means) - min(individual_means)
    
    findings.append(f"📊 Worker performance variability: σ={variability:.3f}, gap={performance_gap:.3f}")
    
    if variability > 0.1:
        findings.append("⚠️  High variability detected between individual workers")
    else:
        findings.append("✅ Consistent performance across individual workers")

# Model similarity
if len(models_to_analyze) > 1:
    findings.append(f"🔍 Analyzed {len(models_to_analyze)} models for structural similarity")

# Temporal trends
if improvement_data:
    for model_type, trend, runs in improvement_data:
        if trend > 0:
            findings.append(f"📈 {model_type} shows positive improvement trend: +{trend:.4f} reward/run over {runs} runs")
        elif trend < 0:
            findings.append(f"📉 {model_type} shows declining trend: {trend:.4f} reward/run over {runs} runs")
        else:
            findings.append(f"➡️  {model_type} shows stable performance over {runs} runs")

# Display findings
for i, finding in enumerate(findings, 1):
    print(f"{i}. {finding}")

# Recommendations
print("\n=== RECOMMENDATIONS ===")

recommendations = []

if 'global' in main_results and individual_results:
    global_mean = main_results['global']['mean_reward']
    individual_mean = np.mean([results['mean_reward'] for results in individual_results.values()])
    
    if global_mean > individual_mean:
        recommendations.append("Use A3C global model for deployment as it shows superior performance")
        recommendations.append("Consider ensemble methods combining global and best individual workers")
    else:
        recommendations.append("Individual training may be more effective for this environment")
        recommendations.append("Investigate A3C hyperparameters and training procedure")

if individual_results and len(individual_results) > 1:
    variability = np.std([results['mean_reward'] for results in individual_results.values()])
    if variability > 0.1:
        recommendations.append("Investigate sources of worker variability (initialization, data, hyperparameters)")
        recommendations.append("Consider worker-specific hyperparameter tuning")

if temporal_data['a3c'] or temporal_data['individual']:
    recommendations.append("Continue temporal monitoring to track model performance evolution")
    recommendations.append("Implement automated model selection based on validation performance")

for i, rec in enumerate(recommendations, 1):
    print(f"{i}. {rec}")

print("\n" + "="*60)
print(f"Analysis completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*60)

In [None]:
# Save results to CSV for further analysis
if summary_results:
    df_results = pd.DataFrame(summary_results, 
                             columns=['Model Type', 'Mean Reward', 'Std Reward', 'Episodes', 'Analysis Type'])
    
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    results_file = f'ablation_analysis_results_{timestamp}.csv'
    df_results.to_csv(results_file, index=False)
    print(f"Results saved to: {results_file}")
    
    # Display final results table
    print("\n=== FINAL RESULTS TABLE ===")
    print(df_results.round(3))