In [None]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import datetime
import json
from pathlib import Path

from DrlLibs.training import create_parallel_environment, train_drl_agent
from DrlLibs.evaluate import evaluate_drl_agent
from DrlLibs import create_environment, check_env
from Configs import getEnvConfig, visualizeEnvConfig, getPredictorConfig, visualizePredictorConfig
from EnvLibs import PolicyDemoAdaptiveAlpha, PolicySimulator, createEnv
from DrlLibs.visualize import plot_training_results

In [None]:

def main_single_experiment(simParams, simEnv, save_path, agent_name, algorithm_name: str = "SAC",
                          obvMode="perfect", total_timesteps: int = 20000,
                          timesteps_per_episode: int = 5000, n_envs: int = 4,
                          moving_avg_window: int = 100):
    """Train one DRL agent, evaluate it on a fresh environment and print a summary.

    Args:
        simParams: Environment parameters, forwarded unchanged to
            ``create_parallel_environment`` and ``create_environment``.
        simEnv: Simulation environment object, forwarded to the same two factories.
        save_path: Directory handed to ``train_drl_agent``; the training-progress
            plot is also moved here when it is found.
        agent_name: Agent identifier handed to ``train_drl_agent`` and shown in the banner.
        algorithm_name: Algorithm identifier (e.g. ``"SAC"``) used for training,
            evaluation, plotting and the plot file name.
        obvMode: Observation mode forwarded to the environment factories.
        total_timesteps: Training budget forwarded to ``train_drl_agent``.
        timesteps_per_episode: Episode length forwarded to the environment factories.
        n_envs: Number of parallel training environments; the environment check
            is only run when this is 1.
        moving_avg_window: Forwarded to ``train_drl_agent``.

    Returns:
        tuple: ``(model, eval_results, callback)`` where ``model`` and ``callback``
        come from ``train_drl_agent`` and ``eval_results`` from ``evaluate_drl_agent``.
        ``eval_results`` must provide the keys ``avg_reward``, ``std_reward``,
        ``avg_loss_rate``, ``std_loss_rate`` and ``avg_alpha``.

    Raises:
        Any exception raised by environment creation, training or evaluation is
        propagated. Plotting errors are reported and swallowed (best effort).
    """
    print("="*80)
    print(f"{algorithm_name} as Agent config{agent_name}'s Training and Evaluation")
    print("="*80)

    # Create environment (single or parallel)
    print("Creating environment...")
    env = create_parallel_environment(simParams, simEnv, obvMode,
                                      timesteps_per_episode, n_envs)

    # Everything that uses the training environment lives inside this try so the
    # environment is closed even when training/evaluation fails or is interrupted.
    try:
        # Check environment (only for single env)
        if n_envs == 1:
            print("Checking environment...")
            check_env(env.unwrapped)
            print("Environment check passed!")
        else:
            print(f"Created {n_envs} parallel environments")

        model, callback, training_time = train_drl_agent(
            algorithm_name, env, total_timesteps, save_path, agent_name,
            moving_avg_window=moving_avg_window
        )

        # Evaluate on a clean, single environment rather than the training one
        print("Creating evaluation environment...")
        eval_env = create_environment(simParams, simEnv, obvMode, timesteps_per_episode)
        try:
            eval_results = evaluate_drl_agent(model, eval_env, algorithm_name)
        finally:
            # Release the evaluation environment whether or not evaluation succeeded
            eval_env.close()

        # Plot training results (best effort: a plotting problem must not discard
        # a finished training run)
        print("\nGenerating visualizations...")
        try:
            plot_training_results(callback, eval_results, algorithm_name, save_plots=True)
            # Presumably plot_training_results writes this file into the current
            # working directory -- TODO confirm; it is moved only if it exists.
            plot_file = f'{algorithm_name.lower()}_training_progress.png'
            if os.path.exists(plot_file):
                new_plot_path = os.path.join(save_path, plot_file)
                target_dir = os.path.dirname(new_plot_path)
                if target_dir:  # os.makedirs('') raises, so skip it for an empty save_path
                    os.makedirs(target_dir, exist_ok=True)
                # os.replace overwrites an existing target on every platform,
                # whereas os.rename fails on Windows if the target exists.
                os.replace(plot_file, new_plot_path)
                print(f"Plot moved to: {new_plot_path}")
        except Exception as e:
            print(f"Visualization failed: {e}")

        print(f"\n{'='*80}")
        print("EXPERIMENT SUMMARY")
        print(f"{'='*80}")
        print(f"Algorithm: {algorithm_name}")
        print(f"Training completed in: {training_time:.2f} seconds")

        print(f"Average evaluation reward: {eval_results['avg_reward']:.4f} ± {eval_results['std_reward']:.4f}")
        print(f"Average packet loss rate: {eval_results['avg_loss_rate']:.4f} ± {eval_results['std_loss_rate']:.4f}")
        print(f"Average alpha value: {eval_results['avg_alpha']:.4f}")

        return model, eval_results, callback
    finally:
        env.close()


In [None]:
def run_all_experiments():
    """Train and evaluate every algorithm on every environment configuration.

    For each configuration index the environment and predictor configurations are
    fetched and displayed, a simulation environment is created in training mode,
    and ``main_single_experiment`` is run once per algorithm. Each successful run
    writes ``experiment_results.json`` (and ``training_data.pickle`` when the
    callback exposes ``timesteps_log``) under
    ``Results/AllExperiments_<timestamp>/Config<idx>/<algorithm>``. A failing run
    is recorded and the sweep continues with the next algorithm; a configuration
    whose environment cannot be created is skipped.

    All records gathered so far are written to ``all_results.json`` in the
    timestamped results directory when the sweep ends, including on Ctrl-C or
    on a fatal error.

    Returns:
        list[dict]: One record per attempted experiment. Every record has a
        ``status`` key (``'success'`` or ``'failed'``).

    Raises:
        SystemExit: With code 0 when interrupted by the user, or code 1 when an
            unexpected error escapes the per-experiment handling.
    """
    # Function-scope import: both exception handlers at the bottom call sys.exit,
    # so the name has to be bound before the try block is entered.
    import sys

    # Configuration
    ALGORITHMS = ["SAC", "PPO", "A2C", "TD3"]  # DQN is deliberately left out
    ENV_CONFIGS = list(range(3, 8))  # Configs 3-7 (the range's upper bound is exclusive)

    # Training parameters
    TIMESTEPS = 400000
    TIMESTEPS_PER_EPISODE = 1000
    N_ENVS = 4
    OBVMODE = "perfect"
    MOVING_AVG_WINDOW = 1000

    # Parent path handed to createEnv; identical for every configuration
    trafficDataParentPath = 'Results/TrafficData'

    # Create results directory with timestamp
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    base_results_dir = f"Results/AllExperiments_{timestamp}"
    os.makedirs(base_results_dir, exist_ok=True)

    # One record per attempted experiment (successful or failed)
    all_results = []

    total_experiments = len(ALGORITHMS) * len(ENV_CONFIGS)
    experiment_count = 0

    print(f"\n{'='*100}")
    print("STARTING COMPREHENSIVE EXPERIMENT SUITE")
    print(f"{'='*100}")
    print(f"Algorithms: {ALGORITHMS}")
    print(f"Environment Configs: {ENV_CONFIGS}")
    print(f"Total Experiments: {total_experiments}")
    print(f"Results Directory: {base_results_dir}")
    print(f"{'='*100}")

    try:
        for configIdx in ENV_CONFIGS:
            print(f"\n{'='*80}")
            print(f"ENVIRONMENT CONFIG {configIdx}")
            print(f"{'='*80}")

            # Get environment configuration
            envParams = getEnvConfig(configIdx)
            visualizeEnvConfig(envParams)

            # Get predictor configuration
            predictorParams = getPredictorConfig(configIdx)
            visualizePredictorConfig(predictorParams)

            # Create environment; a config that cannot be built is skipped so the
            # remaining configurations still run.
            try:
                simEnv = createEnv(envParams, trafficDataParentPath)
                simEnv.selectMode(mode="train", type="data")
            except Exception as e:
                print(f"Failed to create environment for config {configIdx}: {e}")
                continue

            for algorithm in ALGORITHMS:
                experiment_count += 1
                print(f"\n{'-'*60}")
                print(f"EXPERIMENT {experiment_count}/{total_experiments}: Config {configIdx} + {algorithm}")
                print(f"{'-'*60}")

                # Create experiment-specific directories
                exp_name = f"config{configIdx}_{algorithm}"
                save_path = os.path.join(base_results_dir, f"Config{configIdx}", algorithm)
                os.makedirs(save_path, exist_ok=True)

                # Record experiment start
                exp_start_time = datetime.datetime.now()

                try:
                    # Run single experiment
                    model, eval_results, callback = main_single_experiment(
                        envParams,
                        simEnv,
                        save_path=save_path,
                        agent_name=exp_name,
                        algorithm_name=algorithm,
                        total_timesteps=TIMESTEPS,
                        timesteps_per_episode=TIMESTEPS_PER_EPISODE,
                        obvMode=OBVMODE,
                        n_envs=N_ENVS,
                        moving_avg_window=MOVING_AVG_WINDOW
                    )

                    # Calculate experiment duration
                    exp_duration = (datetime.datetime.now() - exp_start_time).total_seconds()

                    # Collect results
                    experiment_result = {
                        'config_idx': configIdx,
                        'algorithm': algorithm,
                        'env_config': envParams,
                        # Mirrors the 'status' key of failure records so every
                        # record can be filtered the same way.
                        'status': 'success',
                        'avg_reward': eval_results['avg_reward'],
                        'std_reward': eval_results['std_reward'],
                        'avg_loss_rate': eval_results['avg_loss_rate'],
                        'std_loss_rate': eval_results['std_loss_rate'],
                        'avg_alpha': eval_results['avg_alpha'],
                        'training_timesteps': len(callback.timesteps_log) * callback.log_interval if hasattr(callback, 'timesteps_log') else 0,
                        'experiment_duration': exp_duration,
                        'save_path': save_path
                    }

                    all_results.append(experiment_result)

                    # Save individual experiment results; default=str stringifies
                    # any value json cannot encode natively.
                    exp_results_file = os.path.join(save_path, 'experiment_results.json')
                    with open(exp_results_file, 'w') as f:
                        json.dump(experiment_result, f, indent=2, default=str)

                    # Save callback data
                    if hasattr(callback, 'timesteps_log'):
                        callback_data = {
                            'timesteps_log': callback.timesteps_log,
                            'rewards_log': callback.rewards_log,
                            'cumulative_rewards': callback.cumulative_rewards,
                        }
                        callback_file = os.path.join(save_path, 'training_data.pickle')
                        with open(callback_file, 'wb') as f:
                            pickle.dump(callback_data, f)

                    print(f"✓ Experiment {experiment_count} completed successfully")

                except Exception as e:
                    print(f"✗ Experiment {experiment_count} failed: {e}")
                    # Log the failure and carry on with the next algorithm
                    experiment_result = {
                        'config_idx': configIdx,
                        'algorithm': algorithm,
                        'env_config': envParams,
                        'status': 'failed',
                        'error': str(e),
                        'experiment_duration': (datetime.datetime.now() - exp_start_time).total_seconds()
                    }
                    all_results.append(experiment_result)
    except KeyboardInterrupt:
        print("\nInterrupted by user. Exiting gracefully.")
        sys.exit(0)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)
    finally:
        # Runs on normal completion, on Ctrl-C and on fatal errors, so the records
        # gathered so far are not lost together with the process.
        summary_file = os.path.join(base_results_dir, 'all_results.json')
        try:
            with open(summary_file, 'w') as f:
                json.dump(all_results, f, indent=2, default=str)
            print(f"Summary of {len(all_results)} experiment(s) saved to: {summary_file}")
        except (OSError, TypeError, ValueError) as e:
            print(f"Could not save experiment summary: {e}")

    return all_results


In [None]:

# Entry point: start the full experiment sweep when this code runs as the main
# module (this is also the case when the cell is executed in a notebook kernel).
if __name__ == "__main__":
    run_all_experiments()