In [1]:
import os
# Set in the process environment ahead of the third-party imports that follow in this cell.
# NOTE(review): presumably the usual workaround for the "duplicate OpenMP runtime"
# startup abort when several packages bundle their own OpenMP library — confirm it is
# still needed on the target machine, since it only silences the check.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pickle


from DrlLibs.training import train_drl_agent, create_parallel_environment
from DrlLibs.evaluate import evaluate_drl_agent
from DrlLibs import create_environment, check_env
from Configs import getEnvConfig, visualizeEnvConfig, getPredictorConfig, visualizePredictorConfig
from EnvLibs import PolicyDemoAdaptiveAlpha, PolicySimulator, createEnv









In [2]:
def _print_final_summary(algorithm_name, training_time, total_timesteps, callback, eval_results):
    """Print the end-of-run report: training statistics followed by evaluation metrics."""
    print(f"\n{'='*80}")
    print("FINAL SUMMARY")
    print(f"{'='*80}")
    print(f"Algorithm: {algorithm_name}")
    print(f"Training completed in: {training_time:.2f} seconds")
    print(f"Total training timesteps: {total_timesteps}")

    rewards = callback.episode_rewards
    if rewards:  # skipped when no episode finished during training
        print(f"Total episodes completed: {len(rewards)}")
        print(f"Average episode reward: {np.mean(rewards):.4f} ± {np.std(rewards):.4f}")
        print(f"Final episode reward: {rewards[-1]:.4f}")
        print(f"Best episode reward: {max(rewards):.4f}")
        print(f"Reward improvement: {rewards[-1] - rewards[0]:.4f}")

    print(f"Average evaluation reward: {eval_results['avg_reward']:.4f} ± {eval_results['std_reward']:.4f}")
    print(f"Average packet loss rate: {eval_results['avg_loss_rate']:.4f} ± {eval_results['std_loss_rate']:.4f}")
    print(f"Average alpha value: {eval_results['avg_alpha']:.4f}")


def main(simParams, simEnv, save_path, agent_name, algorithm_name: str = "SAC",
         obvMode="perfect", total_timesteps: int = 20000,
         timesteps_per_episode: int = 5000, n_envs: int = 4):
    """Train a DRL agent, evaluate it on a fresh environment, and print a summary.

    Args:
        simParams: Environment configuration, forwarded to the environment factories.
        simEnv: Simulation environment object, forwarded to the environment factories.
        save_path: Forwarded to ``train_drl_agent`` as the model save location.
        agent_name: Label of the agent; forwarded to ``train_drl_agent`` and shown
            verbatim in the banner (pass the full label, e.g. ``"config5"``).
        algorithm_name: Name of the algorithm handed to the training/evaluation helpers.
        obvMode: Observation mode forwarded to the environment factories.
        total_timesteps: Training budget passed to ``train_drl_agent``.
        timesteps_per_episode: Episode length passed to the environment factories.
        n_envs: Number of environments requested from ``create_parallel_environment``;
            ``check_env`` is only run when this is 1.

    Returns:
        Tuple ``(model, eval_results, callback)`` where ``eval_results`` is the mapping
        returned by ``evaluate_drl_agent`` and ``callback`` exposes ``episode_rewards``.

    Both the training and the evaluation environment are closed even when training
    or evaluation raises (including ``KeyboardInterrupt``); the exception still
    propagates to the caller.
    """
    banner = "=" * 80
    print(banner)
    # agent_name already carries its own prefix (the notebook passes "config5"),
    # so it is printed as-is instead of being prefixed with "config" a second time.
    print(f"{algorithm_name} as Agent {agent_name}'s Training and Evaluation")
    print(banner)

    # Create environment (single or parallel)
    print("Creating environment...")
    env = create_parallel_environment(simParams, simEnv, obvMode,
                                      timesteps_per_episode, n_envs)
    try:
        # The environment check is only applied to the single-environment case.
        if n_envs == 1:
            print("Checking environment...")
            check_env(env.unwrapped)
            print("Environment check passed!")
        else:
            print(f"Created {n_envs} parallel environments")

        # Train DRL agent
        model, callback, training_time = train_drl_agent(
            algorithm_name, env, total_timesteps, save_path, agent_name)

        # Evaluate on a clean single environment, released as soon as evaluation ends.
        print("Creating evaluation environment...")
        eval_env = create_environment(simParams, simEnv, obvMode, timesteps_per_episode)
        try:
            eval_results = evaluate_drl_agent(model, eval_env, algorithm_name)
        finally:
            eval_env.close()

        # Plotting is best-effort: any failure (including a missing plotting module)
        # falls back to a short textual summary instead of aborting the run.
        print("\nGenerating visualizations...")
        try:
            from DrlLibs.visualize import plot_training_results
            plot_training_results(callback, eval_results, algorithm_name, save_plots=True)
        except Exception as e:
            print(f"Visualization failed: {e}")
            print("Training data summary:")
            if callback.episode_rewards:
                print(f"  Episodes completed: {len(callback.episode_rewards)}")
                print(f"  Reward progression: {callback.episode_rewards[-10:]}")  # Last 10 rewards

        _print_final_summary(algorithm_name, training_time, total_timesteps,
                             callback, eval_results)
    finally:
        # Training environment is released last, on success and on failure alike.
        env.close()

    return model, eval_results, callback

In [3]:
# Index of the experiment configuration; the same index selects both the
# environment settings and the predictor settings.
configIdx = 5

# Load each configuration and display it right away so the run is self-documenting.
envParams = getEnvConfig(configIdx)
visualizeEnvConfig(envParams)
predictorParams = getPredictorConfig(configIdx)
visualizePredictorConfig(predictorParams)

# Plain string literal: nothing is interpolated, so no f-prefix is needed.
trafficDataParentPath = 'Results/TrafficData'

# Build the simulation environment and switch it to its training data.
simEnv = createEnv(envParams, trafficDataParentPath)
simEnv.selectMode(mode="train", type="data")

Environment Configuration
Number of Users:        8
Window Length:          200
Dataflow:               thumb_bk
N_aggregation:          4
Resource Bar:           5
Bandwidth:              200
M List:                 [3, 4, 5]
Random Seed:            999
Alpha Range:            (0.01, 1.0)
Discrete Alpha Steps:   10
Predictor Configuration
Window Length:          200
Upsample K:             10
Dataflow:               thumb_fr
DB Parameter:           0.012
Alpha:                  0.01
Mode:                   fixed
Direction:              forward
Train Ratio:            0.6
Train Data Augment:     False
Smooth Fc:              1.5
Smooth Order:           3


In [4]:
# --- Run configuration: edit these values to change the experiment ---
ALGORITHM = "A2C"                 # one of: "SAC", "PPO", "A2C", "TD3", "DQN"
TIMESTEPS = 100_000               # total training timesteps
TIMESTEPS_PER_EPISODE = 1_000     # steps per episode
N_ENVS = 1                        # number of training environments
OBVMODE = "perfect"               # observation mode passed to the environments
AGENTNAME = f"config{configIdx}"  # agent label derived from the selected config index
SAVEPATH = f"Results/DrlAgent/{ALGORITHM}"

# --- Train the agent, then evaluate it ---
model, results, callback = main(
    simParams=envParams,
    simEnv=simEnv,
    save_path=SAVEPATH,
    agent_name=AGENTNAME,
    algorithm_name=ALGORITHM,
    obvMode=OBVMODE,
    total_timesteps=TIMESTEPS,
    timesteps_per_episode=TIMESTEPS_PER_EPISODE,
    n_envs=N_ENVS,
)

A2C as Agent configconfig5's Training and Evaluation
Creating environment...
Checking environment...
Environment check passed!

Training A2C as config5 Agent
Total timesteps: 100000
Environment: 8 users, 200 bandwidth
Save path: Results/DrlAgent/A2C.zip
Using cuda device
Wrapping the env in a DummyVecEnv.




Starting training...
------------------------------------
| time/                 |          |
|    fps                | 256      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -25.8    |
|    explained_variance | -0.0114  |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -30.3    |
|    std                | 1.02     |
|    value_loss         | 1.68     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1e+03    |
|    ep_rew_mean        | -265     |
| time/                 |          |
|    fps                | 267      |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -25.8    |
|    explained_va

KeyboardInterrupt: 

In [None]:
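# Example: reload a previously saved agent instead of retraining.
# NOTE(review): `save_path` and `agentName` are not defined at notebook level in the
# cells above (the configuration cell defines SAVEPATH and AGENTNAME) — fix the names
# and confirm the file location against the "Save path" line printed during training.
#from stable_baselines3 import SAC
#model = SAC.load(f"{save_path}/{agentName}.zip")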



