In [1]:
import os
# Set KMP_DUPLICATE_LIB_OK in the process environment before any of the
# imports below are executed.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# initialization error that can occur when several numeric libraries are
# loaded in one process - confirm it is still needed on the target machine.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pickle


from DrlLibs.training import train_drl_agent
from DrlLibs.evaluate import evaluate_drl_agent
from DrlLibs import create_environment, check_env
from Configs import getEnvConfig, visualizeEnvConfig, getPredictorConfig, visualizePredictorConfig
from EnvLibs import PolicyDemoAdaptiveAlpha, PolicySimulator, createEnv

In [2]:
def main(simParams, simEnv, save_path, agent_name, algorithm_name: str = "SAC", 
         obvMode="perfect",
         total_timesteps: int = 20000, timesteps_per_episode: int = 5000):
    """Train a DRL agent, evaluate it, and print a short summary.

    Args:
        simParams: Environment configuration, forwarded to ``create_environment``.
        simEnv: Simulation environment object, forwarded to ``create_environment``.
        save_path: Forwarded unchanged to ``train_drl_agent``.
        agent_name: Label of the agent; shown in the heading and forwarded to
            ``train_drl_agent``.
        algorithm_name: Name of the DRL algorithm, forwarded to
            ``train_drl_agent`` and ``evaluate_drl_agent``.
        obvMode: Observation mode, forwarded to ``create_environment``.
        total_timesteps: Training budget, forwarded to ``train_drl_agent``.
        timesteps_per_episode: Forwarded to ``create_environment``.

    Returns:
        A ``(model, eval_results)`` tuple: the model returned by
        ``train_drl_agent`` and the mapping returned by ``evaluate_drl_agent``.
        The mapping must contain the keys ``avg_reward``, ``std_reward``,
        ``avg_loss_rate``, ``std_loss_rate`` and ``avg_alpha``.

    Raises:
        Any exception raised by the helpers propagates to the caller; the
        environment is closed first.
    """
    banner = "=" * 80

    print(banner)
    # agent_name is printed as-is: callers already pass labels such as
    # "config0", so a literal "config" prefix here would be doubled.
    print(f"{algorithm_name} as Agent {agent_name}'s Training and Evaluation")
    print(banner)

    # Create environment
    print("Creating environment...")
    env = create_environment(simParams, simEnv, obvMode, timesteps_per_episode)

    # From here on the environment exists, so guarantee that it is closed even
    # if checking, training, evaluation or reporting fails.
    try:
        # Check environment
        print("Checking environment...")
        check_env(env.unwrapped)
        print("Environment check passed!")

        # Train DRL agent (the returned callback is not needed here)
        model, _callback, training_time = train_drl_agent(
            algorithm_name, env, total_timesteps, save_path, agent_name
        )

        # Evaluate DRL agent
        eval_results = evaluate_drl_agent(model, env, algorithm_name)

        # Print final summary
        print(f"\n{banner}")
        print("FINAL SUMMARY")
        print(banner)
        print(f"Algorithm: {algorithm_name}")
        print(f"Training completed in: {training_time:.2f} seconds")
        print(f"Total training timesteps: {total_timesteps}")
        print(f"Average evaluation reward: {eval_results['avg_reward']:.4f} ± {eval_results['std_reward']:.4f}")
        print(f"Average packet loss rate: {eval_results['avg_loss_rate']:.4f} ± {eval_results['std_loss_rate']:.4f}")
        print(f"Average alpha value: {eval_results['avg_alpha']:.4f}")
    finally:
        env.close()

    return model, eval_results

In [3]:
# Index of the experiment configuration; the same index selects both the
# environment settings and the predictor settings below.
configIdx = 0

# Environment settings: fetch them, then print them for the record.
envParams = getEnvConfig(configIdx)
visualizeEnvConfig(envParams)

# Predictor settings: fetch them, then print them for the record.
predictorParams = getPredictorConfig(configIdx)
visualizePredictorConfig(predictorParams)

# Create the simulation environment from the environment settings and the
# traffic-data directory, then select mode "train" with type "data".
trafficDataParentPath = "Results/TrafficData"
simEnv = createEnv(envParams, trafficDataParentPath)
simEnv.selectMode(mode="train", type="data")

Environment Configuration
Number of Users:        4
Window Length:          200
Dataflow:               thumb_fr
N_aggregation:          4
Resource Bar:           5
Bandwidth:              100
M List:                 [2, 3, 4]
Random Seed:            999
Alpha Range:            (0.01, 1.0)
Discrete Alpha Steps:   10
Predictor Configuration
Window Length:          200
Upsample K:             10
Dataflow:               thumb_fr
DB Parameter:           0.001
Alpha:                  0.01
Mode:                   fixed
Direction:              forward
Train Ratio:            0.6
Train Data Augment:     False
Smooth Fc:              1.5
Smooth Order:           3


In [4]:
# Experiment settings - edit these values to change the run.
ALGORITHM = "SAC"  # Options: "SAC", "PPO", "A2C", "TD3", "DQN"
OBVMODE = "perfect"
TIMESTEPS = 10000  # Training timesteps
TIMESTEPS_PER_EPISODE = 1000

# Both labels are derived from the settings above and from the configuration
# index chosen earlier in the notebook.
AGENTNAME = f"config{configIdx}"
SAVEPATH = f"Results/{ALGORITHM}_DrlAgent"

# Run training and evaluation; every argument is passed by keyword, in the
# order of main()'s signature.
model, results = main(
    simParams=envParams,
    simEnv=simEnv,
    save_path=SAVEPATH,
    agent_name=AGENTNAME,
    algorithm_name=ALGORITHM,
    obvMode=OBVMODE,
    total_timesteps=TIMESTEPS,
    timesteps_per_episode=TIMESTEPS_PER_EPISODE,
)

SAC as Agent configconfig0's Training and Evaluation
Creating environment...
Checking environment...
Environment check passed!

Training SAC as config0 Agent
Total timesteps: 10000
Environment: 4 users, 100 bandwidth
Save path: Results/SAC_DrlAgent.zip
Using cpu device
Wrapping the env in a DummyVecEnv.
Starting training...
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -103     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 71       |
|    time_elapsed    | 55       |
|    total_timesteps | 4000     |
| train/             |          |
|    actor_loss      | -55.8    |
|    critic_loss     | 0.382    |
|    ent_coef        | 0.406    |
|    ent_coef_loss   | -15      |
|    learning_rate   | 0.0003   |
|    n_updates       | 2999     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03

In [5]:
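# Optional: reload a previously saved agent instead of retraining.
# NOTE: check the path against the "Save path" line printed during training
# before relying on it.
#from stable_baselines3 import SAC
#model = SAC.load(f"{SAVEPATH}/{AGENTNAME}.zip")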



