In [1]:
import os
# Set before any of the numeric / DRL libraries below are imported.
# NOTE(review): presumably a workaround for the duplicate-OpenMP-runtime abort
# ("OMP: Error #15") seen on some installs -- confirm it is still required.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pickle


from DrlLibs.training import train_drl_agent
from DrlLibs.evaluate import evaluate_drl_agent
from DrlLibs.DRL_config import (
    get_algorithm_config, 
    get_training_config,
    print_algorithm_info
)
from DrlLibs import create_environment, check_env
from Configs import getEnvConfig, visualizeEnvConfig, getPredictorConfig, visualizePredictorConfig
from EnvLibs import PolicyDemoAdaptiveAlpha, PolicySimulator, createEnv

In [2]:
def main(simParams, simEnv, save_path, agent_name, algorithm_name: str = "SAC", total_timesteps: int = None):
    """Train a DRL agent on the simulation environment, then evaluate it.

    Args:
        simParams: Environment parameters, forwarded to ``create_environment``.
        simEnv: Simulation environment object, forwarded to
            ``create_environment``.
        save_path: Forwarded to ``train_drl_agent`` as the save location.
        agent_name: Forwarded to ``train_drl_agent`` as the agent name.
        algorithm_name: Algorithm identifier passed to training and evaluation
            and echoed in the printed banners.
        total_timesteps: Training budget. When ``None``, the
            ``"total_timesteps"`` entry of ``get_training_config()`` is used.

    Returns:
        tuple: ``(model, eval_results)`` -- the model returned by
        ``train_drl_agent`` and the results mapping returned by
        ``evaluate_drl_agent``.

    Raises:
        Any exception raised by the environment check, training, evaluation
        or summary printing propagates unchanged; the environment is closed
        first.
    """
    banner = "=" * 80

    print(banner)
    print(f"{algorithm_name} Agent Training and Evaluation")
    print(banner)

    # Only consult the training configuration when the caller gave no budget.
    if total_timesteps is None:
        total_timesteps = get_training_config()["total_timesteps"]

    print("Creating environment...")
    env = create_environment(simParams, simEnv)

    # Everything that uses the environment runs under try/finally so that the
    # environment is released even if checking, training or evaluation fails.
    try:
        print("Checking environment...")
        # The check runs on the innermost environment, not on its wrappers.
        check_env(env.unwrapped)
        print("Environment check passed!")

        # The callback returned by training is not used here.
        model, _callback, training_time = train_drl_agent(
            algorithm_name, env, total_timesteps, save_path, agent_name
        )

        eval_results = evaluate_drl_agent(model, env, algorithm_name)

        print(f"\n{banner}")
        print("FINAL SUMMARY")
        print(banner)
        print(f"Algorithm: {algorithm_name}")
        print(f"Training completed in: {training_time:.2f} seconds")
        print(f"Total training timesteps: {total_timesteps}")
        print(f"Average evaluation reward: {eval_results['avg_reward']:.4f} ± {eval_results['std_reward']:.4f}")
        print(f"Average packet loss rate: {eval_results['avg_loss_rate']:.4f} ± {eval_results['std_loss_rate']:.4f}")
        print(f"Average alpha value: {eval_results['avg_alpha']:.4f}")
    finally:
        env.close()

    return model, eval_results

In [3]:
# Index of the experiment configuration; the same index selects both the
# environment settings and the predictor settings.
configIdx = 1

# Load and display the environment configuration.
envParams = getEnvConfig(configIdx)
visualizeEnvConfig(envParams)

# Load and display the predictor configuration.
predictorParams = getPredictorConfig(configIdx)
visualizePredictorConfig(predictorParams)

# Relative directory handed to createEnv as the traffic-data parent path.
trafficDataParentPath = 'Results/TrafficData'

# Build the simulation environment and select mode "train" / type "data"
# before it is used for training.
simEnv = createEnv(envParams, trafficDataParentPath)
simEnv.selectMode(mode="train", type="data")

Environment Configuration
Number of Users:        4
Window Length:          200
Dataflow:               thumb_bk
N_aggregation:          4
Resource Bar:           4
Bandwidth:              40
M List:                 [2, 3]
Random Seed:            999
Alpha Range:            (0.01, 1.0)
Discrete Alpha Steps:   10
Predictor Configuration
Window Length:          200
Upsample K:             10
Dataflow:               thumb_bk
DB Parameter:           0.012
Alpha:                  0.01
Mode:                   fixed
Direction:              backward
Train Ratio:            0.6
Train Data Augment:     False
Smooth Fc:              1.5
Smooth Order:           3


In [None]:
# Configuration - change these parameters as needed
ALGORITHM = "SAC"               # Options: "SAC", "PPO", "A2C", "TD3", "DQN"
TIMESTEPS = 20000               # Training timesteps
SAVEPATH = "Results/DrlAgent"   # Save location forwarded to main(save_path=...)
AGENTNAME = f"config{configIdx}"  # Agent name derived from the configuration index

# Run training and evaluation; keeps the trained model and the evaluation
# results available to later cells.
print(f"\nStarting training with {ALGORITHM} with config{configIdx}...")
model, results = main(
    envParams,
    simEnv,
    save_path=SAVEPATH,
    agent_name=AGENTNAME,
    algorithm_name=ALGORITHM,
    total_timesteps=TIMESTEPS,
)


Starting training with SAC with config1...
SAC Agent Training and Evaluation
Creating environment...
Checking environment...
Environment check passed!

Training SAC Agent
Total timesteps: 20000
Environment: 4 users, 40 bandwidth
Save path: Results/DrlAgent.zip
Using cpu device
Wrapping the env in a DummyVecEnv.
Starting training...
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -208     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 52       |
|    time_elapsed    | 75       |
|    total_timesteps | 4000     |
| train/             |          |
|    actor_loss      | -54.7    |
|    critic_loss     | 0.397    |
|    ent_coef        | 0.406    |
|    ent_coef_loss   | -15.1    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2999     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean   

In [4]:
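# To reload a saved agent instead of retraining, uncomment the two lines below.
# NOTE(review): the training log reports "Save path: Results/DrlAgent.zip", so the
# archive may not be located at SAVEPATH/AGENTNAME.zip -- confirm the path before loading.
#from stable_baselines3 import SAC
#model = SAC.load(f"{SAVEPATH}/{AGENTNAME}.zip")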



