In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pickle

import sys
sys.path.append('../../../../')

from stable_baselines3.common.env_checker import check_env
from src.difsched.agents.drl import create_parallel_environment, train_drl_agent
from src.difsched.agents.drl import evaluate_drl_agent
from src.difsched.agents.drl import create_environment, DRLResourceSchedulingEnv
from src.difsched.config import getEnvConfig, visualizeEnvConfig
from src.difsched.agents.drl.visualize import plot_training_results
from src.difsched.env.EnvironmentSim import createEnv


In [2]:
def train(simParams, simEnv, save_path, agent_name, algorithm_name: str = "SAC",
          obvMode="perfect", total_timesteps: int = 20000,
          timesteps_per_episode: int = 5000, n_envs: int = 4,
          early_stop_threshold: float = 0.05, min_steps_before_stop: int = 1000,
          moving_avg_window: int = 100):
    """Train a DRL agent and return it together with the open training env.

    The environment is built with ``create_parallel_environment`` and the
    actual training is delegated to ``train_drl_agent``; this function only
    wires the two together and prints a summary.

    Args:
        simParams: Environment simulation parameters (forwarded to the
            environment factory).
        simEnv: Simulation environment (forwarded to the environment factory).
        save_path: Forwarded to ``train_drl_agent``.
        agent_name: Name of the agent; shown in the banner and forwarded to
            ``train_drl_agent``.
        algorithm_name: Name of the algorithm, forwarded to ``train_drl_agent``.
        obvMode: Observation mode, forwarded to the environment factory.
        total_timesteps: Training budget forwarded to ``train_drl_agent``.
        timesteps_per_episode: Forwarded to the environment factory.
        n_envs: Number of environments requested from the factory.
        early_stop_threshold: Forwarded to ``train_drl_agent``.
        min_steps_before_stop: Forwarded to ``train_drl_agent``.
        moving_avg_window: Forwarded to ``train_drl_agent``.

    Returns:
        model: Trained model
        callback: Training callback with metrics
        training_time: Time taken for training
        env: Training environment, still open (caller should close this)

    Raises:
        Whatever the environment factory, ``check_env`` or ``train_drl_agent``
        raise. The environment is closed before the exception propagates.
    """
    banner = "=" * 80
    print(banner)
    # agent_name is used verbatim: prefixing it with "config" duplicated the
    # prefix for names such as "config0".
    print(f"{algorithm_name} as Agent {agent_name}'s Training")
    print(banner)

    # Create environment (single or parallel)
    print("Creating environment...")
    env = create_parallel_environment(simParams, simEnv, obvMode,
                                      timesteps_per_episode, n_envs)

    try:
        # Check environment (only for single env)
        if n_envs == 1:
            print("Checking environment...")
            # NOTE(review): assumes the factory result exposes `.unwrapped`
            # when n_envs == 1 - confirm against create_parallel_environment.
            check_env(env.unwrapped)
            print("Environment check passed!")
        else:
            print(f"Created {n_envs} parallel environments")

        model, callback, training_time = train_drl_agent(
            algorithm_name, env, total_timesteps, save_path, agent_name,
            early_stop_threshold=early_stop_threshold,
            min_steps_before_stop=min_steps_before_stop,
            moving_avg_window=moving_avg_window
        )

        # Print training summary
        print(f"\n{banner}")
        print("TRAINING SUMMARY")
        print(banner)
        print(f"Algorithm: {algorithm_name}")
        print(f"Training completed in: {training_time:.2f} seconds")
        print(f"Total training timesteps: {total_timesteps}")

        if hasattr(callback, 'cumulative_rewards') and callback.cumulative_rewards:
            episodes_seen = callback.episodes_seen
            print(f"Episodes completed during training: {episodes_seen}")
            print(f"Final cumulative reward: {callback.total_reward:.4f}")
            if episodes_seen:
                print(f"Average reward per episode: {callback.total_reward / episodes_seen:.4f}")
            else:
                # Avoid ZeroDivisionError when no episode finished.
                print("Average reward per episode: n/a (no completed episodes)")
        else:
            print("No cumulative reward data captured during training")
    except BaseException:
        # Nothing is handed back on failure (including a kernel interrupt),
        # so release the environment here instead of leaking it.
        env.close()
        raise

    # The environment is intentionally left open: ownership moves to the
    # caller, as documented above.
    return model, callback, training_time, env


def eval(model, simParams, simEnv, algorithm_name: str = "SAC",
         deterministic=True,
         obvMode="perfect", timesteps_per_episode: int = 5000, n_steps: int = 1000):
    """Evaluate a trained DRL agent on a fresh single environment.

    A new ``DRLResourceSchedulingEnv`` is created for the evaluation, handed
    to ``evaluate_drl_agent`` and always closed afterwards, even when the
    evaluation raises.

    Args:
        model: Trained model to evaluate
        simParams: Environment simulation parameters
        simEnv: Simulation environment
        algorithm_name: Name of the algorithm
        deterministic: Forwarded unchanged to ``evaluate_drl_agent``
        obvMode: Observation mode
        timesteps_per_episode: Accepted for signature compatibility but not
            used; the environment is constructed with ``n_steps`` instead.
        n_steps: Number of steps to evaluate; also passed as the fourth
            positional argument of the environment constructor
            (presumably its episode length - TODO confirm).

    Returns:
        eval_results: Dictionary with evaluation metrics. The keys
            ``avg_reward``, ``std_reward``, ``avg_packet_loss`` and
            ``std_packet_loss`` are read for the printed summary.

    Raises:
        Whatever ``evaluate_drl_agent`` raises (the environment is closed
        first), or ``KeyError`` if a summary key is missing from the results.
    """
    banner = "=" * 80
    print(banner)
    print(f"{algorithm_name} Agent Evaluation")
    print(banner)

    # Create a clean single environment for evaluation
    print("Creating evaluation environment...")
    env = DRLResourceSchedulingEnv(
        simParams,
        simEnv,
        obvMode,
        n_steps
    )

    try:
        eval_results = evaluate_drl_agent(
            model=model,
            env=env,
            algorithm_name=algorithm_name,
            deterministic=deterministic,
            n_steps=n_steps
        )
    finally:
        # Close evaluation environment on success and on failure alike.
        env.close()

    # Print evaluation summary
    print(f"\n{banner}")
    print("EVALUATION SUMMARY")
    print(banner)
    print(f"Algorithm: {algorithm_name}")
    print(f"Average evaluation reward: {eval_results['avg_reward']:.4f} ± {eval_results['std_reward']:.4f}")
    print(f"Average packet loss rate: {eval_results['avg_packet_loss']:.4f} ± {eval_results['std_packet_loss']:.4f}")

    return eval_results


In [3]:
# Pick the environment configuration by index and show it.
configIdx = 0
envParams = getEnvConfig(configIdx)
visualizeEnvConfig(envParams)

# Build the simulator from the processed traffic data, then select
# mode "train" with type "data".
trafficDataParentPath = '../../../../data/processed/traffic'
simEnv = createEnv(envParams, trafficDataParentPath)
simEnv.selectMode(type="data", mode="train")

Environment Configuration
Number of Users:        8
Window Length:          200
Dataflow:               thumb_fr
Sigmoid K List:         [0.1, 0.2, 0.3, 0.4, 0.5]
Sigmoid S List:         [10.0, 10.0, 10.0, 10.0, 10.0]
Resource Bar:           5
Bandwidth:              100
Sub Agents:             [[0, 0]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7]]


In [4]:
# Configuration - change these parameters as needed
ALGORITHM = "SAC"  # Options: "SAC", "PPO", "A2C", "TD3", "DQN"
TIMESTEPS = 100_000  # Training timesteps
TIMESTEPS_PER_EPISODE = 100
N_ENVS = 8
SAVEPATH = f"../../../../data/results/drl/{ALGORITHM}"
AGENTNAME = f"config{configIdx}"
OBVMODE = "predicted"

# Step 1: Train the model
# Arguments are gathered in one place so they are easy to inspect and tweak.
train_kwargs = dict(
    simParams=envParams,
    simEnv=simEnv,
    save_path=SAVEPATH,
    agent_name=AGENTNAME,
    algorithm_name=ALGORITHM,
    obvMode=OBVMODE,
    total_timesteps=TIMESTEPS,
    timesteps_per_episode=TIMESTEPS_PER_EPISODE,
    n_envs=N_ENVS,
    early_stop_threshold=0.05,
    min_steps_before_stop=100,
    moving_avg_window=100,
)
model, callback, training_time, env = train(**train_kwargs)

# Close training environment
env.close()

# Step 2: Evaluate the model
eval_kwargs = dict(
    model=model,
    simParams=envParams,
    simEnv=simEnv,
    algorithm_name=ALGORITHM,
    obvMode=OBVMODE,
    deterministic=True,
    timesteps_per_episode=TIMESTEPS_PER_EPISODE,
    n_steps=1000,
)
results = eval(**eval_kwargs)


SAC as Agent configconfig0's Training
Creating environment...
Created 8 parallel environments

Training SAC as config0 Agent
Total timesteps: 100000
Early stop threshold: 0.05
Min steps before stop: 100
Moving average window: 100 data points
Environment: Parallel environments (details not accessible)
Save path: ../../../../data/results/drl/SAC.zip
Starting training...

Timestep: 800
Episode: 8
  Env  0: Packet Loss Rate = 0.6401
  Env  1: Packet Loss Rate = 0.6760
  Env  2: Packet Loss Rate = 0.6750
  Env  3: Packet Loss Rate = 0.6820
  Env  4: Packet Loss Rate = 0.6581
  Env  5: Packet Loss Rate = 0.6645
  Env  6: Packet Loss Rate = 0.6596
  Env  7: Packet Loss Rate = 0.6563
--------------------------------------------------------------------------------
  Best Loss (this step):    0.6401
  Avg Loss (this step):     0.6640
  Avg Loss (last 10 eps):   0.6640
  Total Episodes So Far:    8


Timestep: 1600
Episode: 16
  Env  0: Packet Loss Rate = 0.6823
  Env  1: Packet Loss Rate = 0.645

In [5]:


# Step 3: Visualize results
# Plotting is best-effort: if it fails, fall back to a short text summary of
# the reward data held by the training callback.
print("\nGenerating visualizations...")
try:
    plot_training_results(callback, results, ALGORITHM, save_plots=True)
except Exception as plot_error:
    print(f"Visualization failed: {plot_error}")
    print("Training data summary:")
    reward_history = getattr(callback, 'cumulative_rewards', None)
    if not reward_history:
        print("  No cumulative reward data captured")
    else:
        print(f"  Cumulative reward data points: {len(reward_history)}")
        print(f"  Final cumulative reward: {reward_history[-1]:.4f}")



Generating visualizations...
Plotting training progress: 6250 data points over 100000 timesteps
Plot saved to: sac_training_progress.png
