In [1]:
import os
# Set before numpy / matplotlib and the project modules are imported below.
# NOTE(review): presumably works around the "duplicate OpenMP runtime" abort
# when several libraries each bundle their own copy — confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import sys
# Prepend the directory four levels up to the import search path; the
# `src.difsched...` imports below depend on it (presumably the repository
# root — verify if the notebook is moved).
sys.path.insert(0, '../../../../')

import numpy as np
import matplotlib.pyplot as plt
import pickle

from src.difsched.config import getExpConfig, visualizeExpConfig
from src.difsched.config import getEnvConfig, visualizeEnvConfig
from src.difsched.config import getDatasetConfig, visualizeDatasetConfig
from src.difsched.env.EnvironmentSim import createEnv
from src.difsched.utils.EnvInterface import EnvInterface
from src.difsched.training import training

In [2]:
# Hyperparameters forwarded unchanged to `training(...)` as its `hyperparams`
# argument. Written with the keyword constructor; keys, values and insertion
# order are the same as a literal would give.
hyperparams = dict(
    # diffusion settings
    N_diffusion_steps=30,
    schedule_type="vp",
    abs_action_max=1.0,
    # optimisation settings
    gamma=0.99,
    lr=5e-4,
    decay_lr=True,
    weight_decay=0.0,
    # critic settings
    num_critics=8,
    lcb_coef=0.15,
    q_sample_eta=1.0,
    # loss weighting
    weight_entropy_loss=0.01,
    weight_q_loss=1.0,
    approximate_action=True,
    # EMA settings
    ema_tau=0.001,
    ema_period=20,
    ema_begin_update=1000,
    # network / runtime settings
    layer_norm=True,
    grad_clip=3.0,
    device='cuda',
)

# Training-loop settings forwarded unchanged to `training(...)` as its first
# positional argument. Same keys, values and insertion order as a dict literal.
trainingConfig = dict(
    # loop sizing
    iterations=100,
    batch_size=100,
    LEN_eval=50,
    report_period=10,
    len_period=50,
    warm_up_period=50,
    # bounds for the sampling ratio
    max_sp_ratio=1.0,
    min_sp_ratio=0.5,
    # bounds for the BC-loss weight (equal here, so the weight stays at 1.0)
    max_weight_bc_loss=1.0,
    min_weight_bc_loss=1.0,
    # replay-buffer capacity
    rb_capacity=30000,
)

In [None]:
# --- Resolve the experiment configuration and the configs it points to -----
expConfigIdx = 3
expParams = getExpConfig(expConfigIdx)
visualizeExpConfig(expParams)

# The experiment config carries the indices of the env and dataset configs.
envConfigIdx = expParams['env_config_idx']
envParams = getEnvConfig(envConfigIdx)
visualizeEnvConfig(envParams)

datasetConfigIdx = expParams['dataset_config_idx']
datasetParams = getDatasetConfig(datasetConfigIdx)
visualizeDatasetConfig(datasetParams)

# --- Build the simulated environment and switch it to training data --------
trafficDataParentPath = '../../../../data/raw/traffic'
env = createEnv(envParams, trafficDataParentPath)
env.selectMode(mode="train", type="data")

# --- Load the pre-recorded offline dataset ---------------------------------
# The file name is keyed by the dataset config index and the env's sub-agent index.
datasetPath = f'../../../../data/processed/offline_dataset/subOptimalAgent_encConfig{datasetConfigIdx}_{envParams["sub_agents_idx"]}.pkl'
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load dataset files produced by this project / from a trusted source.
with open(datasetPath, 'rb') as f:
    dataset_expert = pickle.load(f)

# Re-key the recorded arrays into the (s, a, r, s') layout handed to training().
dataset_off = {
    'observations': dataset_expert['uRecord'],
    'actions': dataset_expert['actionsRecord'],
    'rewards': dataset_expert['rewardRecord'],
    'next_observations': dataset_expert['uNextRecord']
}
# NOTE(review): the label says "packet loss rate" but the value is the mean of
# `rewardRecord`, which is also used as 'rewards' above — confirm the label.
print(f"Avg. packet loss rate: {np.mean(dataset_expert['rewardRecord'])}")
print(f"length of dataset: {len(dataset_off['observations'])}")

# --- Prepare the output folder ---------------------------------------------
save_folder = f"../../../../data/results/dql/config_{expConfigIdx}"
# exist_ok=True creates the folder when missing and is a no-op when it already
# exists, without the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(save_folder, exist_ok=True)

# --- Launch training -------------------------------------------------------
envInterface = EnvInterface(envParams, discrete_state=False)
training(
    trainingConfig, dataset_off, hyperparams, env, envInterface, save_folder, N_exp_list=[0,1,2]
)

dataset_config_idx: 5
env_config_idx: 3
Environment Configuration
Number of Users:        20
Window Length:          200
Dataflow:               thumb_bk
Sigmoid K List:         [0.1, 0.2, 0.3, 0.4, 0.5]
Sigmoid S List:         [10.0, 10.0, 10.0, 10.0, 10.0]
Resource Bar:           5
Bandwidth:              200
Sub Agents:             [[1, 1, 1, 1, 1]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19]]
Dataset Configuration
Number of Users:        20
Window Length:          200
N_aggregation:          4
Dataflow:               thumb_bk
Random Seed:            999
Resource Bar:           5
Bandwidth:              200
Sigmoid K List:         [0.3]
Sigmoid S List:         [10.0]
Sub Agents:             [[1, 1, 1, 1, 1]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19]]
Avg. packet loss rate: 0.47515393810082424
length of dataset: 10000
Expert's Reward: 0.5251383185386658
state_dim:

                                                                   

Ld: 0.1252062913030386, Lq: 0.5238900661468506, Le: -1.5308978867530822, loss_Q: 0.007402314264327288
Avg. Reward: 0.6811436353469725, sample_ratio: 0.9, weight_bc_loss: 1.0


                                                                   

save model 0_best, smoothed reward: 0.6523986187820937


                                                                   

save model 0_best, smoothed reward: 0.6483800506500028


                                                                   

save model 0_best, smoothed reward: 0.6342212397501098


                                                                   

save model 0_best, smoothed reward: 0.6192407262636752


                                                                   

save model 0_best, smoothed reward: 0.598244279515646


                                                                   

save model 0_best, smoothed reward: 0.5788081395998873


                                                                   

save model 0_best, smoothed reward: 0.5581735799653629


                                                                   

save model 0_best, smoothed reward: 0.5449868415692767


                                                                   

save model 0_best, smoothed reward: 0.5307652869091581


                                                                   

save model 0_best, smoothed reward: 0.5174725404948173
Ld: 0.12026184476912022, Lq: 0.5398865842819214, Le: -1.5362488389015199, loss_Q: 0.007531142416410148
Avg. Reward: 0.5578584100052316, sample_ratio: 0.8, weight_bc_loss: 1.0


                                                                   

save model 0_best, smoothed reward: 0.5073533151058931


                                                                   

save model 0_best, smoothed reward: 0.4974433113327074


                                                                   

save model 0_best, smoothed reward: 0.4921204434482426


Evaluation windows:  70%|███████   | 35/50 [00:01<00:00, 26.82it/s]