In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import sys
sys.path.insert(0, '../../../../')

import numpy as np
import matplotlib.pyplot as plt
import pickle

from src.difsched.config import getExpConfig, visualizeExpConfig
from src.difsched.config import getEnvConfig, visualizeEnvConfig
from src.difsched.config import getDatasetConfig, visualizeDatasetConfig
from src.difsched.env.EnvironmentSim import createEnv
from src.difsched.utils.EnvInterface import EnvInterface
from src.difsched.training import training

In [2]:
# Agent hyperparameters. The mapping is passed unchanged to `training(...)`
# in the sweep cell; the meaning of each key is defined inside src.difsched
# (not visible in this notebook). The group labels below follow the key names
# only and are a reading aid, not a statement about how the values are used.
hyperparams = dict(
    # -- diffusion / action sampling (by name)
    N_diffusion_steps=30,
    schedule_type='vp',
    abs_action_max=1.0,
    # -- optimisation (by name)
    gamma=0.99,
    lr=5e-4,
    decay_lr=True,
    weight_decay=0.0,
    # -- critics and loss weighting (by name)
    num_critics=8,
    lcb_coef=0.15,
    q_sample_eta=1.0,
    weight_entropy_loss=0.01,
    weight_q_loss=1.0,
    approximate_action=True,
    # -- EMA schedule (by name)
    ema_tau=0.001,
    ema_period=20,
    ema_begin_update=1000,
    # -- network / runtime (by name)
    layer_norm=True,
    grad_clip=3.0,
    device='cuda',  # NOTE(review): hard-coded; presumably requires a CUDA-capable machine
)


# Training-loop settings. The mapping is passed unchanged as the first
# argument of `training(...)` in the sweep cell; how each key is consumed is
# defined in src.difsched.training (not visible in this notebook).
trainingConfig = dict(
    BC_loss=True,
    # -- run length, batching and reporting (by name)
    iterations=100,
    batch_size=100,
    LEN_eval=50,
    report_period=10,
    len_period=500,
    warm_up_period=50,
    # -- bounds for the sample ratio and the BC-loss weight; the run log
    #    reports `sample_ratio` and `weight_bc_loss` values inside these ranges
    max_sp_ratio=1.0,
    min_sp_ratio=0.5,
    max_weight_bc_loss=1.0,
    min_weight_bc_loss=0.2,
    # -- replay buffer size (by name)
    rb_capacity=30000,
)



In [3]:
# Training sweep: for each experiment configuration index 0..3, build the
# environment, load the matching offline dataset, and launch `training(...)`.
#
# Both data roots are the same for every configuration, so they are resolved
# once here rather than rebuilt on every iteration.
trafficDataParentPath = '../../../../data/processed/traffic'
datasetFolder = '../../../../data/processed/offline_dataset'

for expConfigIdx in range(4):
    # Resolve and print the experiment -> environment -> dataset configuration chain.
    expParams = getExpConfig(expConfigIdx)
    visualizeExpConfig(expParams)

    envConfigIdx = expParams['env_config_idx']
    envParams = getEnvConfig(envConfigIdx)
    visualizeEnvConfig(envParams)

    datasetConfigIdx = expParams['dataset_train_config_idx']
    datasetParams = getDatasetConfig(datasetConfigIdx)
    visualizeDatasetConfig(datasetParams)

    # Environment in training mode, plus the interface object used by training().
    env = createEnv(envParams, trafficDataParentPath)
    env.selectMode(mode="train", type="data")
    envInterface = EnvInterface(envParams, discrete_state=False)

    # Offline dataset recorded for this dataset/sub-agent configuration.
    # NOTE: pickle.load can execute arbitrary code embedded in the file; only
    # load datasets that were produced by this project.
    with open(f'{datasetFolder}/subOptimalAgent_encConfig{datasetConfigIdx}_{envParams["sub_agents_idx"]}.pkl', 'rb') as f:
        dataset_expert = pickle.load(f)

    # Re-key the recorded arrays to the names handed to training().
    dataset_off = {
        'observations': dataset_expert['uRecord'],
        'actions': dataset_expert['actionsRecord'],
        'rewards': dataset_expert['rewardRecord'],
        'next_observations': dataset_expert['uNextRecord']
    }
    # NOTE(review): the label says "packet loss rate" but the value printed is
    # the mean of `rewardRecord` - confirm that the reward is the loss rate.
    print(f"Avg. packet loss rate: {np.mean(dataset_expert['rewardRecord'])}")
    print(f"length of dataset: {len(dataset_off['observations'])}")

    # With BC
    save_folder = f"../../../../data/results/dql/config_{expConfigIdx}/long"
    # exist_ok=True replaces the separate exists() check: same result when the
    # folder is already there, without the check-then-create race.
    os.makedirs(save_folder, exist_ok=True)

    # N_exp_list presumably selects the experiment/run ids to train (the run
    # log shows "save model 1_best") - confirm against training().
    training(
        trainingConfig, dataset_off, hyperparams, env, envInterface, save_folder, N_exp_list=[1,2]
    )
    

dataset_train_config_idx: 0
dataset_test_config_idx: 2
env_config_idx: 0
Environment Configuration
Number of Users:        8
Window Length:          200
Dataflow:               thumb_fr
Sigmoid K List:         [0.1, 0.2, 0.3, 0.4, 0.5]
Sigmoid S List:         [10.0, 10.0, 10.0, 10.0, 10.0]
Resource Bar:           5
Bandwidth:              100
Sub Agents:             [[0, 0]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7]]
Dataset Configuration
Number of Users:        8
Window Length:          200
N_aggregation:          4
Dataflow:               thumb_fr
Random Seed:            999
Resource Bar:           5
Bandwidth:              100
Sigmoid K List:         [0.3]
Sigmoid S List:         [10.0]
Sub Agents:             [[0, 0]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7]]
Avg. packet loss rate: 0.32451573558442404
length of dataset: 10000
1.0 0.2
Expert's Reward: 0.6743894815444946
state_dim: 8, action_dim: 8


                                                                   

Ld: 0.14892275653779508, Lq: 0.6739043366909027, Le: -1.5640923225879668, loss_Q: 0.008429121258668602
Avg. Reward: 0.4452561476330543, sample_ratio: 0.9, weight_bc_loss: 0.84


                                                                   

save model 1_best, smoothed reward: 0.40540113605709005


                                                                   

save model 1_best, smoothed reward: 0.40055308935597456


                                                                   

save model 1_best, smoothed reward: 0.3985655590823134


                                                                   

save model 1_best, smoothed reward: 0.3970601090756587


                                                                   

save model 1_best, smoothed reward: 0.3917101344244475


                                                                   

save model 1_best, smoothed reward: 0.3901908546721513


                                                                   

save model 1_best, smoothed reward: 0.383229072626394


                                                                   

save model 1_best, smoothed reward: 0.3756676720242898
Ld: 0.15229831464588642, Lq: 0.6861505633592606, Le: -1.544719054698944, loss_Q: 0.008954102974385023
Avg. Reward: 0.38822665955821173, sample_ratio: 0.8, weight_bc_loss: 0.6799999999999999


                                                                   

save model 1_best, smoothed reward: 0.37108781998370655


                                                                   

save model 1_best, smoothed reward: 0.37100166921270017


                                                                   

save model 1_best, smoothed reward: 0.368463042506357


                                                                   

save model 1_best, smoothed reward: 0.362407185560114


                                                                   

save model 1_best, smoothed reward: 0.36123955611502157


                                                                   

save model 1_best, smoothed reward: 0.3549494447384173
Ld: 0.12953221619129182, Lq: 0.6968738669157029, Le: -1.5274147033691405, loss_Q: 0.009417618233710527
Avg. Reward: 0.36355914852817806, sample_ratio: 0.7, weight_bc_loss: 0.52


                                                                   

save model 1_best, smoothed reward: 0.35368246658335406


                                                                   

save model 1_best, smoothed reward: 0.3508758782127498


                                                                   

save model 1_best, smoothed reward: 0.3483261992635627


                                                                   

save model 1_best, smoothed reward: 0.34327250553203353
Ld: 0.1472952302545309, Lq: 0.7137804299592971, Le: -1.4640526390075683, loss_Q: 0.009803332891315221
Avg. Reward: 0.34888456093289866, sample_ratio: 0.6, weight_bc_loss: 0.36


                                                                   

save model 1_best, smoothed reward: 0.34279333260895384


                                                                   

save model 1_best, smoothed reward: 0.33940872626620894


                                                                   

Ld: 0.14126367874443532, Lq: 0.7351236653327942, Le: -1.4140260767936708, loss_Q: 0.009907772224396467
Avg. Reward: 0.34629783942548775, sample_ratio: 0.5, weight_bc_loss: 0.2


                                                                   

Ld: 0.1622897854447365, Lq: 0.7436058729887008, Le: -1.4541399586200714, loss_Q: 0.010037265047430992
Avg. Reward: 0.34929507592221043, sample_ratio: 0.5, weight_bc_loss: 0.2


                                                                   

Ld: 0.1381592170894146, Lq: 0.7392136859893799, Le: -1.5042879056930543, loss_Q: 0.00994401636067778
Avg. Reward: 0.34927539555431786, sample_ratio: 0.5, weight_bc_loss: 0.2


                                                                   

save model 1_best, smoothed reward: 0.336385300626665


                                                                   

save model 1_best, smoothed reward: 0.3325917621811684


                                                                   

Ld: 0.14725202798843384, Lq: 0.7452993202209472, Le: -1.517020364999771, loss_Q: 0.010411682315170765
Avg. Reward: 0.3409068616423959, sample_ratio: 0.5, weight_bc_loss: 0.2


                                                                   

Ld: 0.12379820868372918, Lq: 0.7490597504377365, Le: -1.5481984901428223, loss_Q: 0.009829280893318354
Avg. Reward: 0.338600877931043, sample_ratio: 0.5, weight_bc_loss: 0.2


                                                                   

Ld: 0.13573652379214762, Lq: 0.7612938672304154, Le: -1.5370420050621032, loss_Q: 0.010318070915527642
Avg. Reward: 0.3464769192801323, sample_ratio: 0.5, weight_bc_loss: 0.2


                                                                   

Ld: 0.11611989922821522, Lq: 0.7707113760709763, Le: -1.5661921250820159, loss_Q: 0.009696564665064215
Avg. Reward: 0.34708288854869995, sample_ratio: 0.5, weight_bc_loss: 0.2


                                                                   

Ld: 0.12385011650621891, Lq: 0.7846882021427155, Le: -1.5604680120944976, loss_Q: 0.01002985866740346
Avg. Reward: 0.34594034811381, sample_ratio: 0.5, weight_bc_loss: 0.2


                                                                   

save model 1_best, smoothed reward: 0.3324044683834506


                                                                   

Ld: 0.11206394128501415, Lq: 0.802804805636406, Le: -1.5919020569324493, loss_Q: 0.009520480539649726
Avg. Reward: 0.3394311279121512, sample_ratio: 0.5, weight_bc_loss: 0.2


                                                                   

Ld: 0.1185513886809349, Lq: 0.8202411150932312, Le: -1.5872066390514374, loss_Q: 0.009718218077905476
Avg. Reward: 0.33586743871216174, sample_ratio: 0.5, weight_bc_loss: 0.2


                                                                   

Expert's Reward: 0.6759028434753418
state_dim: 8, action_dim: 8


                                                                   

KeyboardInterrupt: 