In [10]:
import os
# Must be set before the numerical libraries below are imported.
# NOTE(review): KMP_DUPLICATE_LIB_OK is an Intel OpenMP runtime switch; "TRUE"
# lets the process keep running when more than one copy of that runtime is
# loaded instead of aborting. It is a workaround for a packaging conflict, not
# a fix -- confirm it is still needed in the target environment.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import sys
# Put the directory four levels above the current working directory at the
# front of the import search path; the `src.difsched` imports further down
# presumably resolve through it. The path is relative, so it depends on the
# directory the kernel was started in.
sys.path.insert(0, '../../../../')

import numpy as np
import matplotlib.pyplot as plt
import pickle

from src.difsched.config import getExpConfig, visualizeExpConfig
from src.difsched.config import getEnvConfig, visualizeEnvConfig
from src.difsched.config import getDatasetConfig, visualizeDatasetConfig
from src.difsched.env.EnvironmentSim import createEnv
from src.difsched.utils.EnvInterface import EnvInterface
from src.difsched.training import training

In [11]:
# Agent hyperparameters for the run that trains with the BC loss enabled.
# NOTE(review): the meaning of each key is defined by the consumer
# (`src.difsched.training.training`); verify there before tuning.
hyperparams = dict(
    N_diffusion_steps=30,
    schedule_type="vp",
    abs_action_max=1.0,
    gamma=0.99,
    lr=5e-4,
    decay_lr=True,
    weight_decay=0.0,
    num_critics=8,
    lcb_coef=0.15,
    q_sample_eta=1.0,
    weight_entropy_loss=0.01,
    weight_q_loss=1.0,
    approximate_action=True,
    ema_tau=0.001,
    ema_period=20,
    ema_begin_update=1000,
    layer_norm=True,
    grad_clip=3.0,
    # NOTE(review): presumably requires a CUDA-capable GPU -- confirm whether
    # 'cpu' is accepted as a fallback.
    device='cuda',
)

# Agent hyperparameters for the run without the BC loss: identical to
# `hyperparams` except for a larger learning rate and a smaller `lcb_coef`.
# Built as a separate dict object, so changing one never affects the other.
WithoutBchyperparams = {
    **hyperparams,
    'lr': 5e-3,
    'lcb_coef': 0.01,
}

# Training-loop settings for the run with the BC loss enabled.
trainingConfig = dict(
    BC_loss=True,
    iterations=100,
    batch_size=100,
    LEN_eval=50,
    report_period=10,
    len_period=50,
    warm_up_period=50,
    max_sp_ratio=1.0,
    min_sp_ratio=0.5,
    max_weight_bc_loss=1.0,
    min_weight_bc_loss=1.0,
    rb_capacity=30000,
)

# Training-loop settings for the run without the BC loss: same schedule as
# `trainingConfig`, with the BC loss switched off and both sample-ratio bounds
# pinned to 0.01.
trainingWithoutBcConfig = {
    **trainingConfig,
    'BC_loss': False,
    'max_sp_ratio': 0.01,
    'min_sp_ratio': 0.01,
}

In [12]:
# For each experiment configuration: build the environment, load the matching
# offline dataset, then train twice -- once with the BC loss and once without.
# Results go to data/results/dql/config_<idx>[/without_bc].

# Input locations do not depend on the experiment index, so define them once.
# They are relative to the working directory the kernel was started in.
trafficDataParentPath = '../../../../data/processed/traffic'
datasetFolder = '../../../../data/processed/offline_dataset'

# Experiment configurations 0..3 are run.
for expConfigIdx in range(4):
    expParams = getExpConfig(expConfigIdx)
    visualizeExpConfig(expParams)

    envConfigIdx = expParams['env_config_idx']
    envParams = getEnvConfig(envConfigIdx)
    visualizeEnvConfig(envParams)

    datasetConfigIdx = expParams['dataset_train_config_idx']
    datasetParams = getDatasetConfig(datasetConfigIdx)
    visualizeDatasetConfig(datasetParams)

    env = createEnv(envParams, trafficDataParentPath)
    env.selectMode(mode="train", type="data")
    envInterface = EnvInterface(envParams, discrete_state=False)

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load dataset files that were produced by this project.
    with open(f'{datasetFolder}/subOptimalAgent_encConfig{datasetConfigIdx}_{envParams["sub_agents_idx"]}.pkl', 'rb') as f:
        dataset_expert = pickle.load(f)

    # Re-key the recorded transitions under the names passed on to training().
    dataset_off = {
        'observations': dataset_expert['uRecord'],
        'actions': dataset_expert['actionsRecord'],
        'rewards': dataset_expert['rewardRecord'],
        'next_observations': dataset_expert['uNextRecord']
    }
    # NOTE(review): the label says "packet loss rate" but the value printed is
    # the mean of 'rewardRecord' -- confirm which quantity that record holds.
    print(f"Avg. packet loss rate: {np.mean(dataset_expert['rewardRecord'])}")
    print(f"length of dataset: {len(dataset_off['observations'])}")

    # The two runs differ only in output sub-folder, training config and
    # hyperparameters; the "with BC" run goes first, as before.
    # NOTE(review): both runs receive the same `env` / `envInterface` objects.
    # If training() mutates environment state, the second run does not start
    # from a fresh environment -- confirm this is intended.
    for runSubfolder, runTrainingConfig, runHyperparams in (
        ("", trainingConfig, hyperparams),                                # With BC
        ("/without_bc", trainingWithoutBcConfig, WithoutBchyperparams),   # Without BC
    ):
        save_folder = f"../../../../data/results/dql/config_{expConfigIdx}{runSubfolder}"
        # exist_ok=True avoids the check-then-create race and is a no-op when
        # the folder is already there.
        os.makedirs(save_folder, exist_ok=True)

        training(
            runTrainingConfig, dataset_off, runHyperparams, env, envInterface, save_folder, N_exp_list=[1,2]
        )

dataset_train_config_idx: 0
dataset_test_config_idx: 2
env_config_idx: 0
Environment Configuration
Number of Users:        8
Window Length:          200
Dataflow:               thumb_fr
Sigmoid K List:         [0.1, 0.2, 0.3, 0.4, 0.5]
Sigmoid S List:         [10.0, 10.0, 10.0, 10.0, 10.0]
Resource Bar:           5
Bandwidth:              100
Sub Agents:             [[0, 0]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7]]
Dataset Configuration
Number of Users:        8
Window Length:          200
N_aggregation:          4
Dataflow:               thumb_fr
Random Seed:            999
Resource Bar:           5
Bandwidth:              100
Sigmoid K List:         [0.3]
Sigmoid S List:         [10.0]
Sub Agents:             [[0, 0]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7]]
Avg. packet loss rate: 0.32451573558442404
length of dataset: 10000
1.0 1.0
Expert's Reward: 0.6756755709648132
state_dim: 8, action_dim: 8


                                                                   

Ld: 0.14715328589081764, Lq: 0.6695501345396042, Le: -1.5933293879032135, loss_Q: 0.00813582656905055
Avg. Reward: 0.4141829729853999, sample_ratio: 0.9, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.393937292974722


                                                                   

save model 1_best, smoothed reward: 0.3869814588384461


                                                                   

save model 1_best, smoothed reward: 0.38185780598789687


                                                                   

save model 1_best, smoothed reward: 0.3782196482896384


                                                                   

save model 1_best, smoothed reward: 0.3736041234554618


                                                                   

save model 1_best, smoothed reward: 0.3648540200610441


                                                                   

save model 1_best, smoothed reward: 0.36061055495765765


                                                                   

save model 1_best, smoothed reward: 0.35223398333238176


                                                                   

save model 1_best, smoothed reward: 0.35150726900000095


                                                                   

save model 1_best, smoothed reward: 0.3510695557739449
Ld: 0.14746171727776528, Lq: 0.6761547142267227, Le: -1.577999885082245, loss_Q: 0.009042526679113507
Avg. Reward: 0.36233683961470337, sample_ratio: 0.8, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.34990230133694994


                                                                   

save model 1_best, smoothed reward: 0.34688582192574297


                                                                   

Ld: 0.1297011637687683, Lq: 0.6818511879444122, Le: -1.5794182515144348, loss_Q: 0.009272116054780781
Avg. Reward: 0.35109873237325434, sample_ratio: 0.7, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.3462866980410925


                                                                   

Ld: 0.13752345889806747, Lq: 0.6871314054727554, Le: -1.5765607643127442, loss_Q: 0.010194091266021133
Avg. Reward: 0.3531682651577357, sample_ratio: 0.6, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.3452775184672758


                                                                   

save model 1_best, smoothed reward: 0.3415521712467021


                                                                   

Ld: 0.12000786304473877, Lq: 0.692154278755188, Le: -1.597023595571518, loss_Q: 0.009891086486168205
Avg. Reward: 0.3516577399666295, sample_ratio: 0.5, weight_bc_loss: 1.0


                                                                   

Expert's Reward: 0.6754611134529114
state_dim: 8, action_dim: 8


                                                                   

Ld: 0.1506381620466709, Lq: 0.6719689923524856, Le: -1.570216372013092, loss_Q: 0.008412794936448335
Avg. Reward: 0.4320982822912381, sample_ratio: 0.9, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.39859495546584284


                                                                   

save model 2_best, smoothed reward: 0.3985516120527042


                                                                   

save model 2_best, smoothed reward: 0.3957184046487251


                                                                   

save model 2_best, smoothed reward: 0.39182261332893703


                                                                   

save model 2_best, smoothed reward: 0.38628162277851885


                                                                   

save model 2_best, smoothed reward: 0.38441884864135245


                                                                   

save model 2_best, smoothed reward: 0.37862890389838827


                                                                   

save model 2_best, smoothed reward: 0.3763556707673674
Ld: 0.14850423455238343, Lq: 0.6788480269908905, Le: -1.5598970127105714, loss_Q: 0.009264974440447986
Avg. Reward: 0.3868697575822709, sample_ratio: 0.8, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.3714022608904425


                                                                   

save model 2_best, smoothed reward: 0.3706597009698363


                                                                   

save model 2_best, smoothed reward: 0.3674596693855743


                                                                   

save model 2_best, smoothed reward: 0.36632719246938056


                                                                   

save model 2_best, smoothed reward: 0.36231274950871983


                                                                   

save model 2_best, smoothed reward: 0.35997529379428894


                                                                   

save model 2_best, smoothed reward: 0.3491347963042681


                                                                   

save model 2_best, smoothed reward: 0.3480707721057541


                                                                   

save model 2_best, smoothed reward: 0.3416112247899549


                                                                   

save model 2_best, smoothed reward: 0.3398916043675754
Ld: 0.1223992021381855, Lq: 0.683270468711853, Le: -1.5843065643310548, loss_Q: 0.009278269060887396
Avg. Reward: 0.3511021769381476, sample_ratio: 0.7, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.3391500948112104


                                                                   

Ld: 0.13893370114266873, Lq: 0.6867573606967926, Le: -1.566473926305771, loss_Q: 0.010013578347861768
Avg. Reward: 0.34638728705869326, sample_ratio: 0.6, weight_bc_loss: 1.0


                                                                   

Ld: 0.12208979785442352, Lq: 0.6923777890205384, Le: -1.5996898090839387, loss_Q: 0.010038760798051953
Avg. Reward: 0.3479872172844344, sample_ratio: 0.5, weight_bc_loss: 1.0


                                                                   

1.0 1.0
Expert's Reward: 0.6749942302703857
state_dim: 8, action_dim: 8


                                                                   

Ld: 0.5431693422794343, Lq: 0.3083316311240196, Le: -1.4579694724082948, loss_Q: 0.022530973684042693
Avg. Reward: 0.714218125298064, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.7117333511002327


                                                                   

Ld: 0.5336419910192489, Lq: 0.3183470073342323, Le: -1.4804542922973634, loss_Q: 0.023110907599329947
Avg. Reward: 0.7191386947406488, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.4847541680932045, Lq: 0.3330920633673668, Le: -1.4727417254447936, loss_Q: 0.022040921207517387
Avg. Reward: 0.7209847757943815, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.7075390368904229


                                                                   

save model 1_best, smoothed reward: 0.7074787960931912


                                                                   

save model 1_best, smoothed reward: 0.7006563893179221


                                                                   

Ld: 0.4998850983381271, Lq: 0.3461413049697876, Le: -1.4765368974208832, loss_Q: 0.023403006568551064
Avg. Reward: 0.7044969885421943, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.7005765003119742


                                                                   

save model 1_best, smoothed reward: 0.6987478543924831


                                                                   

save model 1_best, smoothed reward: 0.6958221955971606


                                                                   

save model 1_best, smoothed reward: 0.6928786642421636


                                                                   

save model 1_best, smoothed reward: 0.6918348259562845


                                                                   

save model 1_best, smoothed reward: 0.6893069299600139


                                                                   

save model 1_best, smoothed reward: 0.6864370294174916


                                                                   

save model 1_best, smoothed reward: 0.6834672525355752
Ld: 0.47529074788093567, Lq: 0.3722406217455864, Le: -1.4723799443244934, loss_Q: 0.02288367919623852
Avg. Reward: 0.6876510392459299, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.680384495145209
Expert's Reward: 0.6753304600715637
state_dim: 8, action_dim: 8


                                                                   

Ld: 0.5307747367024421, Lq: 0.39926372468471527, Le: -1.4743405377864838, loss_Q: 0.024476598724722864
Avg. Reward: 0.5975826354109226, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.563396026348959


                                                                   

Ld: 0.49979153215885164, Lq: 0.4146329337358475, Le: -1.4823503232002258, loss_Q: 0.026717634554952383
Avg. Reward: 0.5912545189827105, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.4399695244431496, Lq: 0.4410083866119385, Le: -1.4942565977573394, loss_Q: 0.026886981800198554
Avg. Reward: 0.6020079128598, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.4421609628200531, Lq: 0.4508476647734642, Le: -1.4958367764949798, loss_Q: 0.029085093811154365
Avg. Reward: 0.617191627849009, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.40449705988168716, Lq: 0.478469300866127, Le: -1.505316573381424, loss_Q: 0.028504441175609827
Avg. Reward: 0.59210114319071, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

dataset_train_config_idx: 1
dataset_test_config_idx: 3
env_config_idx: 1
Environment Configuration
Number of Users:        20
Window Length:          200
Dataflow:               thumb_fr
Sigmoid K List:         [0.1, 0.2, 0.3, 0.4, 0.5]
Sigmoid S List:         [10.0, 10.0, 10.0, 10.0, 10.0]
Resource Bar:           5
Bandwidth:              200
Sub Agents:             [[0, 0, 0, 0, 0]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19]]
Dataset Configuration
Number of Users:        20
Window Length:          200
N_aggregation:          4
Dataflow:               thumb_fr
Random Seed:            999
Resource Bar:           5
Bandwidth:              200
Sigmoid K List:         [0.3]
Sigmoid S List:         [10.0]
Sub Agents:             [[0, 0, 0, 0, 0]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19]]
Avg. packet loss rate: 0.4536922222182947
length of dataset: 10000
1.0 1.0
Expert

                                                                   

Ld: 0.1209978262335062, Lq: 0.541104182600975, Le: -1.5160063922405242, loss_Q: 0.00657491029240191
Avg. Reward: 0.6557965637411078, sample_ratio: 0.9, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.6219377521125644


                                                                   

save model 1_best, smoothed reward: 0.6152251774193611


                                                                   

save model 1_best, smoothed reward: 0.6010223310788494


                                                                   

save model 1_best, smoothed reward: 0.5882737846492504


                                                                   

save model 1_best, smoothed reward: 0.5673968817956893


                                                                   

save model 1_best, smoothed reward: 0.5507892570511863


                                                                   

save model 1_best, smoothed reward: 0.5286535152695778


                                                                   

save model 1_best, smoothed reward: 0.5180792739748348


                                                                   

save model 1_best, smoothed reward: 0.510573655596709


                                                                   

save model 1_best, smoothed reward: 0.5062177688510869
Ld: 0.11382213935256004, Lq: 0.5654358017444611, Le: -1.5149482893943786, loss_Q: 0.00651515394449234
Avg. Reward: 0.536807325323388, sample_ratio: 0.8, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.5017674671516141


                                                                   

save model 1_best, smoothed reward: 0.49890675108492


                                                                   

save model 1_best, smoothed reward: 0.49113174066642995


                                                                   

save model 1_best, smoothed reward: 0.4841613051483667


                                                                   

save model 1_best, smoothed reward: 0.4745863557495538


                                                                   

save model 1_best, smoothed reward: 0.47233616792775546


                                                                   

save model 1_best, smoothed reward: 0.4689471252679797


                                                                   

save model 1_best, smoothed reward: 0.46527031459012147


                                                                   

save model 1_best, smoothed reward: 0.4627862948648247


                                                                   

Ld: 0.0901018512994051, Lq: 0.5657812148332596, Le: -1.5157644486427306, loss_Q: 0.006132363933138549
Avg. Reward: 0.46898761644680836, sample_ratio: 0.7, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.4599395348197941


                                                                   

Ld: 0.09536518491804599, Lq: 0.5649663984775544, Le: -1.5094686448574066, loss_Q: 0.0063890823279507455
Avg. Reward: 0.46834310868881446, sample_ratio: 0.6, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.4591349348511142


                                                                   

save model 1_best, smoothed reward: 0.4583736903230995
Ld: 0.0820808208733797, Lq: 0.5646535241603852, Le: -1.5142433249950409, loss_Q: 0.00642894197255373
Avg. Reward: 0.4627208549490692, sample_ratio: 0.5, weight_bc_loss: 1.0


                                                                   

Expert's Reward: 0.5460950136184692
state_dim: 20, action_dim: 20


                                                                   

Ld: 0.12049375236034393, Lq: 0.5429915553331375, Le: -1.5231224858760835, loss_Q: 0.006335289031267166
Avg. Reward: 0.6855467574487164, sample_ratio: 0.9, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.6516050405281251


                                                                   

save model 2_best, smoothed reward: 0.6506029021482417


                                                                   

save model 2_best, smoothed reward: 0.6404439342683728


                                                                   

save model 2_best, smoothed reward: 0.6299859062597577


                                                                   

save model 2_best, smoothed reward: 0.6148670050288412


                                                                   

save model 2_best, smoothed reward: 0.5973876328707306


                                                                   

save model 2_best, smoothed reward: 0.5768924245970497


                                                                   

save model 2_best, smoothed reward: 0.5579943449207593


                                                                   

save model 2_best, smoothed reward: 0.5369318369939347


                                                                   

save model 2_best, smoothed reward: 0.5217790586360496
Ld: 0.1147314228862524, Lq: 0.5593702208995819, Le: -1.5168315029144288, loss_Q: 0.006549462876282632
Avg. Reward: 0.5683230318324454, sample_ratio: 0.8, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.5037523685019714


                                                                   

save model 2_best, smoothed reward: 0.49402947296158756


                                                                   

save model 2_best, smoothed reward: 0.49039764740676245


                                                                   

save model 2_best, smoothed reward: 0.48272279955552255


                                                                   

save model 2_best, smoothed reward: 0.47850745162461006


                                                                   

save model 2_best, smoothed reward: 0.478159812583816


                                                                   

save model 2_best, smoothed reward: 0.47251019055815957


                                                                   

save model 2_best, smoothed reward: 0.4697008775990287


                                                                   

save model 2_best, smoothed reward: 0.46817070432007957
Ld: 0.09086511887609959, Lq: 0.5628041416406632, Le: -1.5055652642250061, loss_Q: 0.006181628522463143
Avg. Reward: 0.47333907797234476, sample_ratio: 0.7, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.46624594716717815


                                                                   

save model 2_best, smoothed reward: 0.465252487910678


                                                                   

save model 2_best, smoothed reward: 0.4645668174675576


                                                                   

save model 2_best, smoothed reward: 0.46300049751512595


                                                                   

Ld: 0.096280677318573, Lq: 0.5631649625301361, Le: -1.5044450294971465, loss_Q: 0.006656128149479627
Avg. Reward: 0.46813598360068465, sample_ratio: 0.6, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.4619810915991472


                                                                   

save model 2_best, smoothed reward: 0.4604558237654634


                                                                   

save model 2_best, smoothed reward: 0.4601195551507026


                                                                   

save model 2_best, smoothed reward: 0.45907194682511043


                                                                   

Ld: 0.0813717332482338, Lq: 0.56294852912426, Le: -1.5107044208049774, loss_Q: 0.00626516836695373
Avg. Reward: 0.4597045086841076, sample_ratio: 0.5, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.45534606335554645
1.0 1.0
Expert's Reward: 0.5456954836845398
state_dim: 20, action_dim: 20


                                                                   

Ld: 0.7450529325008393, Lq: 0.21479694291949272, Le: -1.4243397259712218, loss_Q: 0.012961568860337138
Avg. Reward: 0.8175662878018224, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.8012715343647693


                                                                   

save model 1_best, smoothed reward: 0.7985772973608708


                                                                   

save model 1_best, smoothed reward: 0.7965378708231561


                                                                   

save model 1_best, smoothed reward: 0.795170850427019


                                                                   

Ld: 0.672681006193161, Lq: 0.20354262694716455, Le: -1.4483337306976318, loss_Q: 0.013699768055230379
Avg. Reward: 0.8006965074548102, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.6237317222356796, Lq: 0.2213130636513233, Le: -1.4496252143383026, loss_Q: 0.008401442263275384
Avg. Reward: 0.8054982393975365, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.7916881890085427


                                                                   

Ld: 0.612917817234993, Lq: 0.20658849105238913, Le: -1.455633214712143, loss_Q: 0.011586584812030197
Avg. Reward: 0.7958948295950791, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.5854059863090515, Lq: 0.21645181849598885, Le: -1.4565209531784058, loss_Q: 0.005785105619579554
Avg. Reward: 0.7947568957173654, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.7886921118217045
Expert's Reward: 0.5470169186592102
state_dim: 20, action_dim: 20


                                                                   

Ld: 0.7160496824979782, Lq: 0.1936461801826954, Le: -1.428456153869629, loss_Q: 0.010425377739593386
Avg. Reward: 0.8136931540701143, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.8028981641627139


                                                                   

save model 2_best, smoothed reward: 0.8026022805973383


                                                                   

save model 2_best, smoothed reward: 0.7984437344239633


                                                                   

save model 2_best, smoothed reward: 0.7948767612884857


                                                                   

Ld: 0.667856330871582, Lq: 0.19449494153261185, Le: -1.446903153657913, loss_Q: 0.013177498951554298
Avg. Reward: 0.8031119530979455, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.6245024979114533, Lq: 0.208167916983366, Le: -1.445963156223297, loss_Q: 0.007713131625205278
Avg. Reward: 0.8058262554733217, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.7947547820627563
Ld: 0.6189647769927978, Lq: 0.20422838404774665, Le: -1.4548823392391206, loss_Q: 0.010500474302098155
Avg. Reward: 0.8007648799756076, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.7942729988863281


                                                                   

Ld: 0.5844037967920304, Lq: 0.21517769902944564, Le: -1.4566529774665833, loss_Q: 0.005119274561293423
Avg. Reward: 0.8033784656840194, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

dataset_train_config_idx: 4
dataset_test_config_idx: 6
env_config_idx: 2
Environment Configuration
Number of Users:        8
Window Length:          200
Dataflow:               thumb_bk
Sigmoid K List:         [0.1, 0.2, 0.3, 0.4, 0.5]
Sigmoid S List:         [10.0, 10.0, 10.0, 10.0, 10.0]
Resource Bar:           5
Bandwidth:              100
Sub Agents:             [[1, 1]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7]]
Dataset Configuration
Number of Users:        8
Window Length:          200
N_aggregation:          4
Dataflow:               thumb_bk
Random Seed:            999
Resource Bar:           5
Bandwidth:              100
Sigmoid K List:         [0.3]
Sigmoid S List:         [10.0]
Sub Agents:             [[1, 1]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7]]
Avg. packet loss rate: 0.3505504511341419
length of dataset: 10000
1.0 1.0
Expert's Reward: 0.6494050025939941
state_dim: 8, action_dim: 8


                                                                   

Ld: 0.16470880106091498, Lq: 0.6483861792087555, Le: -1.572875679731369, loss_Q: 0.01321479163132608
Avg. Reward: 0.45027360889103507, sample_ratio: 0.9, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.4224675709843244


                                                                   

save model 1_best, smoothed reward: 0.4115592401758254


                                                                   

save model 1_best, smoothed reward: 0.40816642815875986


                                                                   

save model 1_best, smoothed reward: 0.3993367127998323


                                                                   

save model 1_best, smoothed reward: 0.3967561578758533


                                                                   

Ld: 0.1625543111562729, Lq: 0.6561079388856887, Le: -1.5623593938350677, loss_Q: 0.013688402101397514
Avg. Reward: 0.3984661172272385, sample_ratio: 0.8, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.3942073796150251


                                                                   

save model 1_best, smoothed reward: 0.39060989189592155


                                                                   

save model 1_best, smoothed reward: 0.3900969548826875


                                                                   

save model 1_best, smoothed reward: 0.38700890996927917


                                                                   

save model 1_best, smoothed reward: 0.38700263722142014


                                                                   

save model 1_best, smoothed reward: 0.3813289805780057


                                                                   

save model 1_best, smoothed reward: 0.38012636866670346


                                                                   

Ld: 0.14444871924817562, Lq: 0.6637109023332596, Le: -1.5692272889614105, loss_Q: 0.01317181734368205
Avg. Reward: 0.3846066570214943, sample_ratio: 0.7, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.37688906452644344


                                                                   

save model 1_best, smoothed reward: 0.37462041425359405


                                                                   

save model 1_best, smoothed reward: 0.3734540312214192


                                                                   

save model 1_best, smoothed reward: 0.37207098992531923
Ld: 0.14874487139284612, Lq: 0.6671472465991974, Le: -1.5599465596675872, loss_Q: 0.01427885794080794
Avg. Reward: 0.37571546712420967, sample_ratio: 0.6, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.3709066615930072


                                                                   

save model 1_best, smoothed reward: 0.3708375805293632


                                                                   

save model 1_best, smoothed reward: 0.36576053974120404


                                                                   

save model 1_best, smoothed reward: 0.36518082865207313


                                                                   

Ld: 0.13539067819714545, Lq: 0.6714966928958893, Le: -1.5770558393001557, loss_Q: 0.01370577784255147
Avg. Reward: 0.3742075753836821, sample_ratio: 0.5, weight_bc_loss: 1.0


                                                                   

Expert's Reward: 0.64894700050354
state_dim: 8, action_dim: 8


                                                                   

Ld: 0.16720750384032726, Lq: 0.6487921226024628, Le: -1.5709783244132995, loss_Q: 0.01320825157687068
Avg. Reward: 0.4726992898134597, sample_ratio: 0.9, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.4342699737270564


                                                                   

save model 2_best, smoothed reward: 0.4241904685659872


                                                                   

save model 2_best, smoothed reward: 0.4198936310205631


                                                                   

save model 2_best, smoothed reward: 0.4170678570717025


                                                                   

save model 2_best, smoothed reward: 0.411179880521589


                                                                   

save model 2_best, smoothed reward: 0.4074486543855651


                                                                   

save model 2_best, smoothed reward: 0.4012357119151716


                                                                   

save model 2_best, smoothed reward: 0.39640196222619156


                                                                   

save model 2_best, smoothed reward: 0.3942556077882588
Ld: 0.16435579255223273, Lq: 0.6597010242938995, Le: -1.5599449837207795, loss_Q: 0.013421860039234162
Avg. Reward: 0.4079006198368635, sample_ratio: 0.8, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.3912336893938969


                                                                   

save model 2_best, smoothed reward: 0.38587902489543247


                                                                   

save model 2_best, smoothed reward: 0.3821044674588859


                                                                   

save model 2_best, smoothed reward: 0.3798863712813355


                                                                   

Ld: 0.14122552178800107, Lq: 0.6648060762882233, Le: -1.5627405297756196, loss_Q: 0.013633838398382067
Avg. Reward: 0.3859097071364851, sample_ratio: 0.7, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.37484211003619317


                                                                   

save model 2_best, smoothed reward: 0.3720145460149321


                                                                   

save model 2_best, smoothed reward: 0.36682492286057355


                                                                   

save model 2_best, smoothed reward: 0.3652364524599401


                                                                   

Ld: 0.15271556250751017, Lq: 0.6715172219276428, Le: -1.5478014719486237, loss_Q: 0.013747283788397908
Avg. Reward: 0.367626114372105, sample_ratio: 0.6, weight_bc_loss: 1.0


                                                                   

Ld: 0.13206755436956882, Lq: 0.6735185754299163, Le: -1.572035036087036, loss_Q: 0.013477920647710561
Avg. Reward: 0.3724302612354486, sample_ratio: 0.5, weight_bc_loss: 1.0


                                                                   

1.0 1.0
Expert's Reward: 0.6500326991081238
state_dim: 8, action_dim: 8


                                                                   

Ld: 0.5711994227766991, Lq: 0.32145956307649615, Le: -1.4620800817012787, loss_Q: 0.0258020375482738
Avg. Reward: 0.6936043988097509, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.6888765825157501


                                                                   

Ld: 0.5486816108226776, Lq: 0.32593744426965715, Le: -1.4642612040042877, loss_Q: 0.026449743621051312
Avg. Reward: 0.7057312251040839, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.4935907021164894, Lq: 0.34621287137269974, Le: -1.4758582973480225, loss_Q: 0.025008902847766877
Avg. Reward: 0.6998628820458151, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.4922317636013031, Lq: 0.35613473266363144, Le: -1.481677372455597, loss_Q: 0.02526073783636093
Avg. Reward: 0.7031650183465218, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.4740194472670555, Lq: 0.3707504263520241, Le: -1.4836409711837768, loss_Q: 0.024618262834846975
Avg. Reward: 0.7198051183473133, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Expert's Reward: 0.6479772925376892
state_dim: 8, action_dim: 8


                                                                   

Ld: 0.5626474606990814, Lq: 0.34851479798555374, Le: -1.4541610372066498, loss_Q: 0.024971038438379764
Avg. Reward: 0.6714063844479263, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.6616453813793322


                                                                   

save model 2_best, smoothed reward: 0.6529981616017738


                                                                   

Ld: 0.5277039259672165, Lq: 0.3594167187809944, Le: -1.4705978214740754, loss_Q: 0.028238359205424786
Avg. Reward: 0.6664880353513354, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.4712674355506897, Lq: 0.3830169740319252, Le: -1.4810447931289672, loss_Q: 0.02597428085282445
Avg. Reward: 0.6743303581433597, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.6475215235718088


                                                                   

save model 2_best, smoothed reward: 0.6462002070571943


                                                                   

save model 2_best, smoothed reward: 0.6411160040893759


                                                                   

Ld: 0.47684049248695376, Lq: 0.39369056969881056, Le: -1.483207094669342, loss_Q: 0.028856995813548565
Avg. Reward: 0.6472742939235234, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.6288006264179034


                                                                   

save model 2_best, smoothed reward: 0.6241230380502836


                                                                   

Ld: 0.44146458476781847, Lq: 0.41805588334798816, Le: -1.4867608428001404, loss_Q: 0.026293083671480418
Avg. Reward: 0.636039878538977, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

dataset_train_config_idx: 5
dataset_test_config_idx: 7
env_config_idx: 3
Environment Configuration
Number of Users:        20
Window Length:          200
Dataflow:               thumb_bk
Sigmoid K List:         [0.1, 0.2, 0.3, 0.4, 0.5]
Sigmoid S List:         [10.0, 10.0, 10.0, 10.0, 10.0]
Resource Bar:           5
Bandwidth:              200
Sub Agents:             [[1, 1, 1, 1, 1]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19]]
Dataset Configuration
Number of Users:        20
Window Length:          200
N_aggregation:          4
Dataflow:               thumb_bk
Random Seed:            999
Resource Bar:           5
Bandwidth:              200
Sigmoid K List:         [0.3]
Sigmoid S List:         [10.0]
Sub Agents:             [[1, 1, 1, 1, 1]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19]]
Avg. packet loss rate: 0.47515393810082424
length of dataset: 10000
1.0 1.0
Exper

                                                                   

Ld: 0.12748679995536805, Lq: 0.5241856390237808, Le: -1.5316669976711272, loss_Q: 0.007160748513415456
Avg. Reward: 0.6957527601765543, sample_ratio: 0.9, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.665122315989898


                                                                   

save model 1_best, smoothed reward: 0.6591318269660731


                                                                   

save model 1_best, smoothed reward: 0.6557071399720718


                                                                   

save model 1_best, smoothed reward: 0.644367780705523


                                                                   

save model 1_best, smoothed reward: 0.6262242183805091


                                                                   

save model 1_best, smoothed reward: 0.6069450145940779


                                                                   

save model 1_best, smoothed reward: 0.5922178375769862


                                                                   

save model 1_best, smoothed reward: 0.5709833859470576


                                                                   

save model 1_best, smoothed reward: 0.5515821224243891


                                                                   

save model 1_best, smoothed reward: 0.5345496445693345
Ld: 0.12005731865763664, Lq: 0.5347440803050995, Le: -1.5257968783378602, loss_Q: 0.007739526247605682
Avg. Reward: 0.5803869314749217, sample_ratio: 0.8, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.5227482209129901


                                                                   

save model 1_best, smoothed reward: 0.5113479767039851


                                                                   

save model 1_best, smoothed reward: 0.5015363832858417


                                                                   

save model 1_best, smoothed reward: 0.4979059984400698


                                                                   

save model 1_best, smoothed reward: 0.49315595816562446


                                                                   

save model 1_best, smoothed reward: 0.4896903662619767


                                                                   

save model 1_best, smoothed reward: 0.48488215225222403


                                                                   

save model 1_best, smoothed reward: 0.4842719481882478


                                                                   

save model 1_best, smoothed reward: 0.48262365751224084


                                                                   

Ld: 0.09701054267585278, Lq: 0.5398220348358155, Le: -1.5168010592460632, loss_Q: 0.0072562116011977195
Avg. Reward: 0.48819528850830246, sample_ratio: 0.7, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.48247574988713443


                                                                   

Ld: 0.10357907861471176, Lq: 0.5433122348785401, Le: -1.5057309007644653, loss_Q: 0.007379288654774428
Avg. Reward: 0.48565413251248624, sample_ratio: 0.6, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.4822749911605827
Ld: 0.08958023555576801, Lq: 0.5456569123268128, Le: -1.5219852662086486, loss_Q: 0.0070461912406608465
Avg. Reward: 0.48369149739991146, sample_ratio: 0.5, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.4795780447187099
Expert's Reward: 0.5252515077590942
state_dim: 20, action_dim: 20


                                                                   

Ld: 0.1297121462225914, Lq: 0.5211267149448395, Le: -1.5280838358402251, loss_Q: 0.007688456648029387
Avg. Reward: 0.699063960207145, sample_ratio: 0.9, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.6718743394836934


                                                                   

save model 2_best, smoothed reward: 0.665866836522229


                                                                   

save model 2_best, smoothed reward: 0.6521895727019145


                                                                   

save model 2_best, smoothed reward: 0.6323386499074318


                                                                   

save model 2_best, smoothed reward: 0.61395746215145


                                                                   

save model 2_best, smoothed reward: 0.5916007123948479


                                                                   

save model 2_best, smoothed reward: 0.5653974082042506


                                                                   

save model 2_best, smoothed reward: 0.5467341708603516


                                                                   

save model 2_best, smoothed reward: 0.5314062911113171


                                                                   

save model 2_best, smoothed reward: 0.5180152694161299
Ld: 0.11906079851090907, Lq: 0.5320814347267151, Le: -1.5253416657447816, loss_Q: 0.008168378798291086
Avg. Reward: 0.5659863657837898, sample_ratio: 0.8, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.5087572490384057


                                                                   

save model 2_best, smoothed reward: 0.5044062901515587


                                                                   

save model 2_best, smoothed reward: 0.49934758743561636


                                                                   

save model 2_best, smoothed reward: 0.4926234956273548


                                                                   

save model 2_best, smoothed reward: 0.49174784929055704


                                                                   

save model 2_best, smoothed reward: 0.4886518140974195


                                                                   

save model 2_best, smoothed reward: 0.4848584523547418


                                                                   

save model 2_best, smoothed reward: 0.4806630243839974


                                                                   

Ld: 0.09503029882907868, Lq: 0.5379338300228119, Le: -1.532287724018097, loss_Q: 0.008012044499628246
Avg. Reward: 0.4870424626180793, sample_ratio: 0.7, weight_bc_loss: 1.0


                                                                   

Ld: 0.10361129753291606, Lq: 0.5410537242889404, Le: -1.5144742035865784, loss_Q: 0.007949847369454801
Avg. Reward: 0.4848631260732512, sample_ratio: 0.6, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.48048907822335235


                                                                   

Ld: 0.08836083613336086, Lq: 0.5427813154458999, Le: -1.518298008441925, loss_Q: 0.007197132110595703
Avg. Reward: 0.4858762869829854, sample_ratio: 0.5, weight_bc_loss: 1.0


                                                                   

1.0 1.0
Expert's Reward: 0.5247440338134766
state_dim: 20, action_dim: 20


                                                                   

Ld: 0.625973169207573, Lq: 0.20457743868231773, Le: -1.4648551321029664, loss_Q: 0.012206145254895091
Avg. Reward: 0.8127810412022587, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.8109187422323159


                                                                   

save model 1_best, smoothed reward: 0.8062920080889198


                                                                   

save model 1_best, smoothed reward: 0.8032102544915352


                                                                   

save model 1_best, smoothed reward: 0.7933145965072536


                                                                   

Ld: 0.5902589356899262, Lq: 0.19464780628681183, Le: -1.4672504830360413, loss_Q: 0.014826401807367802
Avg. Reward: 0.8035376636955103, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.5911687970161438, Lq: 0.21480777531862258, Le: -1.4667750251293183, loss_Q: 0.008584497985430062
Avg. Reward: 0.8044114905201054, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 1_best, smoothed reward: 0.7914731355964217
Ld: 0.5841464030742646, Lq: 0.20731898680329322, Le: -1.460693222284317, loss_Q: 0.010560192801058292
Avg. Reward: 0.7935209494961605, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.5585286590456963, Lq: 0.21775216981768608, Le: -1.4617079484462738, loss_Q: 0.0049481724691577255
Avg. Reward: 0.8029515457717986, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Expert's Reward: 0.525273323059082
state_dim: 20, action_dim: 20


                                                                   

Ld: 0.7089137786626816, Lq: 0.21504372760653495, Le: -1.4297530484199523, loss_Q: 0.00952867408748716
Avg. Reward: 0.8082789843929905, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.7953569997074266


                                                                   

save model 2_best, smoothed reward: 0.7912123623956235


                                                                   

save model 2_best, smoothed reward: 0.7905443456087322


                                                                   

save model 2_best, smoothed reward: 0.7818874744575437


                                                                   

Ld: 0.6614537131786347, Lq: 0.21432434067130088, Le: -1.4481236815452576, loss_Q: 0.011510680122300982
Avg. Reward: 0.7945696607189896, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.6083961999416352, Lq: 0.22709284350275993, Le: -1.4512690091133118, loss_Q: 0.006044489000923931
Avg. Reward: 0.7904928675751094, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

save model 2_best, smoothed reward: 0.7788381438283044
Ld: 0.591794530749321, Lq: 0.22064969539642335, Le: -1.4607233834266662, loss_Q: 0.008998885001055897
Avg. Reward: 0.7845525738993928, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   

Ld: 0.5586224341392517, Lq: 0.23562367022037506, Le: -1.457375227212906, loss_Q: 0.0040179467387497425
Avg. Reward: 0.7903630243824957, sample_ratio: 0.01, weight_bc_loss: 1.0


                                                                   