In [1]:
import os
# Set in the process environment before numpy / torch are imported below.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort seen when several libraries each bundle their own OpenMP copy --
# confirm it is still required on the target machines.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import numpy as np
import matplotlib.pyplot as plt
import pickle
import torch

from Configs import getEnvConfig, visualizeEnvConfig
from Helpers.DataSampler import ReplayBuffer, ReplayBufferHybrid
from Helpers.Visualization import MultiLivePlot
from Environment.EnvironmentSim import createEnv
from Helpers.EnvInterface import EnvInterface
from Helpers.Eval import eval

In [2]:
# Pick the environment configuration by index and print its summary.
configIdx = 0
envParams = getEnvConfig(configIdx)
visualizeEnvConfig(envParams)

# Plain string literal: the path contains no placeholders, so the f-prefix
# the original carried was redundant.
trafficDataParentPath = 'Datasets/TrafficDataset/TrafficData'

# Build the environment from the traffic data and switch it to the training
# split ("train" mode, "data" type as understood by env.selectMode).
env = createEnv(envParams, trafficDataParentPath)
env.selectMode(mode="train", type="data")

Environment Configuration
Number of Users:        8
Window Length:          200
Dataflow:               thumb_fr
Sigmoid K List:         [0.1, 0.2, 0.3, 0.4, 0.5]
Sigmoid S List:         [10.0, 10.0, 10.0, 10.0, 10.0]
Resource Bar:           5
Bandwidth:              50
Sub Agents:             [[0, 0]]
User Map:               [[0, 1, 2, 3], [4, 5, 6, 7]]


In [3]:
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling.
# Only load these files if they come from a trusted source.

# Offline dataset matching the selected config / sub-agent indices.
with open(f'Datasets/OfflineDataset/subOptimalAgent_encConfig{configIdx}_{envParams["sub_agents_idx"]}.pkl', 'rb') as f:
    dataset_expert = pickle.load(f)

# Plain string literal (no placeholders, so no f-prefix needed).
# NOTE(review): dataset_random is not used in this cell -- confirm a later
# cell consumes it, otherwise this load can be dropped.
with open('Datasets/OfflineDataset/random_policy_8_users.pkl', 'rb') as f:
    dataset_random = pickle.load(f)

# Re-key the expert records under the conventional transition names.
dataset_off = {
    'observations': dataset_expert['uRecord'],
    'actions': dataset_expert['actionsRecord'],
    'rewards': dataset_expert['rewardRecord'],
    'next_observations': dataset_expert['uNextRecord'],
}

# NOTE(review): the label implies 'rewardRecord' stores packet loss rates --
# confirm against the code that generated the dataset.
print(f"Avg. packet loss rate: {np.mean(dataset_expert['rewardRecord'])}")
print(f"length of dataset: {len(dataset_off['observations'])}")

Avg. packet loss rate: 0.2857042178658874
length of dataset: 10000


In [4]:
# Hyperparameters gathered in a single dict; the code that consumes them is
# not part of this cell.
hyperparams = {
    'N_diffusion_steps': 30,
    'schedule_type': "vp",
    'abs_action_max': 1.0,
    'gamma': 0.99,
    'lr': 5e-3,
    'decay_lr': True,
    'weight_decay': 0.0,
    'num_critics': 12,
    'lcb_coef': 0.15,
    'q_sample_eta': 1.0,
    'weight_entropy_loss': 0.01,
    'weight_q_loss': 1.0,
    'approximate_action': True,
    'ema_tau': 0.001,
    'ema_period': 20,
    'ema_begin_update': 1000,
    'layer_norm': True,
    'grad_clip': 3.0,
    # Use the GPU when one is available; otherwise fall back to the CPU so
    # the notebook still runs on machines without CUDA.
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
}

In [26]:
# Interface between raw environment quantities and their preprocessed form,
# created here with discrete_state=False.
envInterface = EnvInterface(
    envParams,
    discrete_state=False,
    n_bits_state=2,
    base_state=200,
    n_bits_action=2,
    base_action=200,
)

# Action round trip: draw a random (1, 8) sample in [0, envParams['B']),
# preprocess it, map it back, and show all three arrays one per line.
r = np.random.uniform(low=0, high=envParams['B'], size=(1, 8))
a = envInterface.preprocess_action(r)
r_ = envInterface.postprocess_action(a)
print(r, a, r_, sep="\n")

# State preprocessing: draw a random (1, 8) sample in
# [0, envParams['LEN_window']) and show it next to its preprocessed form.
u = np.random.uniform(low=0, high=envParams['LEN_window'], size=(1, 8))
u_ = envInterface.preprocess_state(u)
print(u, u_, sep="\n")


[[15.22609634  1.63529003 45.7072439  18.2314975  44.48540138  1.23220172
  46.94356305 43.69457049]]
[[-0.39095615 -0.9345884   0.82828976 -0.2707401   0.77941606 -0.95071193
   0.87774252  0.74778282]]
[[15.22609634  1.63529003 45.7072439  18.2314975  44.48540138  1.23220172
  46.94356305 43.69457049]]
[[135.8310905   65.99705895 188.64967946  54.80144247 186.22326775
  158.22291623  81.73272067  77.76618722]]
[[ 0.3583109  -0.34002941  0.88649679 -0.45198558  0.86223268  0.58222916
  -0.18267279 -0.22233813]]
