### import required packages

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os

import sys
# Put the parent directory on the module search path (only once) so that the
# project-local `lib` package imported below can be resolved.
# NOTE(review): presumably this file is run from a directory one level below
# the repository root — confirm.
if "../" not in sys.path:
  sys.path.append("../")

# project-local modules: the slicing environment, the q-learning agent and
# shared helpers (these imports rely on the sys.path entry added above)
from lib.envs.slicing_env import SlicingEnvironment
from lib.agents import qlearning
from lib import utils

# apply matplotlib's 'ggplot' style sheet to plots created from here on
matplotlib.style.use('ggplot')

### configure the simulation

In [None]:
# seed NumPy's global random generator so repeated runs are reproducible
np.random.seed(2021)

# ---- episode layout ----
# number of DRL agent timesteps in one episode
max_episode_timesteps = 100

total_data_episodes = 1

# number of DRL agent episodes (chosen for the sake of better results visualization)
total_episodes = 200

# ---- agent selection ----
# label of the agent being trained: 'qlearning' (used here) or 'sarsa'
agent_name = 'qlearning'

learning_type = 'non_accelerated'

# ---- sigmoid reward function configuration: one (c1, c2) pair per slice ----
c1_volte, c2_volte = 0.5, 10
c1_urllc, c2_urllc = 2, 3
c1_video, c2_video = 1, 7

# ---- q-learning agent configuration ----
discount_factor = 0.3
alpha = 0.1
epsilon = 1
epsilon_decay = 0.9
decay_steps = 100

# policy reuse flag ('no' is the value used for this run)
loaded_qtable = 'no'

# ---- slicing configuration ----
# number of users per slice in the following order: VoLTE, Video, URLLC
num_users = [46 // 4, 46 // 4, 8 // 4]

# per-slice user-count series: a 1 x 200 integer array filled with ones
poisson_volte = np.full(shape=(1, 200), fill_value=1)
poisson_video = np.full(shape=(1, 200), fill_value=1)
poisson_urllc = np.full(shape=(1, 200), fill_value=1)

# largest user count seen in each series, ordered VoLTE, Video, URLLC
max_num_users = [
    poisson_volte[0].max(),
    poisson_video[0].max(),
    poisson_urllc[0].max(),
]

# NOTE(review): this list is ordered Video, VoLTE, URLLC, unlike the
# VoLTE, Video, URLLC order used just above. All three series are identical
# today, so nothing changes in practice — confirm which order the
# environment expects before making the series differ.
num_users_poisson = [
    poisson_video[0],
    poisson_volte[0],
    poisson_urllc[0],
]

max_size_per_tti = 40
max_num_packets = 0
max_traffic_percentage = 1
num_action_lvls = 15
num_slices = 3
sl_win_size = 40
time_quantum = 1
max_trans_per_tti = 6

### generate sample traffic data


In [None]:
# Generate the sample traffic from the per-slice maximum user counts and a
# length of sl_win_size * max_episode_timesteps, then reset the result's index
# with drop=True (presumably a pandas DataFrame, so the old index is discarded
# rather than kept as a column — confirm against utils.generate_data).
traffic_df = utils.generate_data(
    max_num_users[0],
    max_num_users[1],
    max_num_users[2],
    sl_win_size * max_episode_timesteps,
).reset_index(drop=True)

### train the basic expert agents from scratch given the configured reward function weights

In [None]:
# Train one q-learning agent per reward-weight configuration and save each
# run's configuration, rewards, Q-table and KPIs to its own .npy file.
# NOTE(review): 16 is presumably the number of weight configurations that
# utils.get_reward_weights can return — confirm.
for i in range(0, 16):
    # set the weights of the reward function (one lookup per configuration
    # instead of one lookup per weight)
    w_volte, w_urllc, w_video = utils.get_reward_weights(i)[:3]

    # initialize the OpenAI gym-compatible environment using the configured simulation parameters
    # (arguments are positional, so their order must stay exactly as below)
    enviro = SlicingEnvironment(
        traffic_df, max_num_packets, max_size_per_tti, num_action_lvls,
        num_slices, max_episode_timesteps, sl_win_size, time_quantum, total_data_episodes,
        num_users_poisson, max_traffic_percentage, max_trans_per_tti,
        w_volte, w_urllc, w_video,
        c1_volte, c1_urllc, c1_video,
        c2_volte, c2_urllc, c2_video,
    )

    env = enviro

    # start the simulation using a q-learning agent
    Q, stats = qlearning.q_learning(
        env=env,
        num_episodes=total_episodes,
        discount_factor=discount_factor,
        alpha=alpha,
        epsilon=epsilon,
        epsilon_decay=epsilon_decay,
        decay_steps=decay_steps,
        loaded_qtable=loaded_qtable,
    )

    # log the trained agents' data
    dictionary = {
        'config': {
            'generic': {
                'max_episode_timesteps': max_episode_timesteps,
                'total_episodes': total_episodes,
                'agent_name': agent_name,
                'max_size_per_tti': max_size_per_tti,
                'max_traffic_percentage': max_traffic_percentage,
                'num_action_lvls': num_action_lvls,
                'num_slices': num_slices,
                'sl_win_size': sl_win_size,
                'max_trans_per_tti': max_trans_per_tti,
                'w_volte': w_volte,
                'w_urllc': w_urllc,
                'w_video': w_video,
                'c1_volte': c1_volte,
                'c2_volte': c2_volte,
                'c1_urllc': c1_urllc,
                'c2_urllc': c2_urllc,
                'c1_video': c1_video,
                'c2_video': c2_video,
                'learning_type': learning_type,
            },
            'agent_specific': {
                'discount_factor': discount_factor,
                'alpha': alpha,
                'epsilon': epsilon,
                'epsilon_decay': epsilon_decay,
                'decay_steps': decay_steps,
                'loaded_qtable': loaded_qtable,
            },
        },
        'rewards': {
            'steps': env.step_rewards,
            # stats[1] is stored as the per-episode entry (presumably the
            # episode rewards — confirm against qlearning.q_learning)
            'episodes': list(stats[1]),
        },
        # convert to a plain dict before saving
        'qtable': dict(Q),
        'KPIs': {
            'delay': env.total_avg_waiting_times,
            'throughput': env.total_throughputs,
            'finished_throughput': env.finished_throughputs,
            'remaining_sizes_sum': env.remaining_sizes_sum,
            'remaining_sizes': env.remaining_sizes,
            'remaining_times_sum': env.remaining_times_sum,
            'remaining_times': env.remaining_times,
            'total_p_numbers': env.total_p_numbers,
            'done_p_numbers': env.done_p_numbers,
        },
    }

    # save training data to file
    path = 'saved_models/base/'
    # exist_ok=True creates the directory when missing and is a no-op when it
    # already exists, without a separate (race-prone) existence check
    os.makedirs(path, exist_ok=True)

    # NOTE(review): int(w * 100) truncates, so a weight such as 0.29 would be
    # written as 28, and the three weight fields are written back-to-back with
    # no separator. Left unchanged because other code may rebuild this exact
    # file name — confirm before changing the naming scheme.
    file_name = (
        f"{path}{learning_type}_{agent_name}_"
        f"{int(w_volte*100)}{int(w_urllc*100)}{int(w_video*100)}_"
        f"{alpha}_{epsilon}_{epsilon_decay}_{total_episodes}ep.npy"
    )
    # np.save pickles a dict as a 0-d object array; reading it back needs
    # np.load(file_name, allow_pickle=True).item()
    np.save(file_name, dictionary)