# TL DQN - Adaptation phase
* shared network: 32, 32, 15
* learner alpha=0.01

### import required packages

In [4]:
# plotting, numerical and filesystem dependencies
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os

import sys
# Add "../" to the module search path (once) so that the project-local `lib`
# package imported below can be resolved.
# NOTE(review): presumably `lib` lives one directory above this notebook -- confirm.
if "../" not in sys.path:
    sys.path.append("../")

# project-local modules: the slicing environment, the DQN agent and shared helpers
from lib.envs.slicing_env import SlicingEnvironment
from lib.agents import dqn 
from lib import utils

# render all figures with matplotlib's 'ggplot' style sheet
matplotlib.style.use('ggplot')

### configure the simulation

In [5]:
# ------------------------- simulation settings -------------------------

# fix the NumPy RNG seed so that repeated runs are reproducible
np.random.seed(2023)

# DRL agent timesteps in one episode
max_episode_timesteps = 100

total_data_episodes = 1

# DRL agent episodes to run (kept this high for the sake of clearer
# result visualization)
total_episodes = 50

# agent identifier that ends up in the saved-file names
# (alternative used elsewhere: 'qlearning')
agent_name = 'dqn'

# tag of this run, and tag of the previously saved models that get loaded
learning_type, loaded_learning_type = 'accelerated', 'non_accelerated'

# sigmoid reward function parameters, one (c1, c2) pair per slice
c1_volte, c2_volte = 0.5, 10
c1_urllc, c2_urllc = 2, 3
c1_video, c2_video = 1, 7

# agent hyperparameters
discount_factor = 0.3
alpha = 0.01
epsilon = 0.1
epsilon_decay = 0.5
decay_steps = 10
exploration = 'e_greedy'

# ------------------------- slicing settings ----------------------------

# number of users per slice, ordered VoLTE, Video, URLLC
num_users = [46 // 4, 46 // 4, 8 // 4]

# per-slice user-count traces: one row of 200 entries, every entry equal to 1
poisson_volte = np.full((1, 200), 1)
poisson_video = np.full((1, 200), 1)
poisson_urllc = np.full((1, 200), 1)

# largest user count found in each trace, ordered VoLTE, Video, URLLC
max_num_users = [
    max(trace[0]) for trace in (poisson_volte, poisson_video, poisson_urllc)
]

# NOTE(review): this list is ordered Video, VoLTE, URLLC, unlike the
# VoLTE, Video, URLLC order used by `num_users` and `max_num_users`.
# The three traces are currently identical so nothing changes in practice,
# but confirm which order SlicingEnvironment expects before making the
# traces differ.
num_users_poisson = [poisson_video[0], poisson_volte[0], poisson_urllc[0]]

max_size_per_tti = 40
max_num_packets = 0
max_traffic_percentage = 1
num_action_lvls = 15
num_slices = 3
sl_win_size = 40
time_quantum = 1
max_trans_per_tti = 6

### generate sample traffic data

In [6]:
# Build the sample traffic trace from the per-slice maximum user counts and a
# length of sl_win_size * max_episode_timesteps, then renumber its index from
# scratch (the previous index is dropped rather than kept as a column).
# NOTE(review): presumably generate_data returns a pandas DataFrame -- confirm.
traffic_df = utils.generate_data(
    max_num_users[0],
    max_num_users[1],
    max_num_users[2],
    sl_win_size * max_episode_timesteps,
).reset_index(drop=True)

## Adaptation phase

In [None]:
def _weights_tag(*weights):
    """Return the weights as concatenated integer percentages.

    Each weight is multiplied by 100 and truncated with int(), and the
    results are joined without a separator, e.g. (0.5, 0.25) -> '5025'.

    NOTE(review): int() truncates rather than rounds, and the missing
    separator can make different weight triples map to the same tag. Both
    are kept as-is because these tags must match the names of files that
    already exist on disk.
    """
    return ''.join(str(int(weight * 100)) for weight in weights)


# directory holding the previously saved network parameters that get loaded
base_path = os.getcwd()+'/saved_models/base/'

# directory receiving the results of this phase; created once up front so
# that a problem with the output location shows up before any training starts
path = 'saved_models/accelerated/'
os.makedirs(path, exist_ok=True)

# train learner agents using the first batch of reward function weights
order = 0

for i in range(0, 10):
    # weights of the learner agent's reward function (looked up once per i)
    learner_weights = utils.get_reward_weights_acc(i, order)
    w_volte = learner_weights[0]
    w_urllc = learner_weights[1]
    w_video = learner_weights[2]

    for j in range(0, 16):
        # weights of the expert agent's reward function (looked up once per j)
        expert_weights = utils.get_reward_weights(j)
        by_w_volte = expert_weights[0]
        by_w_urllc = expert_weights[1]
        by_w_video = expert_weights[2]
        print('i=%d: w_volte %f, w_urllc %f, w_video %f' %(i, w_volte, w_urllc, w_video))
        print('j=%d: by_w_volte %f, by_w_urllc %f, by_w_video %f' %(j,by_w_volte, by_w_urllc, by_w_video))

        # file with the expert's saved network parameters, passed to the
        # agent below as `loaded_qnet`
        by_net_params = base_path + 'net.params_' + \
                        str(exploration) + '_' + \
                        str(loaded_learning_type) + '_' + \
                        str(agent_name) + '_' + \
                        _weights_tag(by_w_volte, by_w_urllc, by_w_video)

        # initialize the OpenAI gym-compatible environment using the configured
        # simulation parameters and the learner's reward weights
        enviro = SlicingEnvironment(
            traffic_df, max_num_packets, max_size_per_tti, num_action_lvls,
            num_slices, max_episode_timesteps, sl_win_size, time_quantum,
            total_data_episodes, num_users_poisson, max_traffic_percentage,
            max_trans_per_tti, w_volte, w_urllc, w_video,
            c1_volte, c1_urllc, c1_video, c2_volte, c2_urllc, c2_video)

        env = enviro

        # run the DQN agent on the environment, handing it the expert's
        # saved parameters
        qnet, stats = dqn.dqn(env,
                              num_episodes=total_episodes,
                              exploration=exploration,
                              gamma=discount_factor,
                              lr=alpha,
                              epsilon=epsilon,
                              epsilon_decay=epsilon_decay,
                              decay_steps=decay_steps,
                              loaded_qnet=by_net_params)

        # collect the run's configuration, rewards and KPIs for logging
        generic_config = {
            'max_episode_timesteps': max_episode_timesteps, 'total_episodes': total_episodes,
            'agent_name': agent_name, 'max_size_per_tti': max_size_per_tti,
            'max_traffic_percentage': max_traffic_percentage, 'num_action_lvls': num_action_lvls,
            'num_slices': num_slices, 'sl_win_size': sl_win_size, 'max_trans_per_tti': max_trans_per_tti,
            'w_volte': w_volte, 'w_urllc': w_urllc, 'w_video': w_video, 'by_w_volte': by_w_volte,
            'by_w_urllc': by_w_urllc, 'by_w_video': by_w_video,
            'c1_volte': c1_volte, 'c2_volte': c2_volte, 'c1_urllc': c1_urllc, 'c2_urllc': c2_urllc,
            'c1_video': c1_video, 'c2_video': c2_video,
            'learning_type': learning_type,
        }
        agent_config = {
            'discount_factor': discount_factor, 'alpha': alpha,
            'epsilon': epsilon, 'epsilon_decay': epsilon_decay,
            'decay_steps': decay_steps, 'loaded_qnet': by_net_params,
        }
        kpis = {
            'delay': env.total_avg_waiting_times,
            'throughput': env.total_throughputs, 'finished_throughput': env.finished_throughputs,
            'remaining_sizes_sum': env.remaining_sizes_sum, 'remaining_sizes': env.remaining_sizes,
            'remaining_times_sum': env.remaining_times_sum, 'remaining_times': env.remaining_times,
            'total_p_numbers': env.total_p_numbers, 'done_p_numbers': env.done_p_numbers,
        }
        dictionary = {
            'config': {'generic': generic_config, 'agent_specific': agent_config},
            'rewards': {'steps': env.step_rewards, 'episodes': list(stats[1])},
            'KPIs': kpis,
        }

        # save training data to file; the name encodes the learner's weights
        # followed by the expert's weights.
        # np.save pickles the dict into an object array, so reading it back
        # needs np.load(file_name, allow_pickle=True).
        file_name = path + str(learning_type) + '_' + str(agent_name) + '_' + \
                    _weights_tag(w_volte, w_urllc, w_video) + '_by_' + \
                    _weights_tag(by_w_volte, by_w_urllc, by_w_video) + '_ep.npy'
        np.save(file_name, dictionary)