# Documentation of the NB_IoT environment

This notebook explains how to use the NB_IoT environment.

## System Creation with ```create_system```

The arguments of this function are:
* ```rng```: a random number generator created with ```default_rng``` from ```numpy.random```
* ```conf```: a dictionary with the system configuration

In [1]:
# import system creation function
from system.system_creator import create_system

# random number generator object
from numpy.random import default_rng

Define the configuration

In [2]:
# Simulator configuration passed to create_system.
conf = dict(
    animate_carrier=True,      # generate an animation of the carrier occupation
    ratio=1,                   # ratio of uniform/beta traffic
    M=1000,                    # number of UEs
    buffer_range=[100, 400],   # range for the number of bits in the UE buffer
    reward_criteria='users',   # one of the criteria defined in perf_monitor.py
    statistics=True,           # store historical data for statistical evaluation
    animate_stats=False,       # generate an animation of the statistics over time
    sc_adjustment=True,        # automatically adjust the number of subcarriers
    mcs_automatic=True,        # automatically select mcs and Nrep
    tx_all_buffer=True,        # NOTE(review): presumably transmits the whole UE buffer - confirm
)

In [3]:
# Seeded generator, so that repeated runs draw the same random numbers.
rng = default_rng(827)

# Build the system; the four objects returned by create_system are unpacked here.
(node, perf_monitor, population, carrier) = create_system(rng, conf)

We can run a step-by-step simulation of the system as follows:

In [None]:
# this auxiliary function generates a simple default action
from system.utils import generate_reasonable_action

In [5]:
import system.parameters as par

def generate_reasonable_action(n_controls=23):
    """Build an action vector filled with the default control values.

    Every entry starts at 0. Then, for each control named in
    ``par.control_default_values``, its default value is written at the
    position that ``par.control_items`` assigns to that control.

    Note: this definition shadows the function of the same name imported
    from ``system.utils`` earlier in the notebook.

    Args:
        n_controls: length of the action vector. Defaults to 23, the size
            that was previously hard-coded in this function.

    Returns:
        A new list with ``n_controls`` entries.

    Raises:
        KeyError: if a control that has a default value has no entry in
            ``par.control_items``.
        IndexError: if a position taken from ``par.control_items`` falls
            outside the vector.
    """
    action = [0] * n_controls
    for name, value in par.control_default_values.items():
        # control_items maps the control name to its slot in the vector
        action[par.control_items[name]] = value
    return action

In [6]:
# default action, applied unchanged at every step
action = generate_reasonable_action()

print(action)

# bring the node back to its initial state
info = node.reset()

# step the system until node.time reaches 800, reporting every 10th event
n = 0
while node.time < 800:
    r, Done, info = node.step(action)
    n += 1
    if not n % 10:
        print(f'event: {n}, time {node.time}, reward: {r}')

[0, 1, 1, 4, 0, 3, 1, 2, 6, 2, 4, 12, 3, 0, 1, 2, 0, 1, 3, 2, 1, 4, 8]
event: 10, time 423, reward: 0
event: 20, time 663, reward: 3


## Video Generation

In [7]:
# Render the carrier resource occupation process as an animation.
# The name is kept in a variable because the embedding cell below
# reads ./movies/<movie_name>.mp4.
movie_name = "1_nbiot_carrier_animation"
carrier.generate_movie(movie_name=movie_name)

frames/*png
movies/1_nbiot_carrier_animation.mp4


ffmpeg version 4.2.2 Copyright (c) 2000-2019 the FFmpeg developers
  built with Apple clang version 11.0.0 (clang-1100.0.33.17)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/4.2.2_2 --enable-shared --enable-pthreads --enable-version3 --enable-avresample --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libbluray --enable-libmp3lame --enable-libopus --enable-librubberband --enable-libsnappy --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librtmp --enable-libspeex --enable-libsoxr --enable-videotoolbox --disable-libjack --disable-indev=jack
  libavutil      56. 31.100 / 56. 31.100
  libavcodec     58. 54.100 / 58. 54.100
  libavformat   

In [8]:
# imports for video insertion in the notebook
import io
import base64
from IPython.display import HTML

In [9]:
# Embed the generated movie in the notebook as a base64 data URI.
# The file is opened read-only ('rb') inside a context manager, so no write
# permission is needed and the handle is closed as soon as the bytes are read.
with io.open(f'./movies/{movie_name}.mp4', 'rb') as video_file:
    video = video_file.read()
encoded = base64.b64encode(video)
# The HTML object is the last expression of the cell, so the notebook renders it.
HTML(data='''
    <video width="500" height="auto" alt="test" controls><source src="data:video/mp4;base64,{0}" type="video/mp4" /></video>'''
.format(encoded.decode('ascii')))

## Agent Creation

Agents inherit from the ```DummyAgent``` class which provides the basic attributes:
* ```action_items```: list with the action items controlled by the agent
* ```obs_items```: list with the state items observed by the agent
* ```next```: integer pointing to the id of the next agent to take over in the same state
* ```states```: list with the Node B states in which the agent operates

In [12]:
from control_agents import DummyAgent

# Agent configurations: one dictionary per agent, each handed to DummyAgent below.
agent_0 = dict(
    id=0,  # UE Imcs and Nrep selection
    action_items=['id', 'Imcs', 'Nrep', 'carrier', 'delay', 'sc'],  # action items controlled by this agent
    # State items observed by this agent. Left empty here; a candidate list is
    # ['total_ues', 'connection_time', 'loss', 'sinr', 'buffer', 'carrier_state'].
    obs_items=[],
    next=-1,  # next agent operating in the same nodeb state
    states=['Scheduling'],  # nodeb state where this agent operates
)

agent_1 = dict(
    id=1,  # ce_level selection
    action_items=['ce_level', 'rar_Imcs', 'Nrep'],
    obs_items=[],
    next=-1,
    states=['RAR_window'],
)

agent_2 = dict(
    id=2,  # RA parameters selection
    action_items=['rar_window', 'mac_timer', 'transmax', 'panchor', 'backoff'],
    obs_items=[],
    next=-1,
    states=['RAR_window_end'],
)

agent_3 = dict(
    id=3,  # NPRACH configuration
    action_items=[
        'th_C1', 'th_C0',
        'sc_C0', 'sc_C1', 'sc_C2',
        'period_C0', 'period_C1', 'period_C2',
    ],
    obs_items=[
        'detection_ratios', 'colision_ratios', 'msg3_detection',
        'NPRACH_occupation', 'av_delay', 'distribution',
    ],
    next=-1,
    states=['NPRACH_update'],
)

# One DummyAgent per configuration; the list is ordered by the agents' id attribute.
agents = [DummyAgent(cfg) for cfg in (agent_0, agent_1, agent_2, agent_3)]

## Controller Creation

The Controller class creates an object that orchestrates the list of agents to operate the system.

Two arguments are required:
* the controlled system
* the list of agents

In [13]:
# import system creation and controller
from controller import Controller

# Flags forwarded to the simulator configuration below.
ENV_STATISTICS = True
ENV_TRACES = True

# Simulator configuration
conf = dict(
    statistics=ENV_STATISTICS,      # store historical data for statistical evaluation
    traces=ENV_TRACES,              # NOTE(review): presumably enables trace recording - confirm
    ratio=1.0,                      # ratio of uniform/beta traffic
    M=1000,                         # number of UEs
    buffer_range=[100, 600],        # range for the number of bits in the UE buffer
    reward_criteria='throughput',   # reward criterion (the first example in this notebook uses 'users')
)

# seeded random number generator
rng = default_rng(2)

# build the system; the last two returned objects are not used in this section
(node, perf_monitor, _, _) = create_system(rng, conf)

# the controller operates `node` through the list of agents
controller = Controller(node, agents=agents)

# reset the environment; the returned value is discarded
_ = controller.reset()

## Learning with Stable-Baselines Agents

One of the agents in the list can be replaced by an external agent provided by Stable-Baselines3.

For example, agent 3 is in charge of the RSRP thresholds (that determine the coverage of the CE levels) and the NPRACH parameters (n_sc and periodicity) of the three CE levels.

The next cells create an RL agent that takes over agent 3 responsibilities, using the Stable-Baselines3 package.

In [14]:
from stable_baselines3 import A2C # DQN, PPO
from stable_baselines3.common.env_util import make_vec_env
from wrappers import NPRACH_wrapper_traces, DiscreteActions
import gymnasium as gym

# mark the agent at index 3 (NPRACH configuration) as the external agent
controller.set_ext_agent(3)
print(' > External agent configured', ' ', sep='\n')

# build the gym environment around the controller
nbiot_env = gym.make('gym_system:System-v1', system=controller)
print(' > gym environment created', ' ', sep='\n')

# wrap the environment, passing the list of metric names to the wrapper
metrics = ['departures', 'NPRACH_occupation', 'service_times']
nbiot_env = NPRACH_wrapper_traces(nbiot_env, metrics, verbose=False, n_report=1)
print(nbiot_env.action_space)

# Only for DQN: additionally wrap with DiscreteActions
# nbiot_env = DiscreteActions(nbiot_env)
# print(nbiot_env.action_space)
print(' > environment wrapped', ' ', sep='\n')

# vectorised environment (a single copy) for the Stable-Baselines3 agent
env = make_vec_env(lambda: nbiot_env, n_envs=1)
print(' > vectorised environment created', ' ', sep='\n')

# Optional compatibility check of the environment:
# from stable_baselines3.common import env_checker
# env_checker.check_env(nbiot_env)

 > External agent configured
 
 > gym environment created
 
MultiDiscrete([78  4  4  4  6  6  6])
 > environment wrapped
 
 > vectorised environment created
 


In [15]:
# A2C agent with an MLP policy, seeded for reproducibility
model = A2C('MlpPolicy', env, verbose=0, seed=21)
print(' > Model created!', ' ', sep='\n')

# number of training time steps
steps = 5_000

# train the agent
model.learn(total_timesteps=steps)
print(' > Learning completed!', ' ', sep='\n')

 > Model created!
 
 > Learning completed!
 


In [16]:
from plot_traces import plot_metrics

In [None]:
# Plot the traces of the wrapped environment used for training above
# (presumably the metrics named in `metrics` - confirm in plot_traces).
plot_metrics(nbiot_env)

## Control with Model Based Agent + RL

Here we use an MBRL (model-based RL) agent as agent 3, i.e. to control RSRP thresholds and NPRACH parameters.

In [18]:
# control_agents defines DummyAgent and a list of actions for NPRACH control
from control_agents import DummyAgent
# import system creation and controller
from system.system_creator import create_system
from numpy.random import default_rng
from controller import Controller
from stable_baselines3 import A2C # SAC #, PPO
from stable_baselines3.common.env_util import make_vec_env
from wrappers import NPRACH_agent_wrapper
from agent_nprach import NPRACH_THAgent
import gymnasium as gym

# Agent configurations: one dictionary per agent, each handed to DummyAgent below.

agent_0 = dict(
    id=0,  # UE Imcs and Nrep selection
    action_items=['id', 'Imcs', 'Nrep', 'carrier', 'delay', 'sc'],  # action items controlled by this agent
    # State items observed by this agent. Left empty here; a candidate list is
    # ['total_ues', 'connection_time', 'loss', 'sinr', 'buffer', 'carrier_state'].
    obs_items=[],
    next=-1,  # next agent operating in the same nodeb state
    states=['Scheduling'],  # nodeb state where this agent operates
)

agent_1 = dict(
    id=1,  # ce_level selection
    action_items=['ce_level', 'rar_Imcs', 'Nrep'],
    obs_items=[],
    next=-1,
    states=['RAR_window'],
)

agent_2 = dict(
    id=2,  # RA parameters selection
    action_items=['rar_window', 'mac_timer', 'transmax', 'panchor', 'backoff'],
    obs_items=[],
    next=-1,
    states=['RAR_window_end'],
)

agent_3 = dict(
    id=3,  # NPRACH configuration
    action_items=[
        'th_C1', 'th_C0',
        'sc_C0', 'sc_C1', 'sc_C2',
        'period_C0', 'period_C1', 'period_C2',
    ],
    obs_items=[
        'detection_ratios', 'colision_ratios', 'msg3_detection',
        'NPRACH_occupation', 'av_delay', 'distribution',
    ],
    next=-1,
    states=['NPRACH_update'],
)

# One DummyAgent per configuration; the list is ordered by the agents' id attribute.
agents = [DummyAgent(cfg) for cfg in (agent_0, agent_1, agent_2, agent_3)]

# Flags forwarded to the simulator configuration below (both off in this section).
ENV_STATISTICS = False
ENV_TRACES = False

# Simulator configuration
conf = dict(
    statistics=ENV_STATISTICS,      # store historical data for statistical evaluation
    traces=ENV_TRACES,              # NOTE(review): presumably enables trace recording - confirm
    ratio=1.0,                      # ratio of uniform/beta traffic
    M=1000,                         # number of UEs
    buffer_range=[100, 600],        # range for the number of bits in the UE buffer
    reward_criteria='throughput',   # reward criterion (the first example in this notebook uses 'users')
)

# seeded random number generator
# (the original line carried a trailing "#233", presumably an alternative seed)
rng = default_rng(8)

# build the system; the last two returned objects are not used in this section
(node, perf_monitor, _, _) = create_system(rng, conf)

# the controller operates `node` through the list of agents
controller = Controller(node, agents=agents)

# reset the environment; the returned value is discarded
_ = controller.reset()

# mark the agent at index 3 (NPRACH configuration) as the external agent
controller.set_ext_agent(3)
print(' > External agent configured', ' ', sep='\n')

# options forwarded to the intermediate agent
NO_TH = False
VERBOSE = False

# metric names handed to the intermediate agent
metrics = ['departures', 'NPRACH_occupation', 'service_times', 'beta']

# intermediate agent, built from the configuration of agent 3
agent = NPRACH_THAgent(agent_3, metrics, verbose=VERBOSE, no_th=NO_TH)

# build the gym environment around the controller
nbiot_e = gym.make('gym_system:System-v1', system=controller)

# wrap the environment with the intermediate agent;
# the observation items are the indexes 0-5 and 9-12
observation = [*range(6), *range(9, 13)]
nbiot_e = NPRACH_agent_wrapper(
    nbiot_e,
    agent,
    n_actions=26,
    obs_items=observation,
    bounds=[0.3, 2.0],
)

print(nbiot_e.action_space)
print(' > environment wrapped', ' ', sep='\n')

# vectorised environment (a single copy) for the Stable-Baselines3 agent
env = make_vec_env(lambda: nbiot_e, n_envs=1)
print(' > vectorised environment created', ' ', sep='\n')

# number of training time steps (used by the learning cell below)
steps = 5_000

# A2C agent with an MLP policy, an entropy coefficient of 0.01 and a fixed seed
model = A2C('MlpPolicy', env, verbose=0, ent_coef=0.01, seed=321)
print(' > Model created!', ' ', sep='\n')

 > External agent configured
 
Discrete(26)
 > environment wrapped
 
 > vectorised environment created
 
 > Model created!
 


In [None]:
# Train the agent. reset_num_timesteps=False keeps the model's step counter,
# so re-running this cell continues counting instead of starting from zero.
model.learn(total_timesteps=steps, reset_num_timesteps=False)
print(' > Learning completed!', ' ', sep='\n')

In [None]:
from plot_traces import plot_metrics

# Plot the metrics of the environment wrapped with the intermediate agent.
# NOTE(review): ENV_STATISTICS and ENV_TRACES are False in this section -
# confirm that the wrapper still collects what plot_metrics needs.
plot_metrics(nbiot_e)