-
Notifications
You must be signed in to change notification settings - Fork 1
/
control_agents.py
153 lines (124 loc) · 4.73 KB
/
control_agents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Definition of several types of control agents
Created on Oct 2022
@author: juanjosealcaraz
"""
# import time
# import numpy as np
# from itertools import product
# import matplotlib.pyplot as plt
from wrappers import to_discrete
import system.parameters as par
# configuration of the agents that control the system
# the names and indexes of the state and control elements
# are defined in parameter.py
# Names of the control items that make up a full NPRACH configuration action.
# C0/C1/C2 are the three coverage-enhancement (CE) levels; each gets its own
# period, repetition count and subcarrier setting, plus the two RSRP
# thresholds (th_C1, th_C0) that separate the CE levels.
nprach_actions = ['rar_window', 'mac_timer', 'transmax', 'panchor',
                'period_C0', 'rep_C0', 'sc_C0',
                'period_C1', 'rep_C1', 'sc_C1',
                'period_C2', 'rep_C2', 'sc_C2',
                'th_C1', 'th_C0']
# Configuration of each control agent. Per entry:
#   id           - agent identifier (index into this list)
#   action_items - control items this agent sets (names from parameters.py)
#   obs_items    - state items this agent observes (empty = no observation)
#   next         - id of the next agent acting in the same nodeb state,
#                  or -1 when this agent is the last in the chain
#   states       - nodeb state(s) in which this agent operates
agents_conf = [
{'id': 0, # UE selection
'action_items': ['id'], # action items controlled by this agent
'obs_items': ['total_ues', 'connection_time', 'loss', 'sinr', 'buffer', 'carrier_state'],
'next': 1, # next agent operating in the same nodeb state
'states': ['Scheduling'] # nodeb state where this agent operates
},
{'id': 1, # Imcs, N_rep selection
'action_items': ['Imcs', 'Nrep'],
'obs_items': [],
'next': 2,
'states': ['Scheduling']
},
{'id': 2, # carrier, delay and subcarriers
'action_items': ['carrier', 'delay', 'sc'],
'obs_items': [],
'next': -1,
'states': ['Scheduling']
},
{'id': 3, # ce_level selection
'action_items': ['carrier', 'ce_level', 'rar_Imcs', 'delay', 'sc', 'Nrep'],
'obs_items': [],
'next': -1,
'states': ['RAR_window']
},
{'id': 4, # backoff selection
'action_items': ['backoff'],
'obs_items': [],
'next': -1,
'states': ['RAR_window_end'],
},
{'id': 5, # NPRACH configuration
'action_items': nprach_actions,
'obs_items': [],
'next': -1,
'states': ['NPRACH_update']
}
]
# auxiliary functions for retrieving action indexes and max action values
def get_control_indexes(name_list):
    '''Return the control-vector index of each named control item.'''
    indexes = []
    for item_name in name_list:
        indexes.append(par.control_items[item_name])
    return indexes
def get_max_control_values(name_list):
    '''Return the maximum allowed value of each named control item for the
    configured number of carriers (par.N_carriers).'''
    carrier_idx = par.N_carriers - 1  # max values are tabulated per carrier count
    max_values = []
    for item_name in name_list:
        max_values.append(par.control_max_values[item_name][carrier_idx])
    return max_values
def get_control_default_values(name_list):
    '''Return the default value of each named control item.'''
    return list(map(par.control_default_values.__getitem__, name_list))
class DummyAgent:
    '''
    Dummy agent that simply applies a fixed action.

    The configuration dictionary (one entry of agents_conf: id, action_items,
    obs_items, next, states) is copied onto the instance. The fixed action is
    initialised to the per-item defaults from parameters.py and can be
    overridden with set_action().
    '''
    def __init__(self, dict):
        # NOTE: the parameter name 'dict' shadows the builtin; kept as-is for
        # backward compatibility with existing callers and subclasses.
        self.__dict__.update(dict)
        self.total_steps = 0  # number of get_action() calls served
        self._configure()

    def _configure(self):
        '''(Re)build the action index mask, the max values and the default
        fixed action from self.action_items.'''
        action_items = self.action_items
        self.a_mask = get_control_indexes(action_items)
        self.a_max = get_max_control_values(action_items)
        self.fixed_action = get_control_default_values(action_items)

    def reset(self):
        '''Restore the default fixed action (total_steps is NOT reset,
        matching the original behavior).'''
        self._configure()

    def set_action(self, dict_action_values):
        '''Override selected entries of the fixed action, keyed by item name.
        Unknown names in dict_action_values are silently ignored.'''
        for i, action_name in enumerate(self.action_items):
            if action_name in dict_action_values:
                self.fixed_action[i] = dict_action_values[action_name]

    def get_action(self, obs, r, info, action):
        '''Return the fixed action, ignoring obs/r/info.
        action contains the action so far; agents communicate using this
        argument (this agent does not modify it).'''
        self.total_steps += 1
        return self.fixed_action

    def print_action(self):
        '''Print the current fixed action, one "name: value" line per item.'''
        for name, value in zip(self.action_items, self.fixed_action):
            print(f' {name}: {value}')
        print('')
class TrainedAgent(DummyAgent):
    '''Agent whose action is produced by a trained model with a
    stable-baselines-style predict() interface.'''
    def __init__(self, dict, model, deterministic = False):
        super().__init__(dict)
        self.model = model
        self.deterministic = deterministic

    def get_action(self, obs, r, info, action):
        '''Return the model's prediction for obs; r/info and the incoming
        action are ignored.'''
        predicted, _state = self.model.predict(obs, deterministic = self.deterministic)
        return predicted
class DiscreteTrainedAgent(TrainedAgent):
    '''
    Auxiliary class that encapsulates an RL agent selecting discrete actions
    while interacting with an environment that expects multidiscrete actions:
    the discrete index picks a row from a precomputed action table.
    '''
    def __init__(self, dict, model, nvec, deterministic = False):
        super().__init__(dict, model, deterministic)
        # table mapping each discrete index to a multidiscrete action
        self.actions = to_discrete(nvec)

    def get_action(self, obs, r, info, action):
        '''Predict a discrete index and translate it into the corresponding
        multidiscrete action.'''
        index, _state = self.model.predict(obs, deterministic = self.deterministic)
        return self.actions[index]
class RandomUserAgent(DummyAgent):
    '''Agent that selects one of the listed users uniformly at random.'''
    def __init__(self, dict, rng):
        super().__init__(dict)
        self.rng = rng  # numpy-style Generator (must provide .integers)

    def get_action(self, obs, r, info, action):
        '''Return [i] with i drawn uniformly from the candidate users listed
        in info['ues'] (capped at par.N_users), or [0] when there are none.'''
        if 'ues' not in info:
            return [0]
        n_candidates = min(len(info['ues']), par.N_users)
        if n_candidates == 0:
            return [0]
        return [self.rng.integers(n_candidates)]