In [22]:
from array import array
import numpy as np
import random

In [31]:
class Env_WSN():
    """Simulated wireless-sensor-network (WSN) uplink with a gym-like API.

    A base station (BS) sits at the origin; ``Number_APs`` access points (APs)
    and ``Number_Nodes`` sensor nodes are placed at random around it.  Every
    node chooses a sub-carrier index (``tau``), an AP index (``eta``) and a
    transmit power (``P_nodes``).  The reward returned by :meth:`step` is the
    sum data rate produced by that choice.

    State  : flattened node-to-AP channel-gain matrix followed by the
             per-node target rates.
    Action : ``[tau, eta, P_nodes]`` concatenated (``3 * Number_Nodes`` values).
    """

    ### Initialization ###
    def __init__(self, MAX_EP_STEPS,
                 BS_R_max,                   # Radius of BS's area
                 T_nodes,                    # Time transmit
                 Number_Nodes,
                 Number_APs,
                 Number_sub,
                 P_node_max,
                 Bandwidth,
                 Frequency,
                 noise,
                 sigma):
        """Build a random network layout and an initial random action.

        Args:
            MAX_EP_STEPS: Stored on the instance; not used inside this class.
            BS_R_max: Radius of the BS area, used to scale AP/node placement.
            T_nodes: Transmission time of the nodes (divides the rate target).
            Number_Nodes: Number of sensor nodes.
            Number_APs: Number of access points.
            Number_sub: Number of sub-carriers.
            P_node_max: Maximum transmit power of a node.
            Bandwidth: Bandwidth ``B`` used in the rate formula.
            Frequency: Centre frequency; the wavelength is ``3e8 / Frequency``.
            noise: Exploration noise level (stored only).
            sigma: Noise term added to the interference in the SINR.
        """
        # Visualization
        self.visualization = "2D"            # 3D or else
        # Data logging
        self.verbose_distance = False
        self.verbose_channelGain = False
        self.verbose_SINR = False
        self.verbose_DataRate = False

        # Channel settings
        self.MAX_EP_STEPS = MAX_EP_STEPS
        self.BS_R_max     = BS_R_max        # Distance from the BS to APs / nodes
        self.T_nodes      = T_nodes         # Transmission time of the nodes
        self.Num_Nodes    = Number_Nodes    # Number of nodes
        self.Num_APs      = Number_APs      # Number of APs
        self.Num_sub      = Number_sub      # Number of sub-carriers
        self.P_node_max   = P_node_max      # Maximum node power
        self.B            = Bandwidth       # Bandwidth
        self.F            = Frequency       # Centre frequency
        self.sigma        = sigma           # Noise in the environment
        self.noise        = noise           # Exploration noise

        # Base station initialization (fixed at the origin)
        self.BS_x = 0
        self.BS_y = 0
        # [[BS coordinate]]
        self.BS_location = np.expand_dims(self._location_BS_Generator(), axis=0)

        # AP initialization (fixed for the lifetime of the environment)
        # [[AP1 coordinate] [AP2 coordinate] ...]
        self.AP_location = self._location_AP_Generator()

        # Node initialization: cluster centres are fixed, the node positions
        # around them are re-drawn on every reset().
        self.Node_Center = self._location_Node_Center_Generator()
        self.Node_location = self._location_Node_Generator()

        ## Simulate environment
        # Sub-carriers: Number_sub values evenly spaced from F to F + B.
        self.subcarrier = np.linspace(self.F, self.F + self.B, self.Num_sub)
        self.lamda = (3*(10**8)) / self.F    # Wavelength

        # Distance -> path loss -> channel gain
        self._update_channel()

        # Data rate target
        self.Rate_Target = self._Rate_Target_Generator()

        # Initial (random) action
        self.tau = np.random.randint(self.Num_sub, size=self.Num_Nodes)
        self.eta = np.random.randint(self.Num_APs, size=self.Num_Nodes)
        self.P_nodes = np.random.uniform(0, self.P_node_max, size=self.Num_Nodes)

        self.rewardMatrix = np.array([])
        self.observation_space = self._wrapState().squeeze()
        self.action_space      = self._wrapAction().squeeze()
        self.reward_space      = np.array(())

    ### Functions ###
    @staticmethod
    def _polar_point(center_x, center_y, radius):
        """Return ``[x, y]`` at ``radius`` from the centre, at a random angle."""
        theta = np.random.uniform(-np.pi, np.pi)
        return [center_x + radius*np.cos(theta), center_y + radius*np.sin(theta)]

    def _update_channel(self):
        """Recompute distance, path loss and channel gain from the positions."""
        self.Distance_N2A = self._distance_Calculated()
        self.Pathloss = self._Pathloss_Calculated()
        self.ChannelGain = self._ChannelGain_Calculated()

    # BS coordinate initialization
    def _location_BS_Generator(self):
        """Return the BS coordinate as a length-2 array."""
        return np.array([self.BS_x, self.BS_y])

    # AP coordinate initialization
    def _location_AP_Generator(self):
        """Draw AP positions in the ring between BS_R_max/3 and BS_R_max."""
        points = []
        for _ in range(self.Num_APs):
            r = 2*self.BS_R_max/3 * np.sqrt(np.random.rand()) + self.BS_R_max/3
            points.append(self._polar_point(self.BS_x, self.BS_y, r))
        # reshape keeps a well-formed (0, 2) array when there are no APs
        return np.array(points, dtype=float).reshape(-1, 2)

    # Node cluster-centre coordinates
    def _location_Node_Center_Generator(self):
        """Draw one cluster centre per node around the BS."""
        points = []
        for _ in range(self.Num_Nodes):
            # NOTE(review): this radius ranges from BS_R_max/3 to 4*BS_R_max/3,
            # i.e. it can exceed BS_R_max (the AP generator scales by 2/3) --
            # confirm whether that is intended.
            r = self.BS_R_max * np.sqrt(np.random.rand()) + self.BS_R_max/3
            points.append(self._polar_point(self.BS_x, self.BS_y, r))
        return np.array(points, dtype=float).reshape(-1, 2)

    # Node coordinates
    def _location_Node_Generator(self):
        """Draw each node within BS_R_max/3 of its own cluster centre."""
        points = []
        for i in range(self.Num_Nodes):
            r = self.BS_R_max/3 * np.sqrt(np.random.rand())
            points.append(self._polar_point(self.Node_Center[i][0],
                                            self.Node_Center[i][1], r))
        return np.array(points, dtype=float).reshape(-1, 2)

    def _distance_Calculated(self):
        """Return the (Num_Nodes, Num_APs) Euclidean node-to-AP distances."""
        # Square each coordinate difference *before* summing; squaring the sum
        # would give |dx + dy|, which is not a distance and can be zero.
        diff = self.Node_location[:, np.newaxis, :] - self.AP_location[np.newaxis, :, :]
        return np.sqrt(np.sum(diff**2, axis=2))

    def _Pathloss_Calculated(self):
        """Return the free-space path gain in dB: -20*log10(4*pi*d/lambda)."""
        return -20*np.log10(4*np.pi*self.Distance_N2A/self.lamda)

    def _ChannelGain_Calculated(self):
        """Return the (Num_Nodes, Num_APs) linear channel power gains.

        The dB path gain is converted to linear scale before the random
        fading factor is applied, so every gain is non-negative and can be
        multiplied directly with a transmit power in the SINR.
        """
        path_gain = 10.0 ** (self.Pathloss / 10.0)
        # NOTE(review): the fading factor is uniform on [0, 1); a Rayleigh
        # channel would use an exponentially distributed power gain -- confirm
        # the intended fading model.
        fading = np.random.rand(self.Num_Nodes, self.Num_APs)
        return path_gain * fading

    def _Rate_Target_Generator(self):
        """Return one target rate per node: randint(2, 5) / T_nodes."""
        return np.array([random.randint(2, 5)/self.T_nodes
                         for _ in range(self.Num_Nodes)])

    def _Total_Rate_Calculated(self):
        """Return the sum rate ``sum_i B*log2(1 + SINR_i)`` of the current action.

        Node ``i`` transmits to AP ``eta[i]`` on sub-carrier ``tau[i]``.  Its
        interference is the received power of every *other* node that shares
        the same sub-carrier.
        """
        tau = np.asarray(self.tau).ravel().astype(int)
        eta = np.asarray(self.eta).ravel().astype(int)
        power = np.asarray(self.P_nodes, dtype=float).ravel()

        # Gain from each node to the AP it selected (eta is already 0-based).
        gain = np.asarray(self.ChannelGain)[np.arange(self.Num_Nodes), eta]
        received = power * gain

        # same_sub[i, j] is True when j != i and both use the same sub-carrier.
        same_sub = tau[:, np.newaxis] == tau[np.newaxis, :]
        np.fill_diagonal(same_sub, False)
        # NOTE(review): as in the original formula, the interferer's power is
        # weighted by the gain towards *its own* AP, not towards the victim's
        # AP -- confirm against the system model.
        interference = same_sub.astype(float) @ received

        sinr = received / (interference + self.sigma)
        rate = self.B * np.log2(1 + sinr)
        return np.sum(rate)

    def _wrapState(self):
        """Return the state as a (1, Num_Nodes*Num_APs + Num_Nodes) array."""
        gains = np.array(self.ChannelGain).reshape(1, self.Num_Nodes*self.Num_APs)
        targets = np.array(self.Rate_Target).reshape(1, self.Num_Nodes)
        return np.concatenate((gains, targets), axis=1)

    def _wrapAction(self):
        """Return the action ``[tau, eta, P_nodes]`` as a (1, 3*Num_Nodes) array."""
        parts = (self.tau, self.eta, self.P_nodes)
        return np.concatenate(
            [np.array(part).reshape(1, self.Num_Nodes) for part in parts],
            axis=1)

    def _decomposeState(self, state):
        """Split a state vector into ``[ChannelGain, Rate_Target]`` columns.

        Accepts both the flat vector and the (1, D) array from ``_wrapState``.
        """
        flat = np.asarray(state).ravel()
        n_gain = self.Num_Nodes*self.Num_APs
        ChannelGain = flat[0 : n_gain]
        Rate_Target = flat[n_gain : n_gain + self.Num_Nodes]
        return [
                np.array(ChannelGain).reshape(n_gain, 1),
                np.array(Rate_Target).reshape(self.Num_Nodes, 1)
               ]

    def _decomposeAction(self, action):
        """Split an action vector into ``[tau, eta, P_nodes]`` columns.

        Accepts both the flat vector and the (1, D) array from ``_wrapAction``.
        """
        flat = np.asarray(action).ravel()
        n = self.Num_Nodes
        return [np.array(flat[k*n : (k + 1)*n]).reshape(n, 1) for k in range(3)]

    def step(self, action):
        """Apply ``action`` and return ``(state_next, reward, done, info)``.

        ``tau`` and ``eta`` are rounded and clipped to valid indices and the
        power is clipped to ``[0, P_node_max]``, so a continuous-valued action
        never produces an out-of-range index.  ``done`` is always ``False``.
        """
        tau, eta, power = self._decomposeAction(action)
        self.tau = np.clip(np.rint(tau.ravel()), 0, self.Num_sub - 1).astype(int)
        self.eta = np.clip(np.rint(eta.ravel()), 0, self.Num_APs - 1).astype(int)
        self.P_nodes = np.clip(power.ravel().astype(float), 0.0, self.P_node_max)

        state_next = self._wrapState()
        reward = self._Total_Rate_Calculated()
        done = False

        return state_next, reward, done, {}

    def reset(self):
        """Re-draw node positions and rate targets; return the new state."""
        self.Node_location = self._location_Node_Generator()      # random
        # Keep distance / path loss / channel gain in sync with the new
        # positions, otherwise the state describes the previous layout.
        self._update_channel()
        self.Rate_Target = self._Rate_Target_Generator()

        return self._wrapState()

    def close(self):
        """Nothing to release."""
        pass

In [32]:
# Instantiate the WSN environment with the experiment's settings.
env = Env_WSN(
    MAX_EP_STEPS=100_000,
    BS_R_max=100,                  # Radius of BS's area
    T_nodes=1,                     # Time transmit
    Number_Nodes=5,
    Number_APs=4,
    Number_sub=3,
    P_node_max=100,
    Bandwidth=1_000_000_000,       # 10**9
    Frequency=6_000_000_000,       # 6 * 10**9
    noise=0.001,
    sigma=0.001,
)

## Replay Buffer

In [None]:
class ReplayBuffer:
    """A simple fixed-size numpy replay buffer (ring buffer).

    Once ``size`` transitions have been stored, each new transition
    overwrites the oldest one.
    """

    def __init__(self, obs_dim: int, action_dim: int, size: int, batch_size: int = 32):
        """Allocate storage.

        Args:
            obs_dim: Length of one observation vector.
            action_dim: Length of one action vector.
            size: Maximum number of transitions kept.
            batch_size: Number of transitions returned by ``sample_batch``.
        """
        self.obs_buf = np.zeros([size, obs_dim], dtype=np.float32)
        self.next_obs_buf = np.zeros([size, obs_dim], dtype=np.float32)
        self.acts_buf = np.zeros([size, action_dim], dtype=np.float32)
        self.rews_buf = np.zeros([size], dtype=np.float32)
        self.done_buf = np.zeros([size], dtype=np.float32)
        self.max_size, self.batch_size = size, batch_size
        # ptr: next write position; size: number of valid entries.
        self.ptr, self.size = 0, 0

    def store(
        self,
        obs: np.ndarray,
        act: np.ndarray,
        rew: float,
        next_obs: np.ndarray,
        done: bool,
    ):
        """Store one transition, overwriting the oldest entry when full."""
        self.obs_buf[self.ptr] = obs
        self.next_obs_buf[self.ptr] = next_obs
        self.acts_buf[self.ptr] = act
        self.rews_buf[self.ptr] = rew
        self.done_buf[self.ptr] = done
        self.ptr = (self.ptr + 1) % self.max_size
        self.size = min(self.size + 1, self.max_size)

    # The return annotation uses the builtin ``dict`` so that defining the
    # class does not depend on ``typing.Dict`` having been imported.
    def sample_batch(self) -> dict:
        """Randomly sample ``batch_size`` distinct transitions.

        Returns:
            A dict with keys ``obs``, ``next_obs``, ``acts``, ``rews`` and
            ``done``; each value is an array whose first axis has length
            ``batch_size``.

        Raises:
            ValueError: If fewer than ``batch_size`` transitions are stored.
        """
        if self.size < self.batch_size:
            raise ValueError(
                f"cannot sample {self.batch_size} transitions: "
                f"only {self.size} stored"
            )
        idxs = np.random.choice(self.size, size=self.batch_size, replace=False)
        return dict(obs=self.obs_buf[idxs],
                    next_obs=self.next_obs_buf[idxs],
                    acts=self.acts_buf[idxs],
                    rews=self.rews_buf[idxs],
                    done=self.done_buf[idxs])

    def __len__(self) -> int:
        """Return the number of transitions currently stored."""
        return self.size

## OU NOISE

In [None]:
class OUNoise:
    """Ornstein-Uhlenbeck process.
    Taken from Udacity deep-reinforcement-learning github repository:
    https://github.com/udacity/deep-reinforcement-learning/blob/master/
    ddpg-pendulum/ddpg_agent.py
    """

    def __init__(
        self,
        size: int,
        mu: float = 0.0,
        theta: float = 0.15,
        sigma: float = 0.2,
    ):
        """Initialize parameters and noise process.

        Args:
            size: Number of noise components.
            mu: Mean the state is pulled towards.
            theta: Strength of the pull towards ``mu``.
            sigma: Scale of the random increment.
        """
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        """Reset the internal state (= noise) to mean (mu)."""
        # ndarray.copy() gives an independent array, so updating the state
        # never mutates ``mu``; it also avoids needing the ``copy`` module.
        self.state = self.mu.copy()

    def sample(self) -> np.ndarray:
        """Update internal state and return it as a noise sample."""
        x = self.state
        # NOTE(review): random.random() is uniform on [0, 1), so the increment
        # has a positive mean; a zero-mean (e.g. standard normal) draw is the
        # usual choice for an OU process -- confirm before changing.
        increment = np.array([random.random() for _ in range(len(x))])
        dx = self.theta * (self.mu - x) + self.sigma * increment
        self.state = x + dx
        return self.state

# Network
## Old network