# Actor Critic Agent(s) for Resource Allocation
- Test the agents on Gym environments first
- TensorBoard logging with PyTorch: https://pytorch.org/docs/stable/tensorboard.html

In [5]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import gym

from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
import gym.spaces as spaces

## Actor-Critic
- `torch.nn.Sequential` reference: https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html

In [7]:
# Width of each hidden Linear layer in the ActorCritic actor and critic networks.
NUM_HIDDEN_UNITS = 32

In [8]:
class ActorCritic(nn.Module):
    """Two independent MLPs (actor and critic) sized from an observation shape.

    Both networks share the same layout -- Linear, Tanh, Linear, Tanh,
    Linear -- but do not share any parameters. The actor's final layer has
    one output per action (``action_space.n`` when ``discrete`` is true,
    otherwise ``action_space.shape[0]``); the critic's final layer has a
    single output.

    No ``forward`` method is defined here; use ``self.actor`` and
    ``self.critic`` directly.

    Args:
        obs_shape: Indexable observation shape. Only ``obs_shape[0]`` is
            used, as the input width of both networks.
        action_space: Object exposing ``.n`` (discrete case) or ``.shape``
            (non-discrete case).
        discrete: Selects which ``action_space`` attribute defines the
            actor's output width.
        n_hidden: Width of the two hidden layers. ``None`` (the default)
            falls back to the module-level ``NUM_HIDDEN_UNITS``.
    """

    def __init__(self, obs_shape, action_space, discrete=True, *, n_hidden=None):
        super().__init__()

        self.obs_shape = obs_shape
        self.action_space = action_space

        self._is_discrete = discrete
        self._num_inputs = obs_shape[0]
        # Resolve the global lazily so an explicit n_hidden never depends on it.
        self._n_hidden = NUM_HIDDEN_UNITS if n_hidden is None else n_hidden
        self._num_outputs = action_space.n if discrete else action_space.shape[0]

        # Actor is built before critic, keeping the parameter-initialisation
        # order (and therefore seeded results) stable.
        self.actor = self._build_mlp(
            self._num_inputs, self._n_hidden, self._num_outputs
        )
        self.critic = self._build_mlp(self._num_inputs, self._n_hidden, 1)

    @staticmethod
    def _build_mlp(num_inputs, n_hidden, num_outputs):
        """Return a Linear-Tanh-Linear-Tanh-Linear stack with the given widths."""
        return nn.Sequential(
            nn.Linear(num_inputs, n_hidden),
            nn.Tanh(),
            nn.Linear(n_hidden, n_hidden),
            nn.Tanh(),
            nn.Linear(n_hidden, num_outputs),
        )
        

In [9]:
# Instantiate with a 5-action discrete space to inspect the layer sizes.
# Only obs_shape[0] is used as the input width, so (3, 1) gives 3 input features.
ActorCritic((3, 1), spaces.Discrete(5))

ActorCritic(
  (actor): Sequential(
    (0): Linear(in_features=3, out_features=32, bias=True)
    (1): Tanh()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): Tanh()
    (4): Linear(in_features=32, out_features=5, bias=True)
  )
  (critic): Sequential(
    (0): Linear(in_features=3, out_features=32, bias=True)
    (1): Tanh()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): Tanh()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)