In [1]:
import torch
import torch.optim as optim
from torch.distributions import Normal
import torch.nn as nn
import numpy as np
from gym.wrappers.monitoring.video_recorder import VideoRecorder
import warnings
from typing import Union
from utils import ReplayBuffer, get_env, run_episode

# Suppress deprecation and user warnings for the rest of the session so that
# cell outputs stay readable.
for _ignored_category in (DeprecationWarning, UserWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

In [10]:
class NeuralNetwork(nn.Module):
    '''
    Fully connected feed-forward network with a configurable number of hidden
    layers and hidden units. It can be used to parametrize policy and critic
    networks.

    Layer layout (stored in ``self.fc_layers``)::

        Linear(input_dim, hidden_size) -> activation
        [Linear(hidden_size, hidden_size) -> activation] * hidden_layers
        Linear(hidden_size, output_dim)

    Note that the input projection counts as an extra hidden layer, so the
    network contains ``hidden_layers + 1`` hidden linear layers in total. No
    activation is applied after the final linear layer.

    Args:
        input_dim: Size of the last dimension of the input tensor.
        output_dim: Size of the last dimension of the output tensor.
        hidden_size: Number of units in every hidden layer.
        hidden_layers: Number of ``hidden_size -> hidden_size`` layers added
            after the input projection.
        activation: Non-linearity placed after every hidden linear layer.
            Accepted forms:

            * an ``nn.Module`` instance (e.g. ``nn.ReLU()``) - the same
              instance is reused at every position;
            * an ``nn.Module`` subclass (e.g. ``nn.ReLU``) - instantiated
              once per position with no arguments;
            * a case-insensitive name from ``_ACTIVATIONS`` (e.g. ``"relu"``).

    Raises:
        ValueError: If ``activation`` is a string that is not a known name.
        TypeError: If ``activation`` is none of the accepted forms.
    '''

    # Names accepted when ``activation`` is given as a string.
    _ACTIVATIONS = {
        "relu": nn.ReLU,
        "tanh": nn.Tanh,
        "sigmoid": nn.Sigmoid,
        "elu": nn.ELU,
        "leaky_relu": nn.LeakyReLU,
        "softplus": nn.Softplus,
        "identity": nn.Identity,
    }

    def __init__(self, input_dim: int, output_dim: int, hidden_size: int,
                 hidden_layers: int, activation: Union[str, nn.Module]):
        super(NeuralNetwork, self).__init__()

        make_activation = self._activation_factory(activation)

        # Linear layers are created in the same order as before, so seeded
        # initialisation and state-dict keys (fc_layers.<idx>) are unchanged.
        layers = [nn.Linear(input_dim, hidden_size), make_activation()]
        for _ in range(hidden_layers):
            layers.append(nn.Linear(hidden_size, hidden_size))
            layers.append(make_activation())
        # Output head: left linear so callers can apply their own transform.
        layers.append(nn.Linear(hidden_size, output_dim))

        self.fc_layers = nn.ModuleList(layers)

    @staticmethod
    def _activation_factory(activation):
        '''Return a zero-argument callable producing the activation module.'''
        if isinstance(activation, nn.Module):
            # Keep the historical behaviour: one shared instance everywhere.
            return lambda: activation
        if isinstance(activation, type) and issubclass(activation, nn.Module):
            return activation
        if isinstance(activation, str):
            key = activation.strip().lower()
            if key not in NeuralNetwork._ACTIVATIONS:
                known = ", ".join(sorted(NeuralNetwork._ACTIVATIONS))
                raise ValueError(
                    f"Unknown activation '{activation}'. Expected one of: {known}"
                )
            return NeuralNetwork._ACTIVATIONS[key]
        raise TypeError(
            "activation must be a str, an nn.Module instance or an nn.Module "
            f"subclass, got {type(activation).__name__}"
        )

    def forward(self, s: torch.Tensor) -> torch.Tensor:
        '''Apply every layer in ``self.fc_layers`` to ``s`` in order.'''
        x = s
        for layer in self.fc_layers:
            x = layer(x)
        return x

In [8]:
# Hyperparameters for the scratch experiments in the cells that follow.
state_dim, hidden_size, hidden_layers = 2, 256, 7
activation = nn.ReLU()

In [9]:
# Scratch check: an input projection followed by the activation, held in a
# ModuleList. The trailing expression displays the resulting container.
fc_layers = nn.ModuleList([nn.Linear(state_dim, hidden_size), activation])
fc_layers

ModuleList(
  (0): Linear(in_features=2, out_features=256, bias=True)
  (1): ReLU()
)

In [15]:
# Random batch of 32 two-dimensional inputs drawn from a standard normal.
x = torch.randn(32, 2)
x.size()

torch.Size([32, 2])

In [11]:
# Actor network with two outputs per input row.
actor = NeuralNetwork(
    input_dim=state_dim,
    output_dim=2,
    hidden_size=hidden_size,
    hidden_layers=hidden_layers,
    activation=nn.ReLU(),
)

In [17]:
# Shape of the actor output for the random batch.
actor(x).size()

torch.Size([32, 2])

In [20]:
# Keep the actor output and check that a width-1 slice preserves the column axis.
out = actor(x)
out[:, 0:1].size()

torch.Size([32, 1])

In [22]:
# Column 0 of the actor output is used as the mean, column 1 as the raw
# value that is later exponentiated into the standard deviation.
mu = out[:, 0]
sigma = out[:, 1]
print(mu.shape, sigma.shape, sep="\n")

torch.Size([32])
torch.Size([32])


In [24]:
# Build the Gaussian action distribution and draw one action per batch row.
# The standard deviation is recomputed from the raw network output instead of
# from `sigma` itself, so re-running this cell no longer exponentiates an
# already-exponentiated value.
log_sigma = out[:, 1]
sigma = torch.exp(log_sigma)  # exp keeps the standard deviation strictly positive
act_dist = Normal(mu, sigma)
act = act_dist.sample()
act.shape

torch.Size([32])

In [25]:
# Display the raw sampled actions (these are not yet clamped to any range).
act


tensor([-2.9289e+00, -2.2944e+00,  3.4285e+00, -9.7064e-01,  4.1584e+00,
        -4.6575e-03,  8.9469e-01,  3.0172e+00, -1.7857e+00, -1.8274e+00,
         5.3921e+00, -1.8917e-01, -1.3850e+00,  1.3022e+00,  9.4828e-01,
         1.2434e+00, -1.7217e+00,  1.5420e+00, -1.3863e+00, -6.9463e-01,
         7.0211e-01,  6.9256e+00, -1.8727e-01, -3.3838e-02, -5.8439e-01,
        -6.4931e-01,  3.7811e+00, -2.6516e+00,  3.4855e+00, -9.0331e-01,
         2.1066e+00, -6.2564e-01])

In [27]:
# Shape of the per-sample log-probabilities under the action distribution.
act_dist.log_prob(act).size()

torch.Size([32])

In [28]:
# Restrict the sampled actions to the interval [-1, 1].
act.clamp(min=-1, max=1)

tensor([-1.0000, -1.0000,  1.0000, -0.9706,  1.0000, -0.0047,  0.8947,  1.0000,
        -1.0000, -1.0000,  1.0000, -0.1892, -1.0000,  1.0000,  0.9483,  1.0000,
        -1.0000,  1.0000, -1.0000, -0.6946,  0.7021,  1.0000, -0.1873, -0.0338,
        -0.5844, -0.6493,  1.0000, -1.0000,  1.0000, -0.9033,  1.0000, -0.6256])