In [89]:
import gym
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

In [63]:
class Network(nn.Module):
    """Fully connected feed-forward network with ReLU activations and dropout.

    The network is a stack of ``nn.Linear`` layers. Every hidden layer is
    followed by a ReLU and a dropout step; the final ``output`` layer is a
    plain linear map (no activation is applied to its result).
    """

    def __init__(self,input_size,hidden_layers,output_size,drop_p=0.5):
        """Build the layer stack.

        Args:
            input_size: Number of features in each input vector.
            hidden_layers: Sequence with the width of each hidden layer, in
                order. May be empty, in which case the network reduces to a
                single linear map from ``input_size`` to ``output_size``.
            output_size: Number of values produced per input vector.
            drop_p: Dropout probability applied after every hidden layer.
        """
        super().__init__()

        # One list of widths covers every case (including no hidden layers):
        # consecutive pairs give the (in, out) sizes of each hidden Linear.
        widths = [input_size, *hidden_layers]

        self.hidden_layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(widths[:-1], widths[1:])]
        )
        # The output layer consumes whatever the last width is; with no hidden
        # layers that is simply ``input_size``.
        self.output = nn.Linear(widths[-1], output_size)

        self.dropout = nn.Dropout(p=drop_p)

    def forward(self,x):
        """Run ``x`` through the network.

        Args:
            x: Tensor whose last dimension has size ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size ``output_size`` (raw linear outputs).
        """
        for linear in self.hidden_layers:
            x = F.relu(linear(x))
            # nn.Dropout only zeroes activations while the module is in
            # training mode; after ``.eval()`` it is the identity.
            x = self.dropout(x)

        return self.output(x)
        

In [64]:
# Network dimensions: 8 input features, two hidden layers of 64 units, 4 outputs.
input_size, output_size = 8, 4
hidden_layers = [64, 64]

# Instantiate the network; the bare name on the last line displays its layer summary.
model = Network(input_size, hidden_layers, output_size)
model

Network(
  (hidden_layers): ModuleList(
    (0): Linear(in_features=8, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=64, bias=True)
  )
  (output): Linear(in_features=64, out_features=4, bias=True)
  (dropout): Dropout(p=0.5)
)

In [65]:
# Create the environment registered in gym under the id 'LunarLander-v2'.
env = gym.make('LunarLander-v2')
# Seed the environment with 0. As the cell's last expression, the value
# returned by seed() is what the notebook displays.
env.seed(0)

WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.


[0]

In [66]:
# Random stand-in input: one row of 8 values drawn uniformly from [0, 1).
state = torch.rand(1, 8)
state

tensor([[0.7025, 0.3168, 0.8797, 0.3960, 0.5786, 0.0601, 0.4068, 0.8604]])

In [67]:
# Call the module itself rather than .forward(): nn.Module.__call__ runs any
# registered hooks around forward(), whereas calling forward() directly skips them.
ps = model(state)
# NOTE(review): Network.forward ends in a plain Linear layer (no log-softmax),
# so exp(ps) is not a normalised probability distribution - confirm the intent.
torch.exp(ps)

tensor([[1.0386, 0.6998, 0.9959, 1.1509]], grad_fn=<ExpBackward>)

In [86]:
# Reset the environment and convert the returned observation straight into a
# float tensor with a leading axis of size 1, ready to be fed to the model.
state_alt = torch.from_numpy(env.reset()).float().unsqueeze(0)

In [87]:
# Evaluate the network on the environment observation. The module is invoked
# via __call__ (not .forward) so that registered hooks are honoured.
model(state_alt)

tensor([[-0.0311,  0.0725, -0.1546,  0.0468]], grad_fn=<AddmmBackward>)

In [100]:
# Gather the observations returned by 10 consecutive environment resets and
# stack them into a single NumPy array (one row per reset).
states = np.array([env.reset() for _ in range(10)])

In [101]:
# Convert the stacked observations to a float32 tensor with a leading axis of
# size 1. torch.as_tensor accepts both the NumPy array and an already-converted
# tensor, and the dim check skips the unsqueeze once the extra axis exists, so
# re-running this cell no longer raises or keeps adding axes.
states = torch.as_tensor(states, dtype=torch.float32)
if states.dim() == 2:
    states = states.unsqueeze(0)

In [106]:
# Run the whole batch through the network by calling the module (rather than
# .forward) so registered hooks are executed.
# NOTE(review): no model.eval() call is visible in this notebook, so dropout
# appears to be active and repeated calls can give different values - confirm.
output = model(states)
output

tensor([[[-0.0834, -0.1540,  0.0864, -0.2373],
         [-0.0420, -0.0373, -0.1307, -0.0411],
         [ 0.0387, -0.2646,  0.0915, -0.2563],
         [ 0.0870, -0.1001, -0.2139,  0.0010],
         [ 0.0021, -0.0062, -0.0599, -0.0766],
         [ 0.0655, -0.0931, -0.2337, -0.1153],
         [-0.0255, -0.2861,  0.0050,  0.0848],
         [-0.2504, -0.0925, -0.1283, -0.1092],
         [ 0.1076, -0.0580, -0.3340,  0.0228],
         [ 0.1741, -0.1651, -0.2266, -0.1461]]], grad_fn=<AddBackward0>)

In [116]:
# Largest network output along dim 2 (index 0 of torch.max's result holds the
# values, index 1 the argmax), then insert a new axis at position 1.
torch.max(output.detach(), dim=2)[0].unsqueeze(dim=1)

tensor([[[ 0.0864, -0.0373,  0.0915,  0.0870,  0.0021,  0.0655,  0.0848,
          -0.0925,  0.1076,  0.1741]]])

In [135]:
# Ten hard-coded integer action values, built as a NumPy array and wrapped as a tensor.
actions = torch.from_numpy(np.array([1, 2, 1, 0, 0, 0, 1, 0, 0, 1]))

# Shape check: size of the per-state maximum taken along dim 2 of a fresh forward pass.
torch.max(model(states).detach(), dim=2)[0].shape

torch.Size([1, 10])

In [140]:
# Fresh forward pass, maximum along dim 2 (values only), then add an axis at position 1.
torch.max(model(states).detach(), dim=2)[0].unsqueeze(1)

tensor([[[ 0.0875,  0.1424,  0.1077,  0.1002,  0.1377,  0.0021,  0.1228,
          -0.0229,  0.0152,  0.1007]]])