#Neurosmash Training
Notebook for training models, thanks Google

##Mount GDrive

In [0]:
from google.colab import drive


drive.mount('/content/drive/', force_remount=True)

root_dir = '/content/drive/My Drive/'

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/


##Dependencies

In [0]:
import random
import socket
import struct
import numpy as np
import matplotlib.pyplot as plt
import pickle

import torch
import torch.nn
import torch.nn.functional as F


#Import training data

In [0]:
experience_replay = None
with open(root_dir + "/NIPS/randomwalk_1.pkl", 'rb') as f:
    experience_replay = pickle.load(f)

print(len(experience_replay))

23


#Model Definition

In [0]:
class NeurosmashAgent(torch.nn.Module):
    def __init__(self):
        super(NeurosmashAgent, self).__init__()
        #Shape of input image
        self.state_shape = (256, 256, 3)
        #Size of action space (Nothing, Left, Right)
        self.num_actions = 3
        
        #Input channels = 3, output channels = 256
        self.conv1 = torch.nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        #256, 256, 64
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        #128, 128, 64
        self.conv2 = torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        #128, 128, 128
        
        #self.pool again
        #64, 64, 128

        self.linear = torch.nn.Linear(64*64*128, 128)

        self.output = torch.nn.Linear(128, self.num_actions)
        #3 output channels (Nothing=0, Left=1, Right=2)
        

    def step(self, state):
        # return 0 # no action
        # return 1 # left action
        # return 2 # right action
        # return 3 # built-in random action
        
        #TODO: Check this view transformation actually produces an image
        state = torch.tensor(state, dtype=torch.float).view(3, 256, 256).view(1,3,256,256).float()
        
        action = self.forward(state)
        
        return action
    
    def forward(self, x):
        #Convolution layer, ReLU activation
        x = F.relu(self.conv1(x))
        #MaxPooling2D
        x = self.pool(x)

        x = F.relu(self.conv2(x))
        x = self.pool(x)

        #Flatten pooled layer
        x = x.view(-1, 64 * 64 * 128)

        #Linear layer
        x = F.relu(self.linear(x))

        #Dropout
        x = F.dropout(x, 0.2, training=self.training)

        #Softmax on linear output layer
        x = F.softmax(self.output(x), dim=1)
        
        return x


#Training Loop

In [0]:
#Init Agent
#train_agent = NeurosmashAgent()   
print(train_agent)
train_agent.training = True
#Enable GPU if available
if torch.cuda.is_available():
  train_agent.cuda()

#Training CNN
N_epochs = 20

#Fitting CNN model:
#Loss Function
loss_func = torch.nn.CrossEntropyLoss()
#Optimizer
optimizer = torch.optim.Adam(train_agent.parameters(), lr=0.0001)

for epoch in range(N_epochs):
    for i,game in enumerate(experience_replay):
        #Reset loss counter
        running_loss = 0.0
        for j,steps in enumerate(game):
            state, action = steps[0], steps[1]

            state_norm = [s / 255 for s in state]            
            state_tensor = torch.tensor(state_norm, dtype=torch.float).view(3, 256, 256).view(1, 3, 256, 256).cuda()

            action_tensor = torch.tensor(action, dtype=torch.long).view(1).cuda()
            #print("Actual: ", action_tensor)
                    
            optimizer.zero_grad()
            outputs = train_agent(state_tensor)
            #print("Predicted: ", outputs)

            loss = loss_func(outputs, action_tensor)
            
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
            #if (i+1) % 10 == 0:    # print every 10 steps
        print('[%d, %5d, %5d] loss: %.3f' %
        (epoch + 1,  i + 1,j+1,  running_loss / j ))
        running_loss = 0.0
        print("Actual: ", action_tensor)
        print("predicted: ", outputs)
# #Save trained agent's brain
torch.save(train_agent.state_dict(), "/content/drive/My Drive/NIPS/model_3.pt")

NeurosmashAgent(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (linear): Linear(in_features=524288, out_features=128, bias=True)
  (output): Linear(in_features=128, out_features=3, bias=True)
)
[1,     1,   180] loss: 0.968
Actual:  tensor([1], device='cuda:0')
predicted:  tensor([[1.6661e-05, 9.9998e-01, 9.7647e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
[1,     2,   162] loss: 0.910
Actual:  tensor([1], device='cuda:0')
predicted:  tensor([[3.8477e-02, 9.6138e-01, 1.4036e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
[1,     3,   224] loss: 0.914
Actual:  tensor([0], device='cuda:0')
predicted:  tensor([[9.9996e-01, 3.6569e-05, 6.0845e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
[1,     4,   190] loss: 0.917
Actual:  tensor([2], device='cuda:0')
predicted

#Check some predictions


In [0]:

torch.save(train_agent.state_dict(), "/content/drive/My Drive/NIPS/model_2.pt")