In [1]:
# NOTE(review): %pylab pulls numpy and matplotlib names straight into the
# notebook namespace (see the "Populating the interactive namespace" message
# printed by this cell). The cells shown here go through `np.` / `plt.`
# explicitly, so `%matplotlib inline` is probably sufficient -- confirm that no
# cell relies on a pylab-injected name before switching.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import gym
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from copy import deepcopy
from PIL import Image
import os 
from torch.nn import parameter
Parameter = parameter.Parameter

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable, Function
import torchvision.transforms as T
from catch import Catch
# Choose the tensor constructors once: the CUDA variants when a GPU is
# available, the CPU variants otherwise.
use_cuda = torch.cuda.is_available()
if use_cuda:
    FloatTensor = torch.cuda.FloatTensor
    LongTensor = torch.cuda.LongTensor
    ByteTensor = torch.cuda.ByteTensor
else:
    FloatTensor = torch.FloatTensor
    LongTensor = torch.LongTensor
    ByteTensor = torch.ByteTensor
# Default floating-point tensor type used by the cells below.
Tensor = FloatTensor

In [3]:
# Unwrapped CartPole environment. NOTE(review): `env` is not referenced by any
# other cell shown in this notebook (training uses the Catch game) -- confirm
# before removing.
env = gym.make('CartPole-v0').unwrapped

# Only pull in IPython's display helper when matplotlib renders inline.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# Interactive plotting mode.
plt.ion()

# Tensor constructors: CUDA variants when a GPU is available, CPU otherwise.
# (Same assignments as in the import cell above.)
use_cuda = torch.cuda.is_available()
if use_cuda:
    FloatTensor = torch.cuda.FloatTensor
    LongTensor = torch.cuda.LongTensor
    ByteTensor = torch.cuda.ByteTensor
else:
    FloatTensor = torch.FloatTensor
    LongTensor = torch.LongTensor
    ByteTensor = torch.ByteTensor
Tensor = FloatTensor

In [40]:
# Lightweight record for one environment step; the field order is
# (state, action, next_state, reward).
Transition = namedtuple(
    'Transition',
    ['state', 'action', 'next_state', 'reward'],
)

class ReplayMemory(object):
    """Run-based experience replay buffer for the queued Q-network.

    Transitions are not stored individually: ``push`` collects consecutive
    steps in ``current_memory`` and moves the whole run into ``memory`` once
    ``ball_over`` or ``game_over`` is set. ``sample`` then replays complete
    runs in their original order, so a model that keeps a frame queue sees the
    frames in sequence.

    Attributes:
        max_memory: maximum number of stored runs (not transitions); the
            oldest run is dropped when the limit is exceeded.
        memory: list of finished runs, each a list of
            ``[[state_t, action_t, reward_t, state_tp1], game_over]`` entries.
        discount: discount factor applied to the bootstrapped next-state value.
        current_memory: steps of the run that is still in progress.
    """

    def __init__(self, max_memory=100, discount=.9):
        self.max_memory = max_memory
        self.memory = []
        self.discount = discount
        self.current_memory = []

    def push(self, states, game_over, ball_over):
        """Record one step and close the current run when it has ended.

        Args:
            states: ``[state_t, action_t, reward_t, state_tp1]`` for the step.
            game_over: whether the game ended on this step.
            ball_over: whether the current ball ended on this step.
        """
        self.current_memory.append([states, game_over])
        if ball_over or game_over:
            self.memory.append(self.current_memory)
            if len(self.memory) > self.max_memory:
                del self.memory[0]
            self.current_memory = []

    def sample(self, model, batch_size=1):
        """Build Q-learning inputs and targets from randomly chosen runs.

        ``min(len(memory), batch_size)`` run indices are drawn with
        replacement. Every transition of each chosen run is fed through
        ``model`` in order. The target row is the model's current prediction
        with the entry of the taken action replaced by ``reward_t`` (terminal
        step) or ``reward_t + discount * max_a Q(state_tp1, a)``.

        The model's queue is saved before the next-state look-ahead and
        restored afterwards, and ``model.reset_queue()`` is called after each
        run.

        Args:
            model: callable network that also provides ``save_queue``,
                ``load_queue`` and ``reset_queue``.
            batch_size: maximum number of runs to replay.

        Returns:
            ``(inputs, targets)`` as NumPy arrays with one row per replayed
            transition, or ``(None, None)`` when nothing was replayed.
        """
        len_memory = len(self.memory)

        # Rows are collected in lists and joined once at the end instead of
        # re-concatenating the growing arrays for every transition.
        input_rows = []
        target_rows = []
        run_ids = np.random.randint(0, len_memory,
                                    size=min(len_memory, batch_size))
        for idx in run_ids:
            for (state_t, action_t, reward_t, state_tp1), game_over in self.memory[idx]:
                temp_state_t = Variable(torch.from_numpy(state_t).type(Tensor))
                temp_state_tp1 = Variable(torch.from_numpy(state_tp1).type(Tensor))

                # Start from the model's own prediction so that only the taken
                # action contributes to the loss. `.cpu()` keeps this working
                # when `Tensor` is a CUDA type; the copy detaches the row from
                # the tensor's storage.
                targets = model(temp_state_t).data.cpu().numpy()[0].copy()

                # Peek at the next state's best value without letting the
                # look-ahead frame stay in the model's queue.
                temp_queue = model.save_queue()
                Q_sa = model(temp_state_tp1).data.max(1)[0].view(1, 1).cpu().numpy()[0][0]
                model.load_queue(temp_queue)

                if game_over:
                    targets[action_t] = reward_t
                else:
                    # reward_t + gamma * max_a' Q(s', a')
                    targets[action_t] = reward_t + self.discount * Q_sa

                input_rows.append(state_t)
                target_rows.append(targets)
            # Each run is replayed from an empty queue.
            model.reset_queue()

        if not input_rows:
            return None, None
        return np.concatenate(input_rows, axis=0), np.stack(target_rows, axis=0)
    


In [74]:


class fifo_queue(Function):
    """Slide a fixed-length FIFO window over a batch of inputs.

    Written against the legacy instance-method ``autograd.Function`` style
    (``forward``/``backward`` take ``self``), which is how the ``Queue`` module
    in this notebook instantiates and calls it.
    """

    def forward(self, old_queue, inputs):
        """Push every row of ``inputs`` through the queue, oldest entry out.

        Args:
            old_queue: current queue contents; it is cloned, not modified.
            inputs: batch whose rows are appended one at a time.

        Returns:
            ``(outputs, queue)`` where ``outputs[k]`` is the queue content
            right after the k-th row was appended and ``queue`` is the final
            queue. ``outputs`` is ``None`` when ``inputs`` has no rows.
        """
        queue = old_queue.clone()
        snapshots = []
        for temp in inputs:
            # Append the newest row at the end, then drop the oldest one.
            queue = torch.cat((queue, temp.unsqueeze(0)), 0)
            queue = queue[1:]
            snapshots.append(queue)

        # Stack once instead of re-concatenating the growing output per row.
        outputs = torch.stack(snapshots, 0) if snapshots else None

        self.mark_non_differentiable(outputs)
        return outputs, queue

    def backward(self, *output_grads):
        """No gradient flows through the queue.

        ``forward`` has two outputs and two inputs, so this accepts one
        incoming gradient per output and returns one (empty) gradient per
        input.
        """
        return None, None


class Queue(nn.Module):
    """Linear layer applied to a sliding FIFO window of the most recent inputs.

    The module keeps the last ``queue_size`` inputs in ``self.queue``. On each
    forward pass every row of the batch is pushed into that queue in order,
    and the flattened queue snapshot for each row is fed to a linear map.

    Args:
        input_features: size of one input, either an int or a list of
            dimensions.
        out_features: number of outputs of the linear map.
        queue_size: number of past inputs kept in the window.
        bias: truthy to learn an additive bias; the default is no bias.
    """

    def __init__(self, input_features, out_features, queue_size, bias=None):
        super(Queue, self).__init__()
        # Setup the Queue
        self.input_features = input_features
        self.queue_size = queue_size
        self.reset_queue()

        # Init the queue function
        self.fifo_queue = fifo_queue()

        # Weights of the linear map over the flattened window. The width is
        # derived from the queue shape so that list-valued `input_features`
        # (accepted by `reset_queue`) yields a valid weight size too.
        self.weight = Parameter(torch.Tensor(out_features, self._flat_queue_features()))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        # Init the parameters
        self.reset_parameters()

    def _flat_queue_features(self):
        """Return the number of scalars in one flattened queue snapshot."""
        if type(self.input_features) is list:
            dims = self.input_features
        else:
            dims = [self.input_features]
        total = self.queue_size
        for dim in dims:
            total *= dim
        return total

    def reset_parameters(self):
        """Draw weight (and bias, if present) uniformly from +-1/sqrt(fan_in)."""
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, inputs):
        """Push ``inputs`` row by row into the queue and apply the linear map.

        The stored queue is replaced by the state after the last row, so
        successive calls continue the same history until ``reset_queue``.
        """
        x = self.fifo_queue(self.queue, inputs)
        output = x[0]
        self.queue = x[1]
        # One flattened window per batch row.
        output = output.view(output.size()[0], -1)
        return F.linear(output, self.weight, self.bias)

    # Re-initialise the queue to all zeros.
    def reset_queue(self):
        """Replace the queue with zeros of shape ``[queue_size] + input dims``."""
        if type(self.input_features) is list:
            queue = Variable(torch.zeros([self.queue_size]+self.input_features))
        else:
            queue = Variable(torch.zeros([self.queue_size]+[self.input_features]))
        # Stored as a plain attribute (not a registered buffer or parameter).
        self.queue = queue
        self.queue.requires_grad = False
        




# NOTE(review): the triple-quoted string below is a disabled, NumPy-based
# variant of the Queue module. It is only a string expression: nothing in it
# is executed, and the cell merely echoes the string as its output.
"""class Queue(nn.Module):
    def __init__(self, input_features, out_features, queue_size):
        super(Queue, self).__init__()
        self.input_features = input_features
        self.queue_size = queue_size
        # Setup the queue
        if type(input_features) is list:
            self.queue = np.zeros([queue_size]+input_features)
        else:
            self.queue = np.zeros([queue_size]+[input_features])


    def forward(self, inputs):
        # The actual call that is run when you pass data into this model. 
        output = None
        for temp in inputs.data.numpy():
            #print(self.queue[:, 1:].size(), torch.from_numpy(temp).type(Tensor).unsqueeze(0).size())
            self.queue = np.concatenate((self.queue, np.expand_dims(temp, 0)), 0)
            self.queue = self.queue[1:]
            if output is None:
                output = np.expand_dims(self.queue, 0)
            else:
                output = np.concatenate((output, np.expand_dims(self.queue, 0)), 0)
        # Return 
        output = Variable(torch.from_numpy(output).type(Tensor))
        #output.requires_grad = False
        output = output.view(output.size()[0], -1) # FLATTEN and make into a Variable for Torch
        return output
            
    # reunit the queue
    def _reset_queue(self):
        if type(self.input_features) is list:
            self.queue = np.zeros([self.queue_size]+self.input_features)
        else:
            self.queue = np.zeros([self.queue_size]+[self.input_features])
"""

'class Queue(nn.Module):\n    def __init__(self, input_features, out_features, queue_size):\n        super(Queue, self).__init__()\n        self.input_features = input_features\n        self.queue_size = queue_size\n        # Setup the queue\n        if type(input_features) is list:\n            self.queue = np.zeros([queue_size]+input_features)\n        else:\n            self.queue = np.zeros([queue_size]+[input_features])\n\n\n    def forward(self, inputs):\n        # The actual call that is run when you pass data into this model. \n        output = None\n        for temp in inputs.data.numpy():\n            #print(self.queue[:, 1:].size(), torch.from_numpy(temp).type(Tensor).unsqueeze(0).size())\n            self.queue = np.concatenate((self.queue, np.expand_dims(temp, 0)), 0)\n            self.queue = self.queue[1:]\n            if output is None:\n                output = np.expand_dims(self.queue, 0)\n            else:\n                output = np.concatenate((output, np.expand_

In [75]:
class Queued_DQN(nn.Module):
    """Q-network whose first layer is a ``Queue`` over the most recent frames.

    Architecture: ``Queue`` (frame window -> hidden) -> ReLU -> Linear
    (hidden -> hidden) -> ReLU -> Linear (hidden -> num_actions).

    Args:
        input_size: number of features in one flattened frame.
        hidden_size: width of the two hidden layers.
        num_actions: number of Q-values produced per input row.
        queue_size: number of past frames kept by the ``Queue`` layer.
    """

    def __init__(self, input_size, hidden_size, num_actions, queue_size):
        super(Queued_DQN, self).__init__()
        # The queue layer stands in for a first Linear layer mapping
        # input_size*queue_size -> hidden_size. Its output width must be
        # hidden_size because that is what lin2 consumes; hard-coding
        # input_size*queue_size only worked when the two happened to be equal.
        self.queue = Queue(input_size, hidden_size, queue_size)
        self.lin2 = nn.Linear(hidden_size, hidden_size)
        self.lin3 = nn.Linear(hidden_size, num_actions)
        self.output_shape = [1, num_actions]

    def forward(self, x):
        """Return one row of Q-values per input row; advances the frame queue."""
        x = F.relu(self.queue(x))
        x = F.relu(self.lin2(x))
        x = self.lin3(x)
        return x

    def reset_queue(self):
        """Clear the frame history held by the queue layer."""
        self.queue.reset_queue()

    def save_queue(self):
        """Return a copy of the current frame queue."""
        return self.queue.queue.clone()

    def load_queue(self, input_queue):
        """Replace the frame queue with a copy of ``input_queue``."""
        self.queue.queue = input_queue.clone()

In [76]:
# NOTE(review): disabled plain three-layer DQN baseline, kept as a bare string.
# Nothing in it is executed; the cell merely echoes the string as its output.
"""class DQN(nn.Module):
    def __init__(self, input_size, hidden_size, num_actions):
        super(DQN, self).__init__()
        self.lin1 = nn.Linear(input_size, hidden_size)
        self.lin2 = nn.Linear(hidden_size, hidden_size)
        self.lin3 = nn.Linear(hidden_size, num_actions)
        self.output_shape = [1, num_actions]
        
    def forward(self, x):
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        return self.lin3(x)"""
    

'class DQN(nn.Module):\n    def __init__(self, input_size, hidden_size, num_actions):\n        super(DQN, self).__init__()\n        self.lin1 = nn.Linear(input_size, hidden_size)\n        self.lin2 = nn.Linear(hidden_size, hidden_size)\n        self.lin3 = nn.Linear(hidden_size, num_actions)\n        self.output_shape = [1, num_actions]\n        \n    def forward(self, x):\n        x = F.relu(self.lin1(x))\n        x = F.relu(self.lin2(x))\n        return self.lin3(x)'

In [77]:
import numpy as np
import pandas as pd
def train_catch(catch, model, exp_replay, num_games, batch_size,
                per_random_act=.1, num_actions=3,
                test_every=None, test_on_games=100):
    """Train ``model`` on the Catch game with epsilon-greedy Q-learning.

    Each epoch plays one game. Every step is pushed into ``exp_replay``. Once
    more than ``batch_size`` steps with a non-zero reward have been seen,
    every further step performs ``batch_size`` optimisation passes, each on
    one run sampled from ``exp_replay``.

    Args:
        catch: environment providing ``observe(flatten, expand_dim)``,
            ``act(action)`` returning ``(reward, timestep, game_over)``, and
            ``reset()``.
        model: Q-network providing ``reset_queue``, ``save_queue`` and
            ``load_queue``.
        exp_replay: replay buffer providing ``push`` and ``sample``.
        num_games: number of games (epochs) to play.
        batch_size: optimisation passes per step, and the number of rewarded
            steps that must be exceeded before training starts.
        per_random_act: probability of taking a random action.
        num_actions: number of actions to draw random actions from.
        test_every: if not None, evaluate with ``test_catch`` every this many
            epochs.
        test_on_games: number of games per evaluation.

    Returns:
        ``rLoss`` (columns ``epoch``, ``loss``: summed loss of each epoch in
        which training ran), or ``(rLoss, rScores)`` when ``test_every`` is
        given.
    """
    optimizer = optim.Adagrad(model.parameters())

    # Rows are collected in plain lists and turned into DataFrames once at the
    # end instead of enlarging the frames row by row.
    loss_records = []
    score_records = []
    score_column = str('mean score over ' + str(test_on_games) + ' games')

    # Number of steps seen so far that carried a non-zero reward.
    pushed = 0

    # train over total epochs
    for e in range(num_games):

        game_over = False
        tot_loss = 0.0

        # get initial input
        input_t = catch.observe(flatten=True, expand_dim=True)

        # iterate over each game
        while not game_over:

            # t_-1 is the previous observation
            input_tm1 = input_t.copy()

            # The forward pass runs even when a random action ends up being
            # used, because it also advances the model's frame queue.
            temp = Variable(torch.from_numpy(input_tm1).type(Tensor))
            q = model(temp).data.max(1)[1].view(1, 1).cpu().numpy()

            if np.random.rand() <= per_random_act:
                # Randomly pick an action
                action = torch.rand(1) * (num_actions)
                action = int(action.numpy()[0])
            else:
                # Use the greedy action
                action = q[0][0]
            # Action indices are shifted by one before being handed to the
            # game. Apply the action, get reward and new state.
            reward, timestep, game_over = catch.act(action-1)

            # t_0, current timestep
            input_t = catch.observe(flatten=True, expand_dim=True)
            # store experience
            exp_replay.push(states=[input_tm1, action, reward, input_t],
                            game_over=game_over, ball_over=(reward != 0))
            if reward != 0:
                pushed += 1

            # adapt model
            if pushed >= batch_size + 1:

                # Training replays stored runs through the same model, so the
                # live game's frame queue is saved and restored around it.
                temp_queue = model.save_queue()
                for _ in range(batch_size):
                    model.reset_queue()
                    inputs, expected_targets = exp_replay.sample(model, batch_size=1)
                    model.reset_queue()
                    targets = model(Variable(torch.from_numpy(inputs).type(Tensor)))

                    expected_targets = Variable(torch.from_numpy(expected_targets).type(Tensor))

                    # Mean-squared-error loss between predicted and target Q-values
                    loss = F.mse_loss(targets, expected_targets)
                    # ravel() copes with both one-element and zero-dimensional losses.
                    tot_loss += float(loss.data.cpu().numpy().ravel()[0])
                    # Optimize the model
                    optimizer.zero_grad()
                    loss.backward()
                    for param in model.parameters():
                        # Parameters that took no part in the loss have no gradient.
                        if param.grad is not None:
                            param.grad.data.clamp_(-1, 1)
                    optimizer.step()

                model.load_queue(temp_queue)

        if pushed >= batch_size + 1:
            # Save and report the epoch's summed loss (the value printed
            # below), as a plain float rather than the last loss tensor.
            loss_records.append([int(e), tot_loss])
            print("Epoch {:03d} | Loss {:.4f}".format(e, tot_loss))
        # Create initial start environment
        catch.reset()
        model.reset_queue()
        # Test the current model weights if need be.
        if (test_every is not None) and ((e + 1) % test_every == 0):

            scores = test_catch(
                catch=catch, model=model,
                test_on_games=test_on_games)

            # Save and report mean Score
            ms = scores['score'].mean()
            score_records.append([int(e), ms])
            print("Epoch {:03d} | MeanScore {:.2f}".format(e, ms))

            catch.reset()

    rLoss = pd.DataFrame(loss_records, columns=['epoch', 'loss'])
    rLoss['epoch'] = rLoss['epoch'].astype(int)

    # Return rLoss, and rScores if the user requested the model to be tested
    # while training.
    if test_every is not None:
        rScores = pd.DataFrame(score_records, columns=['epoch', score_column])
        rScores['epoch'] = rScores['epoch'].astype(int)
        return rLoss, rScores
    return rLoss



def test_catch(catch, model, test_on_games=100,  save_frames=False):

    if save_frames:
        frames_stack = []
    scores = pd.DataFrame(columns=['game', 'score'])
    scores['game'] = scores['game'].astype(int)
    # Iterate over number of games to play
    for game_num in range(test_on_games):

        # Count for num balls that reached the end
        game_over = False

        # Get initial Frame
        frame_num = 1
        input_t = catch.observe(flatten=True, expand_dim=True)
        if save_frames:
            frames_stack.append(input_t)

        total_score = 0
        # Iterate until end of testing game
        while not game_over:

            input_tm1 = input_t

            # Get next action
            temp = torch.from_numpy(input_tm1).type(Tensor)
            temp = Variable(temp)
            q = model(temp).data.max(1)[1].view(1, 1).numpy()
            action = q[0][0]

            # apply action, get rewards and new state
            reward, timestep, game_over = catch.act(action-1)
            input_t = catch.observe(flatten=True, expand_dim=True)
            total_score += reward
            # Iterate frame number
            frame_num += 1

            if save_frames:
                frames_stack.append(input_t)
        scores.loc[len(scores), :] = [int(game_num), total_score]
        #scores.append([game_num, total_score])
        game_num += 1
        catch.reset()
        model.reset_queue()

    if save_frames:
        return scores, frames_stack
    else:
        return scores


In [78]:
# Experiment configuration: board size, mask setting, frame-queue length and
# run index.
height, width = 11, 13
m = 4
q_size = 5
i = 0

# One flattened frame has width * height entries.
input_size = width * height

# Name used for the files written by the save cell.
test_name = "".join(
    ["RUN", str(i), "_Mask_", str(int(m)), "_13x11_Q", str(q_size), "_MLP"]
)

# Game instance (output_buffer_size=q_size is intentionally not passed here).
c = Catch(
    screen_height=height,
    screen_width=width,
    game_over_conditions={'ball_deletions': 1},
    mask=m,
    ball_spawn_rate=11,
)

# Replay buffer holding at most 50 runs.
exp = ReplayMemory(max_memory=50)

# Queue-based Q-network with three actions.
model = Queued_DQN(input_size, input_size*q_size, 3, q_size)
#model = DQN(input_size, input_size, 3)

In [79]:
# Train for up to 5000 games, evaluating on 100 games every 5 epochs, then run
# a final 1000-game evaluation on a freshly reset game.
loss, test_on_train = train_catch(
    catch=c,
    model=model,
    exp_replay=exp,
    num_games=5000,
    batch_size=10,
    test_every=5,
    test_on_games=100,
)
c.reset()
tests = test_catch(catch=c, model=model, test_on_games=1000)

Epoch 004 | MeanScore -0.62
Epoch 009 | MeanScore -0.46
Epoch 010 | Loss 1.0657
Epoch 011 | Loss 2.9893
Epoch 012 | Loss 0.5993
Epoch 013 | Loss 0.3263
Epoch 014 | Loss 0.3325
Epoch 014 | MeanScore -0.18
Epoch 015 | Loss 0.1774
Epoch 016 | Loss 0.0666
Epoch 017 | Loss 0.1141
Epoch 018 | Loss 0.0519
Epoch 019 | Loss 0.4929
Epoch 019 | MeanScore -0.28
Epoch 020 | Loss 0.3513
Epoch 021 | Loss 0.3473
Epoch 022 | Loss 0.3577
Epoch 023 | Loss 0.3811
Epoch 024 | Loss 0.0546
Epoch 024 | MeanScore -0.28
Epoch 025 | Loss 0.1277
Epoch 026 | Loss 0.1865
Epoch 027 | Loss 0.2945
Epoch 028 | Loss 0.3003
Epoch 029 | Loss 0.1108
Epoch 029 | MeanScore -0.72
Epoch 030 | Loss 0.1999
Epoch 031 | Loss 0.2277
Epoch 032 | Loss 0.1356
Epoch 033 | Loss 0.1725
Epoch 034 | Loss 0.0531
Epoch 034 | MeanScore -0.52
Epoch 035 | Loss 0.4770
Epoch 036 | Loss 0.2633
Epoch 037 | Loss 0.5309
Epoch 038 | Loss 0.2083
Epoch 039 | Loss 0.2303
Epoch 039 | MeanScore -0.46
Epoch 040 | Loss 0.2679
Epoch 041 | Loss 0.4901
Epoch 04

Epoch 286 | Loss 0.0364
Epoch 287 | Loss 0.1536
Epoch 288 | Loss 0.1008
Epoch 289 | Loss 0.1222
Epoch 289 | MeanScore 0.44
Epoch 290 | Loss 0.0458
Epoch 291 | Loss 0.0445
Epoch 292 | Loss 0.2875
Epoch 293 | Loss 0.0740
Epoch 294 | Loss 0.1300
Epoch 294 | MeanScore 0.30
Epoch 295 | Loss 0.0753
Epoch 296 | Loss 0.0884
Epoch 297 | Loss 0.0567
Epoch 298 | Loss 0.0563
Epoch 299 | Loss 0.0553
Epoch 299 | MeanScore 0.20
Epoch 300 | Loss 0.0535
Epoch 301 | Loss 0.0285
Epoch 302 | Loss 0.0897
Epoch 303 | Loss 0.0231
Epoch 304 | Loss 0.0842
Epoch 304 | MeanScore 0.18
Epoch 305 | Loss 0.1091
Epoch 306 | Loss 0.0580
Epoch 307 | Loss 0.0284
Epoch 308 | Loss 0.0247
Epoch 309 | Loss 0.1469
Epoch 309 | MeanScore 0.34
Epoch 310 | Loss 0.0903
Epoch 311 | Loss 0.0629
Epoch 312 | Loss 0.1101
Epoch 313 | Loss 0.0596
Epoch 314 | Loss 0.0595
Epoch 314 | MeanScore 0.16
Epoch 315 | Loss 0.0287
Epoch 316 | Loss 0.0889
Epoch 317 | Loss 0.0906
Epoch 318 | Loss 0.0522
Epoch 319 | Loss 0.1657
Epoch 319 | MeanScore 

Epoch 565 | Loss 0.0216
Epoch 566 | Loss 0.0467
Epoch 567 | Loss 0.0151
Epoch 568 | Loss 0.1116
Epoch 569 | Loss 0.0553
Epoch 569 | MeanScore 0.80
Epoch 570 | Loss 0.0100
Epoch 571 | Loss 0.1418
Epoch 572 | Loss 0.0556
Epoch 573 | Loss 0.0320
Epoch 574 | Loss 0.0237
Epoch 574 | MeanScore 0.64
Epoch 575 | Loss 0.0959
Epoch 576 | Loss 0.1141
Epoch 577 | Loss 0.0718
Epoch 578 | Loss 0.0508
Epoch 579 | Loss 0.0533
Epoch 579 | MeanScore 0.76
Epoch 580 | Loss 0.0319
Epoch 581 | Loss 0.0574
Epoch 582 | Loss 0.1003
Epoch 583 | Loss 0.1087
Epoch 584 | Loss 0.0435
Epoch 584 | MeanScore 0.70
Epoch 585 | Loss 0.0234
Epoch 586 | Loss 0.0222
Epoch 587 | Loss 0.0145
Epoch 588 | Loss 0.0308
Epoch 589 | Loss 0.0227
Epoch 589 | MeanScore 0.52
Epoch 590 | Loss 0.0087
Epoch 591 | Loss 0.0308
Epoch 592 | Loss 0.0148
Epoch 593 | Loss 0.0110
Epoch 594 | Loss 0.0069
Epoch 594 | MeanScore 0.52
Epoch 595 | Loss 0.0036
Epoch 596 | Loss 0.0036
Epoch 597 | Loss 0.0639
Epoch 598 | Loss 0.0267
Epoch 599 | Loss 0.051

Epoch 844 | MeanScore 0.78
Epoch 845 | Loss 0.0515
Epoch 846 | Loss 0.0315
Epoch 847 | Loss 0.0367
Epoch 848 | Loss 0.0261
Epoch 849 | Loss 0.0191
Epoch 849 | MeanScore 0.82
Epoch 850 | Loss 0.0083
Epoch 851 | Loss 0.0113
Epoch 852 | Loss 0.0173
Epoch 853 | Loss 0.0092
Epoch 854 | Loss 0.0090
Epoch 854 | MeanScore 0.94
Epoch 855 | Loss 0.0051
Epoch 856 | Loss 0.0714
Epoch 857 | Loss 0.0460
Epoch 858 | Loss 0.0176
Epoch 859 | Loss 0.1344
Epoch 859 | MeanScore 0.88
Epoch 860 | Loss 0.0112
Epoch 861 | Loss 0.0140
Epoch 862 | Loss 0.0143
Epoch 863 | Loss 0.0294
Epoch 864 | Loss 0.0119
Epoch 864 | MeanScore 0.84
Epoch 865 | Loss 0.0131
Epoch 866 | Loss 0.0125
Epoch 867 | Loss 0.0052
Epoch 868 | Loss 0.0060
Epoch 869 | Loss 0.0509
Epoch 869 | MeanScore 0.92
Epoch 870 | Loss 0.0431
Epoch 871 | Loss 0.0276
Epoch 872 | Loss 0.0242
Epoch 873 | Loss 0.0272
Epoch 874 | Loss 0.0332
Epoch 874 | MeanScore 0.86
Epoch 875 | Loss 0.0430
Epoch 876 | Loss 0.0162
Epoch 877 | Loss 0.0233
Epoch 878 | Loss 0.

Epoch 1119 | Loss 0.0117
Epoch 1119 | MeanScore 1.00
Epoch 1120 | Loss 0.0043
Epoch 1121 | Loss 0.0133
Epoch 1122 | Loss 0.1479
Epoch 1123 | Loss 0.1279
Epoch 1124 | Loss 0.0429
Epoch 1124 | MeanScore 0.98
Epoch 1125 | Loss 0.0208
Epoch 1126 | Loss 0.0235
Epoch 1127 | Loss 0.0130
Epoch 1128 | Loss 0.0076
Epoch 1129 | Loss 0.1058
Epoch 1129 | MeanScore 0.94
Epoch 1130 | Loss 0.0624
Epoch 1131 | Loss 0.0368
Epoch 1132 | Loss 0.0110
Epoch 1133 | Loss 0.0104
Epoch 1134 | Loss 0.0154
Epoch 1134 | MeanScore 0.94
Epoch 1135 | Loss 0.0083
Epoch 1136 | Loss 0.0083
Epoch 1137 | Loss 0.0837
Epoch 1138 | Loss 0.0653
Epoch 1139 | Loss 0.0986
Epoch 1139 | MeanScore 0.96
Epoch 1140 | Loss 0.0463
Epoch 1141 | Loss 0.0445
Epoch 1142 | Loss 0.0360
Epoch 1143 | Loss 0.0168
Epoch 1144 | Loss 0.0185
Epoch 1144 | MeanScore 0.94
Epoch 1145 | Loss 0.0069
Epoch 1146 | Loss 0.0061
Epoch 1147 | Loss 0.0690
Epoch 1148 | Loss 0.0221
Epoch 1149 | Loss 0.0093
Epoch 1149 | MeanScore 0.96
Epoch 1150 | Loss 0.0068
Epoc

Epoch 1387 | Loss 0.0079
Epoch 1388 | Loss 0.0106
Epoch 1389 | Loss 0.0048
Epoch 1389 | MeanScore 1.00
Epoch 1390 | Loss 0.0140
Epoch 1391 | Loss 0.0127
Epoch 1392 | Loss 0.0091
Epoch 1393 | Loss 0.0090
Epoch 1394 | Loss 0.0164
Epoch 1394 | MeanScore 1.00
Epoch 1395 | Loss 0.0210
Epoch 1396 | Loss 0.0252
Epoch 1397 | Loss 0.0120
Epoch 1398 | Loss 0.0156
Epoch 1399 | Loss 0.0139
Epoch 1399 | MeanScore 0.90
Epoch 1400 | Loss 0.1113
Epoch 1401 | Loss 0.0504
Epoch 1402 | Loss 0.0198
Epoch 1403 | Loss 0.0243
Epoch 1404 | Loss 0.0085
Epoch 1404 | MeanScore 0.92
Epoch 1405 | Loss 0.0140
Epoch 1406 | Loss 0.0067
Epoch 1407 | Loss 0.0071
Epoch 1408 | Loss 0.0065
Epoch 1409 | Loss 0.0066
Epoch 1409 | MeanScore 0.96
Epoch 1410 | Loss 0.0044
Epoch 1411 | Loss 0.0058
Epoch 1412 | Loss 0.0030
Epoch 1413 | Loss 0.1766
Epoch 1414 | Loss 0.0322
Epoch 1414 | MeanScore 0.96
Epoch 1415 | Loss 0.0054
Epoch 1416 | Loss 0.0173
Epoch 1417 | Loss 0.0111
Epoch 1418 | Loss 0.0793
Epoch 1419 | Loss 0.0407
Epoch 1

Epoch 1655 | Loss 0.0035
Epoch 1656 | Loss 0.0226
Epoch 1657 | Loss 0.0122
Epoch 1658 | Loss 0.0234
Epoch 1659 | Loss 0.0123
Epoch 1659 | MeanScore 0.98
Epoch 1660 | Loss 0.0109
Epoch 1661 | Loss 0.0058
Epoch 1662 | Loss 0.0067
Epoch 1663 | Loss 0.0160
Epoch 1664 | Loss 0.0042
Epoch 1664 | MeanScore 0.96
Epoch 1665 | Loss 0.0135
Epoch 1666 | Loss 0.0072
Epoch 1667 | Loss 0.0212
Epoch 1668 | Loss 0.0070
Epoch 1669 | Loss 0.0049
Epoch 1669 | MeanScore 0.96
Epoch 1670 | Loss 0.0051
Epoch 1671 | Loss 0.0042
Epoch 1672 | Loss 0.0072
Epoch 1673 | Loss 0.0072
Epoch 1674 | Loss 0.0076
Epoch 1674 | MeanScore 1.00
Epoch 1675 | Loss 0.0051
Epoch 1676 | Loss 0.0069
Epoch 1677 | Loss 0.0039
Epoch 1678 | Loss 0.0037
Epoch 1679 | Loss 0.0022
Epoch 1679 | MeanScore 0.98
Epoch 1680 | Loss 0.0020
Epoch 1681 | Loss 0.0028
Epoch 1682 | Loss 0.0037
Epoch 1683 | Loss 0.0084
Epoch 1684 | Loss 0.0126
Epoch 1684 | MeanScore 0.96
Epoch 1685 | Loss 0.0053
Epoch 1686 | Loss 0.0033
Epoch 1687 | Loss 0.0030
Epoch 1

KeyboardInterrupt: 

In [221]:
# Make sure the output folders exist.
for folder in (".//configs", ".//data", ".//figs"):
    os.makedirs(folder, exist_ok=True)

# Persist the whole model object (not just its parameters).
torch.save(model, "configs//" + test_name + ".ptm")

# Persist the training loss, the in-training evaluations and the final evaluation.
loss.to_csv("data//" + test_name + "_loss.csv", index=False)
test_on_train.to_csv("data//" + test_name + "_tests_in_training.csv", index=False)
tests.to_csv("data//" + test_name + "_tests.csv", index=False)


  "type " + obj.__name__ + ". It won't be checked "


# WORKSPACE


In [None]:
# Scratch: `x` is not used by any other cell shown in this notebook.
x = torch.randn(10)

In [222]:
# Reload the model object written by the save cell.
# NOTE(review): torch.load unpickles the file, so only load files you trust.
model2 = torch.load("configs//" + test_name + ".ptm")

In [120]:
# NOTE(review): this needs `action` to expose `.data` (a Variable of Q-values),
# but the other cells shown here assign `action` an integer index -- the cell
# presumably relied on earlier kernel state; confirm before re-running.
a = action.data.max(1)[0].view(1, 1).numpy()[0]

In [182]:
# Scratch: replay up to 10 stored runs and build their inputs/targets.
# Note that sampling resets the model's frame queue as a side effect.
inputs, targets = exp.sample(model, batch_size=10)

In [172]:
# Scratch: grab the current flattened frame from the game.
input_tm1 = c.observe(flatten=True, expand_dim=True)

In [223]:
# Scratch: greedy action of the reloaded model for the frame in `input_tm1`.
# `action` ends up as the index of the largest Q-value.
temp = torch.from_numpy(input_tm1).type(Tensor)
temp = Variable(temp)
q = model2(temp).data.max(1)[1].view(1, 1).numpy()
action = q[0][0]

In [205]:
# Scratch: alternative configuration (mask off, queue length 1, game ends after
# 10 ball deletions, output_buffer_size passed to Catch).
# NOTE(review): running this overwrites `m`, `q_size`, `test_name` and `c` from
# the main experiment cell, so later cells see these values instead.
height = 11
width=13
m = False
q_size = 1
i=0
test_name = "RUN" + str(i) + "_Mask_" + str(int(m)) + "_13x11_Q" + str(q_size)+"_MLP"

c = Catch(screen_height=height, screen_width=width, output_buffer_size=q_size,
          game_over_conditions = {'ball_deletions': 10}, mask=m, ball_spawn_rate=11,)

In [319]:
# Scratch: a second Queued_DQN with a 4-frame queue. Despite its name, `queue`
# is a whole network, not a Queue layer.
queue = Queued_DQN(input_size, input_size*4, 3, 4)

In [599]:
# Scratch: step the game once and feed the new frame to `bb`.
# NOTE(review): `bb` is not defined in any cell shown in this notebook, so this
# cell depends on hidden kernel state and will fail on a fresh run.
c.act(1)
bb(Variable(torch.from_numpy(c.observe(flatten=True, expand_dim=True)).type(Tensor)))

Variable containing:
 0.5667  0.2255  0.2753
[torch.FloatTensor of size 1x3]

In [391]:
# Scratch: wrap the current flattened frame in a Variable.
output = Variable(torch.from_numpy(c.observe(flatten=True, expand_dim=True)).type(Tensor))

In [393]:
# Scratch: explicitly mark the frame Variable as not requiring gradients.
output.requires_grad = False
