In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [16]:
import gym
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from copy import deepcopy
from PIL import Image
import os 
from torch.nn import parameter
Parameter = parameter.Parameter

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable, Function
import torchvision.transforms as T
from catch import Catch

# if gpu is to be used
use_cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor
Tensor = FloatTensor

In [3]:
env = gym.make('CartPole-v0').unwrapped

# set up matplotlib
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

plt.ion()

# if gpu is to be used
use_cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor
Tensor = FloatTensor

In [4]:
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))

class ReplayMemory(object):
    def __init__(self, max_memory=100, discount=.9):
        self.max_memory = max_memory
        self.memory = []
        self.discount = discount
        self.current_memory = []

    def push(self, states, game_over, ball_over):
        # memory[i] = [[state_t, action_t, reward_t, state_t+1], game_over?]
        self.current_memory.append([states, game_over])
        if ball_over or game_over:
            self.memory.append(self.current_memory)
            if len(self.memory) > self.max_memory:
                del self.memory[0]
            self.current_memory = []


    def sample(self, model, batch_size=1):
        len_memory = len(self.memory)
        num_actions = model.output_shape[-1]

        Oin = None
        Otar = None
        for i, idx in enumerate(np.random.randint(0, len_memory,
                                                  size=min(len_memory, batch_size))):

            mems = self.memory[idx]
            for mem in mems:
                state_t, action_t, reward_t, state_tp1 = mem[0]
                game_over = mem[1]
                inputs = state_t
                temp_state_t = Variable(torch.from_numpy(state_t).type(Tensor))
                temp_state_tp1 = Variable(torch.from_numpy(state_tp1).type(Tensor))
                
                # There should be no target values for actions not taken.
                # Thou shalt not correct actions not taken #deep
                targets = model(temp_state_t).data.numpy()[0]
                Q_sa = model(temp_state_tp1).data.max(1)[0].view(1, 1).numpy()[0][0]
                
                if game_over:  # if game_over is True
                    targets[action_t] = reward_t
                else:
                    # reward_t + gamma * max_a' Q(s', a')
                    targets[action_t] = reward_t + self.discount * Q_sa
                if Oin is None:
                    Oin = inputs
                    Otar = np.expand_dims(targets, axis=0)
                else:
                    Oin = np.concatenate([Oin, inputs], axis=0)
                    Otar = np.concatenate([Otar, np.expand_dims(targets, axis=0)], axis=0)
        return Oin, Otar
    


In [108]:


class fifo_queue(Function):
    def forward(self, old_queue, inputs):
        outputs = None
        queue = old_queue.clone()
        for temp in Variable(inputs).data.numpy():
            queue = torch.cat((queue, torch.from_numpy(temp).type(Tensor).unsqueeze(0)), 0)
            queue = queue[1:]
            if outputs is None:
                outputs = queue.clone().unsqueeze(0)
            else:
                outputs = torch.cat((outputs, queue.unsqueeze(0)), 0)
        
        self.mark_non_differentiable(outputs)
        return outputs, queue
    
    def backward(self, output_grad):
        return None


class Queue(nn.Module):
    def __init__(self, input_features, out_features, queue_size, bias=None):
        super(Queue, self).__init__()
        # Setup the Queue
        self.input_features = input_features
        self.queue_size = queue_size
        self._reset_queue()
        
        # Init the queue function
        self.fifo_queue = fifo_queue()

        # Setup the weights to be used in the linear function later. 
        self.weight = Parameter(torch.Tensor(out_features, input_features*queue_size))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        # Init the parameters
        self.reset_parameters()
        
    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)
            
    def forward(self, inputs):
        # The actual call that is run when you pass data into this model. 
        
        x = self.fifo_queue(Variable(self.queue), inputs)
        output = x[0]
        self.queue = x[1].data
        #output, self.queue 
        # Return 
        output = output.view(output.size()[0], -1)
        return F.linear(output, self.weight, self.bias)
            
    # reunit the queue
    def _reset_queue(self):
        if type(self.input_features) is list:
            queue = torch.zeros([self.queue_size]+self.input_features)
        else:
            queue = torch.zeros([self.queue_size]+[self.input_features])
        self.queue = queue
        
        self.saved_queue = None
        #self.register_buffer("queue", queue)
        self.queue.requires_grad = False
        
    def save_queue(self):
        self.saved_queue = self.queue.clone()
        
    def load_queue(self):
        self.queue = self.saved_queue.clone()
        self.saved_queue = None

"""class Queue(nn.Module):
    def __init__(self, input_features, out_features, queue_size):
        super(Queue, self).__init__()
        self.input_features = input_features
        self.queue_size = queue_size
        # Setup the queue
        if type(input_features) is list:
            self.queue = np.zeros([queue_size]+input_features)
        else:
            self.queue = np.zeros([queue_size]+[input_features])


    def forward(self, inputs):
        # The actual call that is run when you pass data into this model. 
        output = None
        for temp in inputs.data.numpy():
            #print(self.queue[:, 1:].size(), torch.from_numpy(temp).type(Tensor).unsqueeze(0).size())
            self.queue = np.concatenate((self.queue, np.expand_dims(temp, 0)), 0)
            self.queue = self.queue[1:]
            if output is None:
                output = np.expand_dims(self.queue, 0)
            else:
                output = np.concatenate((output, np.expand_dims(self.queue, 0)), 0)
        # Return 
        output = Variable(torch.from_numpy(output).type(Tensor))
        #output.requires_grad = False
        output = output.view(output.size()[0], -1) # FLATTEN and make into a Variable for Torch
        return output
            
    # reunit the queue
    def _reset_queue(self):
        if type(self.input_features) is list:
            self.queue = np.zeros([self.queue_size]+self.input_features)
        else:
            self.queue = np.zeros([self.queue_size]+[self.input_features])
"""

'class Queue(nn.Module):\n    def __init__(self, input_features, out_features, queue_size):\n        super(Queue, self).__init__()\n        self.input_features = input_features\n        self.queue_size = queue_size\n        # Setup the queue\n        if type(input_features) is list:\n            self.queue = np.zeros([queue_size]+input_features)\n        else:\n            self.queue = np.zeros([queue_size]+[input_features])\n\n\n    def forward(self, inputs):\n        # The actual call that is run when you pass data into this model. \n        output = None\n        for temp in inputs.data.numpy():\n            #print(self.queue[:, 1:].size(), torch.from_numpy(temp).type(Tensor).unsqueeze(0).size())\n            self.queue = np.concatenate((self.queue, np.expand_dims(temp, 0)), 0)\n            self.queue = self.queue[1:]\n            if output is None:\n                output = np.expand_dims(self.queue, 0)\n            else:\n                output = np.concatenate((output, np.expand_

In [109]:
class Queued_DQN(nn.Module):
    def __init__(self, input_size, hidden_size, num_actions, queue_size):
        super(Queued_DQN, self).__init__()
        self.queue = Queue(input_size, input_size*queue_size, queue_size)
        #self.lin1 = nn.Linear(input_size*queue_size, hidden_size)
        self.lin2 = nn.Linear(hidden_size, hidden_size)
        self.lin3 = nn.Linear(hidden_size, 1)
        self.output_shape = [1]
        
    def forward(self, x):
        # Replace queue with another Linear and you 
        # have the same network we use for everything else. 
        # This is the model
        x = F.relu(self.queue(x))
        x = F.relu(self.lin2(x))
        x = self.lin3(x)
        return x
    
    def reset_queue(self):
        self.queue._reset_queue()

In [80]:
"""class DQN(nn.Module):
    def __init__(self, input_size, hidden_size, num_actions):
        super(DQN, self).__init__()
        self.lin1 = nn.Linear(input_size, hidden_size)
        self.lin2 = nn.Linear(hidden_size, hidden_size)
        self.lin3 = nn.Linear(hidden_size, num_actions)
        self.output_shape = [1, num_actions]
        
    def forward(self, x):
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        return self.lin3(x)"""
    

'class DQN(nn.Module):\n    def __init__(self, input_size, hidden_size, num_actions):\n        super(DQN, self).__init__()\n        self.lin1 = nn.Linear(input_size, hidden_size)\n        self.lin2 = nn.Linear(hidden_size, hidden_size)\n        self.lin3 = nn.Linear(hidden_size, num_actions)\n        self.output_shape = [1, num_actions]\n        \n    def forward(self, x):\n        x = F.relu(self.lin1(x))\n        x = F.relu(self.lin2(x))\n        return self.lin3(x)'

In [102]:
q_size = 2

epochs = 1000
input_size = 1
batch_size = 1000
model = Queued_DQN(1, 50, 1, 1)
#model = DQN(input_size, input_size, 3)
optimizer = optim.Adagrad(model.parameters())


In [103]:

for y in range(epochs):
    training = np.random.randint(0, 2, batch_size)
    previous = 0
    expected_targets = []
    for x in training:
        expected_targets.append(int(np.logical_xor(previous, x)))
        previous = x
    expected_targets = np.array(expected_targets)
    training = np.expand_dims(training, -1)
    targets = model(Variable(torch.from_numpy(training).type(Tensor)))
    expected_targets = Variable(torch.from_numpy(expected_targets).type(Tensor))
    
    # Compute Huber loss
    loss = F.mse_loss(targets, expected_targets)
    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    for param in model.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()

    if y% 100 == 0:
        print("epoch", y)

epoch 0
epoch 100
epoch 200
epoch 300
epoch 400
epoch 500
epoch 600
epoch 700
epoch 800
epoch 900


In [105]:
testing = np.random.randint(0, 2, batch_size)
previous = [0]
expected_targets = []
for x in testing:
    expected_targets.append(int(np.logical_xor(previous, x)))
    previous = x
expected_targets = np.array(expected_targets)
testing = np.expand_dims(testing, -1)

In [106]:
targets = model(Variable(torch.from_numpy(testing).type(Tensor)))


In [107]:
for x in zip(testing, expected_targets, targets):
    print(x[0], x[1], x[2].data.numpy())

[0] 0 [ 0.49928012]
[0] 0 [ 0.49928012]
[1] 1 [ 0.49572316]
[1] 0 [ 0.49572316]
[0] 1 [ 0.49928012]
[0] 0 [ 0.49928012]
[0] 0 [ 0.49928012]
[1] 1 [ 0.49572316]
[0] 1 [ 0.49928012]
[1] 1 [ 0.49572316]
[1] 0 [ 0.49572316]
[0] 1 [ 0.49928012]
[1] 1 [ 0.49572316]
[1] 0 [ 0.49572316]
[0] 1 [ 0.49928012]
[1] 1 [ 0.49572316]
[0] 1 [ 0.49928012]
[1] 1 [ 0.49572316]
[0] 1 [ 0.49928012]
[0] 0 [ 0.49928012]
[1] 1 [ 0.49572316]
[0] 1 [ 0.49928012]
[0] 0 [ 0.49928012]
[0] 0 [ 0.49928012]
[1] 1 [ 0.49572316]
[1] 0 [ 0.49572316]
[1] 0 [ 0.49572316]
[0] 1 [ 0.49928012]
[0] 0 [ 0.49928012]
[0] 0 [ 0.49928012]
[1] 1 [ 0.49572316]
[0] 1 [ 0.49928012]
[1] 1 [ 0.49572316]
[0] 1 [ 0.49928012]
[0] 0 [ 0.49928012]
[1] 1 [ 0.49572316]
[0] 1 [ 0.49928012]
[0] 0 [ 0.49928012]
[0] 0 [ 0.49928012]
[0] 0 [ 0.49928012]
[1] 1 [ 0.49572316]
[1] 0 [ 0.49572316]
[0] 1 [ 0.49928012]
[1] 1 [ 0.49572316]
[0] 1 [ 0.49928012]
[0] 0 [ 0.49928012]
[0] 0 [ 0.49928012]
[0] 0 [ 0.49928012]
[0] 0 [ 0.49928012]
[0] 0 [ 0.49928012]


In [96]:
np.logical_xor(1,1)

False

# WORKSPACE


In [None]:
x = torch.randn(10)

In [222]:
model2 = torch.load("configs//" + test_name + ".ptm")

In [120]:
a = action.data.max(1)[0].view(1, 1).numpy()[0]

In [182]:
inputs, targets = exp.sample(model, batch_size=10)

In [172]:
input_tm1 = c.observe(flatten=True, expand_dim=True)

In [223]:
temp = torch.from_numpy(input_tm1).type(Tensor)
temp = Variable(temp)
q = model2(temp).data.max(1)[1].view(1, 1).numpy()
action = q[0][0]

In [205]:
height = 11
width=13
m = False
q_size = 1
i=0
test_name = "RUN" + str(i) + "_Mask_" + str(int(m)) + "_13x11_Q" + str(q_size)+"_MLP"

c = Catch(screen_height=height, screen_width=width, output_buffer_size=q_size,
          game_over_conditions = {'ball_deletions': 10}, mask=m, ball_spawn_rate=11,)

In [319]:
queue = Queued_DQN(input_size, input_size*4, 3, 4)

In [599]:
c.act(1)
bb(Variable(torch.from_numpy(c.observe(flatten=True, expand_dim=True)).type(Tensor)))

Variable containing:
 0.5667  0.2255  0.2753
[torch.FloatTensor of size 1x3]

In [391]:
output = Variable(torch.from_numpy(c.observe(flatten=True, expand_dim=True)).type(Tensor))

In [393]:
output.requires_grad = False
