In [1]:
import gym
import universe

In [2]:
# Identifier of the saved run to resume from. It is used further down as the
# filename prefix of the checkpoint ('dummy.pkl') and of the pickled step
# counter ('tsc.pickle').
load_id = 'slither-AE-LSTM-final-n' # base case, LSTM 512, 512
# Alternative runs, kept for reference (disabled):
# load_id = 'test_universe-AE-LSTM-C5' + '-ext' # with attentional transition, 512, 512, 4
# load_id = 'test_universe-AE-LSTM-C5' + '-large' # with larger state, 2048

In [3]:
# Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
import tensorflow as tf
import numpy as np
import scipy.misc 
try:
    from StringIO import StringIO  # Python 2.7
except ImportError:
    from io import BytesIO         # Python 3.00


class Logger(object):
    """Minimal TensorBoard logger that writes hand-built ``tf.Summary`` protos.

    Every method builds a summary and hands it to the ``tf.summary.FileWriter``
    created in ``__init__``. Image, figure and histogram summaries are flushed
    immediately; scalar summaries rely on the writer's own flushing.
    """

    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    @staticmethod
    def _new_png_buffer():
        """Return an empty in-memory binary buffer to encode a PNG into."""
        # Imported locally so the class does not depend on which branch of the
        # module-level StringIO/BytesIO import fallback happened to run (the old
        # code selected BytesIO by swallowing a NameError with a bare except).
        from io import BytesIO
        return BytesIO()

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable.

        Args:
            tag: name of the scalar series.
            value: number stored as the summary's ``simple_value``.
            step: global step the value belongs to.
        """
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        # Not flushed here: this method is called many times per training step.
        self.writer.add_summary(summary, step)

    def image_summary(self, tag, images, step):
        """Log a list of images.

        Args:
            tag: common prefix; image ``i`` is logged under ``'<tag>/<i>'``.
            images: iterable of arrays exposing ``shape[0]`` (height) and
                ``shape[1]`` (width).
            step: global step the images belong to.
        """
        img_summaries = []
        for i, img in enumerate(images):
            # Encode the image as PNG into memory.
            # NOTE(review): scipy.misc.toimage is not available in recent SciPy
            # releases -- confirm the installed version still provides it.
            s = self._new_png_buffer()
            scipy.misc.toimage(img).save(s, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
                                       height=img.shape[0],
                                       width=img.shape[1])
            # Create a Summary value
            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)
        # Flush like pltfig_summary/histo_summary do, so images show up promptly.
        self.writer.flush()

    def pltfig_summary(self, tag, images, step):
        """Log a list of figures, each rendered to PNG via ``savefig``.

        Args:
            tag: common prefix; figure ``i`` is logged under ``'<tag>/<i>'``.
            images: iterable of figure objects providing ``savefig``,
                ``get_dpi`` and ``get_size_inches``.
            step: global step the figures belong to.
        """
        img_summaries = []
        for i, img in enumerate(images):
            # Render the figure as PNG into memory.
            s = self._new_png_buffer()
            img.savefig(s, format="png")

            # Pixel size = dpi * size in inches, truncated to integers.
            shape = (img.get_dpi() * img.get_size_inches()).astype(int) # w, h
            img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
                                       height=int(shape[1]),
                                       width=int(shape[0]))
            # Create a Summary value
            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)
        self.writer.flush()

    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values.

        Args:
            tag: name of the histogram.
            values: array-like of numbers (lists are accepted as well as
                numpy arrays).
            step: global step the histogram belongs to.
            bins: number of bins passed to ``np.histogram``.
        """
        # Accept plain sequences too: `.size` and `** 2` below need an ndarray.
        values = np.asarray(values)

        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(values.size)
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values ** 2))

        # Drop the start of the first bin: only the right edge of every bucket
        # is stored, so there is one limit per count.
        bin_edges = bin_edges[1:]

        # Add bin edges and counts
        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        # Create and write Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()

log_dir = '/tmp/tb/' + nb_id
! rm -r $log_dir

In [4]:
import math

import time
import random
import pickle

from collections import deque

%matplotlib notebook
import matplotlib as mpl
plt = mpl.pyplot
# mpl.pylab.rcParams['figure.figsize'] = (12, 9)

import numpy as np
import torch as tch
F = tch.nn.functional

In [5]:
# Report whether PyTorch can see a CUDA device; code further down moves tensors
# and modules to the GPU whenever this returns True.
tch.cuda.is_available()

True

In [6]:
# PyTorch auxiliary functions
def np2var(input):
    """Wrap a numpy array in an autograd Variable, on the GPU when available.

    Args:
        input: numpy array handed to ``tch.from_numpy``.

    Returns:
        The resulting Variable, moved to CUDA if ``tch.cuda.is_available()``.
    """
    wrapped = tch.autograd.Variable(tch.from_numpy(input))
    return wrapped.cuda() if tch.cuda.is_available() else wrapped

# tanh2sigmoid = lambda x: (x + 1) / 2

def postprocess(tch_img):
    """Turn an image tensor into a numpy byte array.

    The input is multiplied by 255, cast to bytes, copied to the CPU as a
    numpy array, and stripped of all singleton dimensions.
    """
    scaled = tch_img * 255
    as_bytes = scaled.byte()
    on_cpu = as_bytes.data.cpu()
    return on_cpu.numpy().squeeze()

# Screen slicing: crop window, in pixels.
# NOTE(review): presumably the playable area inside the captured frame -- confirm.
top, left = 86, 20 + 100
height, width = 300, 500 - 200
gamescreen_slice = (slice(top, top + height), slice(left, left + width))
# Centre of the crop window, stored as (x, y).
game_center = (left + width // 2, top + height // 2)

# Average pooling for downscaling.
downscale_size = 2
s = downscale_size
pool2d_downscale = tch.nn.AvgPool2d((s, s), stride=(s, s))
# Total factor is downscale_size ** downscale_n.
downscale_n = 1
downscale = downscale_size ** downscale_n
# All-zero (H, W, 1) image at the downscaled resolution.
dummy_img = np.zeros((height // downscale, width // downscale, 1))

In [7]:
class CNN_encoder(tch.nn.Module):
    """Convolutional encoder mapping a 1-channel image to a 512-wide code.

    Five stride-2 4x4 convolutions are followed by one 5x5 convolution with
    512 output channels; the result is flattened to
    ``(-1, last_conv_unrolled_size)`` and passed through the (currently empty)
    list of fully connected layers. ``act`` is applied after every layer.

    ``forward`` records the tensor size before every convolution, and after the
    last one, in ``self.conv_sizes``.
    """

    def __init__(self, n_convs=5, out_ch=128, act=F.selu):
        super(CNN_encoder, self).__init__()

        # n_convs and out_ch are stored only; the layer stack below is fixed.
        self.n_convs = n_convs
        self.kernel_size = (4, 4)
        self.out_ch = out_ch
        self.act = act

        # (in_channels, out_channels, padding) of the stride-2 convolutions.
        strided_specs = ((1, 16, 2), (16, 16, 1), (16, 32, 2), (32, 64, 1), (64, 128, 1))
        self.conv_layers = tch.nn.ModuleList(
            [tch.nn.Conv2d(c_in, c_out, self.kernel_size, stride=(2, 2), padding=(pad, pad))
             for c_in, c_out, pad in strided_specs])
        self.conv_layers.append(tch.nn.Conv2d(128, 512, (5, 5), stride=(1, 1), padding=(0, 0)))

        self.last_conv_unrolled_size = 512
        self.fc_sizes = []
        # Chain of Linear layers between consecutive sizes (none while fc_sizes is empty).
        fc_dims = [self.last_conv_unrolled_size] + self.fc_sizes
        self.fc_layers = tch.nn.ModuleList(
            [tch.nn.Linear(n_in, n_out) for n_in, n_out in zip(fc_dims[:-1], fc_dims[1:])])

        self.conv_sizes = []

    def forward(self, input):
        """Encode ``input`` and refresh ``self.conv_sizes``; returns the flat code."""
        self.conv_sizes = sizes = []

        features = input
        for conv in self.conv_layers:
            sizes.append(tuple(features.size()))
            features = self.act(conv(features))
        sizes.append(tuple(features.size()))

        code = features.view(-1, self.last_conv_unrolled_size)
        for fc in self.fc_layers:
            code = self.act(fc(code))

        return code

# Build the encoder with its default configuration.
encoder = CNN_encoder()

# for param in encoder.parameters():
#     n = np.prod(list(param.size())[-3:]) # valid for 2D convolutions
#     stdv = 2. / math.sqrt(n)
#     param.data.uniform_(-stdv, stdv)

# Run on the GPU when one is available.
encoder = encoder.cuda() if tch.cuda.is_available() else encoder

# One forward pass on an all-zero image (HWC -> CHW, plus a batch axis) so that
# encoder.conv_sizes is populated; CNN_decoder reads it when it is constructed.
dummy_img_T = np2var(dummy_img.transpose(2, 0, 1)).float().unsqueeze(0)
_ = encoder(dummy_img_T)

encoder.conv_sizes

class CNN_decoder(tch.nn.Module):
    """Decoder built as the mirror image of an already-run encoder.

    The constructor reads ``encoder.act``, ``encoder.fc_sizes``,
    ``encoder.last_conv_unrolled_size``, ``encoder.conv_sizes`` and
    ``encoder.conv_layers``. ``conv_sizes`` must already be filled in, which
    the encoder does during a forward pass. One ``ConvTranspose2d`` is created
    per encoder convolution, in reverse order, reusing its kernel size, stride
    and padding.
    """

    def __init__(self, encoder):
        super(CNN_decoder, self).__init__()

        self.act = encoder.act

        # Mirror the encoder's fully connected stack, if it has one.
        if len(encoder.fc_sizes) > 0:
            self.input_size = encoder.fc_sizes[-1]
            self.fc_sizes = encoder.fc_sizes[-2::-1] + [encoder.last_conv_unrolled_size]
        else:
            self.input_size = None
            self.fc_sizes = []

        # Shape to restore before the transposed convolutions (batch axis left free).
        self.last_conv_shape = (-1, ) + encoder.conv_sizes[-1][1:]

        fc_dims = [self.input_size] + self.fc_sizes
        self.fc_layers = tch.nn.ModuleList(
            [tch.nn.Linear(n_in, n_out) for n_in, n_out in zip(fc_dims[:-1], fc_dims[1:])])

        # Walk the encoder backwards: each recorded input size gives the
        # channel count the matching transposed convolution has to produce.
        self.convtrans_layers = tch.nn.ModuleList()
        prev_ch = encoder.conv_sizes[-1][1]
        mirrored = zip(reversed(encoder.conv_sizes[:-1]), reversed(list(encoder.conv_layers)))
        for target_shape, conv in mirrored:
            next_ch = target_shape[1]
            self.convtrans_layers.append(
                tch.nn.ConvTranspose2d(prev_ch, next_ch,
                                       kernel_size=conv.kernel_size,
                                       stride=conv.stride,
                                       padding=conv.padding))
            prev_ch = next_ch

    def forward(self, input):
        """Decode a batch of codes; the last layer uses a sigmoid instead of ``act``."""
        hidden = input
        for fc in self.fc_layers:
            hidden = self.act(fc(hidden))

        hidden = hidden.view(*self.last_conv_shape)

        deconvs = list(self.convtrans_layers)
        for deconv in deconvs[:-1]:
            hidden = self.act(deconv(hidden))

        return tch.sigmoid(self.convtrans_layers[-1](hidden))
        

# Build the decoder as the mirror of the encoder defined above.
decoder = CNN_decoder(encoder)

# for param in decoder.parameters():
#     n = 1 if len(param.size()) == 1 else param.size()[0]
#     n *= np.prod(list(param.size())[-2:]) # valid for 2D convolutions
#     stdv = 2. / math.sqrt(n)
#     param.data.uniform_(-stdv, stdv)

# Run on the GPU when one is available.
decoder = decoder.cuda() if tch.cuda.is_available() else decoder

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class customLSTMCell(nn.Module):
    """LSTM cell whose gate pre-activations are a soft mixture of variants.

    ``ih`` and ``hh`` each produce ``num_variants`` complete sets of the four
    LSTM gate pre-activations. A softmax over ``hhh(cx)`` yields one weight per
    variant, and the gates actually used are the weighted sum of the variants.

    Args:
        input_size: number of input features.
        hidden_size: size of the hidden and cell states.
        num_variants: number of gate sets mixed together.
    """

    def __init__(self, input_size, hidden_size, num_variants):
        super(customLSTMCell, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.num_variants = num_variants

        self.ih = nn.Linear(self.input_size, 4 * self.hidden_size * self.num_variants)
        self.hh = nn.Linear(self.hidden_size, 4 * self.hidden_size * self.num_variants)

        # Maps the cell state to one mixing logit per variant.
        self.hhh = nn.Linear(self.hidden_size, self.num_variants)

    def forward(self, input, hidden):
        """Advance the cell by one step.

        Args:
            input: tensor of shape (batch, input_size).
            hidden: tuple ``(hx, cx)``, each of shape (batch, hidden_size).

        Returns:
            Tuple ``(hy, cy)`` with the new hidden and cell states, each of
            shape (batch, hidden_size).
        """
        hx, cx = hidden

        gates = self.ih(input) + self.hh(hx)

        # (batch, num_variants); the softmax runs over the variants of each sample.
        gates_weights = F.softmax(self.hhh(cx), dim=1)
        gates = gates.view(-1, 4 * self.hidden_size, self.num_variants)
        # Per-sample weighted sum over variants. The previous
        # matmul(gates, gates_weights.squeeze()) was only correct for a batch
        # of one: with a larger batch the (batch, variants) weights were used
        # as a single shared matrix.
        gates = torch.bmm(gates, gates_weights.unsqueeze(2)).squeeze(2)

        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)

        ingate = torch.sigmoid(ingate)
        forgetgate = torch.sigmoid(forgetgate)
        cellgate = torch.tanh(cellgate)
        outgate = torch.sigmoid(outgate)

        cy = (forgetgate * cx) + (ingate * cellgate)
        hy = outgate * torch.tanh(cy)

        return hy, cy

class LSTM_predictor(tch.nn.Module):
    """One-step predictor: an LSTM cell followed by a linear read-out.

    Args:
        size: width of the input and of the predicted output.
        hidden_size: width of the LSTM hidden and cell states.
        act: activation applied to the read-out. When omitted, the activation
            of the module-level ``encoder`` is used, which is what the
            two-argument constructor has always done.
    """

    def __init__(self, size, hidden_size, act=None):

        super(LSTM_predictor, self).__init__()

        self.cell = tch.nn.LSTMCell(size, hidden_size)

        self.output_layer = tch.nn.Linear(hidden_size, size)

        # Only fall back to the global encoder when no activation was supplied,
        # so the class can also be built without that global in scope.
        self.act = encoder.act if act is None else act

    def forward(self, hidden, input):
        """Advance the LSTM by one step.

        Note the argument order: the state comes first, then the input.

        Args:
            hidden: tuple ``(h, c)`` as returned by ``zero_hidden`` or by a
                previous call.
            input: tensor of shape (batch, size).

        Returns:
            Tuple ``(hidden, output)``: the new ``(h, c)`` state and the
            activated read-out of the new ``h``.
        """
        hidden = self.cell(input, hidden)

        output = self.act(self.output_layer(hidden[0]))

        return hidden, output

    def zero_hidden(self, batch_size=1):
        """Return an all-zero ``(h_0, c_0)`` state, on the GPU when available.

        Args:
            batch_size: number of rows in each state tensor (default 1).
        """
        hidden = tuple(tch.autograd.Variable(tch.zeros(batch_size, self.cell.hidden_size))
                       for _ in range(2))

        if tch.cuda.is_available():
            hidden = tuple(h.cuda() for h in hidden)

        return hidden # h_0, c_0
    
# Predictor over codes: input/output width 512, LSTM state width 512.
predictor = LSTM_predictor(512, 512)

# Run on the GPU when one is available.
predictor = predictor.cuda() if tch.cuda.is_available() else predictor
    
class dummy_module(tch.nn.Module):
    """Container that groups the three networks as child modules.

    It performs no computation of its own: it only registers ``encoder``,
    ``decoder`` and ``predictor`` (in that order) so they can be handled as a
    single module.
    """

    def __init__(self, encoder, decoder, predictor):
        super(dummy_module, self).__init__()

        # Assignment order fixes the order reported by children().
        self.encoder = encoder
        self.decoder = decoder
        self.predictor = predictor

    def forward(self):
        """Intentionally does nothing."""
        return None

# Bundle the three networks into one module; this is the object written to
# '<run id>dummy.pkl' by the training loop and read back when resuming.
dummy = dummy_module(encoder, decoder, predictor)

In [8]:
# Restore the weights of a previous run from the bundled checkpoint.
# NOTE(review): tch.load unpickles the file, which can execute arbitrary code --
# only load checkpoints you trust.
dummy_load = tch.load(load_id + 'dummy.pkl')

# The bundle stores its children in the order encoder, decoder, predictor.
modules_load = tuple(dummy_load.children())
if len(modules_load) != 3:
    raise ValueError('checkpoint %r holds %d sub-modules, expected 3'
                     % (load_id + 'dummy.pkl', len(modules_load)))

for module, module_load in zip((encoder, decoder, predictor), modules_load):
    params = list(module.parameters())
    params_load = list(module_load.parameters())
    # A bare zip() would silently stop at the shorter list and happily assign
    # tensors of a different shape, so validate before copying anything.
    if len(params) != len(params_load):
        raise ValueError('%s: model has %d parameter tensors, checkpoint has %d'
                         % (type(module).__name__, len(params), len(params_load)))
    for param, param_load in zip(params, params_load):
        if tuple(param.size()) != tuple(param_load.size()):
            raise ValueError('%s: parameter shape %s does not match checkpoint shape %s'
                             % (type(module).__name__, tuple(param.size()), tuple(param_load.size())))
        param.data = param_load.data

# history = pickle.load(open(load_id, "rb"))
# encoder_load = tch.load(load_id + 'encoder')
# decoder_load = tch.load(load_id + 'decoder')
# predictor_load = tch.load(load_id + 'predictor')

# for module, module_load in zip((encoder, decoder, predictor), (encoder_load, decoder_load, predictor_load)):
#     for param, param_load in zip(module.parameters(), module_load.parameters()):
#         param.data = param_load.data

# Sanity check (timed): auto-encoding and one-step prediction must both give
# back a tensor of the input's size. The checks used to be repeated verbatim a
# second time; one pass is enough.
start = time.time()
assert decoder(encoder(dummy_img_T)).size() == dummy_img_T.size()
assert decoder(predictor(predictor.zero_hidden(), encoder(dummy_img_T))[1]).size() == dummy_img_T.size()
# fig.canvas.draw()
end = time.time()

end - start

In [9]:
# Losses: mean squared error, and binary cross-entropy averaged over all elements.
criterion = tch.nn.MSELoss()
# Built once instead of inside a lambda on every call. The deprecated
# `size_average=True` argument is dropped: averaging is the default behaviour.
bce_criterion = tch.nn.BCELoss()

# A single SGD optimizer updates the autoencoder and the recurrent predictor together.
ae_params = tuple(encoder.parameters()) + tuple(decoder.parameters())
rnn_params = tuple(predictor.parameters())
optimizer = tch.optim.SGD(ae_params + rnn_params, lr=1e-1, momentum=0.9)
# ae_optimizer = tch.optim.SGD(ae_params, lr=1e-2, momentum=0.99)
# rnn_optimizer = tch.optim.SGD(rnn_params, lr=1e-2, momentum=0.99)
# ae_optimizer = tch.optim.Adam(ae_params)
# rnn_optimizer = tch.optim.Adam(rnn_params)
# pred_optimizer = tch.optim.Adam(rnn_params, lr=1e-3)
# optimizer = tch.optim.Adam(ae_params, lr=1e-4, betas=(0.99, 0.9999))

In [10]:
# Resume the global step counter stored next to the checkpoint. The file is
# opened in a `with` block so the handle is closed again (it used to leak).
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open(load_id + 'tsc.pickle', 'rb') as counter_file:
    step_counter = 0 + pickle.load(counter_file)

# NOTE(review): not referenced anywhere in the visible part of the notebook.
plot_every = 1

# Maximum number of consecutive frames per training batch.
buffer_size = 32

In [12]:
# Identifier of this run: prefix of the checkpoint files written by the
# training loop and name of the TensorBoard sub-directory.
nb_id = 'slither-AE-LSTM-final-n'

# TensorBoard event files for this run go under ./tb_final/.
log_dir = './tb_final/' + nb_id
# Deleting old logs is disabled on purpose here:
# ! rm -r $log_dir

logger = Logger(log_dir)

In [13]:
import os

In [14]:
# Per-game running sums of the logged losses, and the number of optimisation
# steps they were accumulated over.
total_loss = true_total_loss_ae = true_total_loss_pred = 0
train_steps = 0

# Sliding buffer of consecutive frames; holds at most `buffer_size` entries.
train_queue = deque(maxlen=buffer_size)

# Recurrent state carried from one batch to the next.
hidden = predictor.zero_hidden()

def _to_float(value):
    """Return ``value`` as a plain Python number.

    Handles 0-dim tensors (``.item()``), old-style one-element Variables
    (``.data[0]``) and values that already are plain numbers.
    """
    if hasattr(value, 'item'):
        return value.item()
    if hasattr(value, 'data'):
        return value.data[0]
    return value


def _log_grad_ratios(prefix, params, step):
    """Log ||gradient|| / ||weight|| of every parameter under ``prefix``."""
    for i, param in enumerate(params):
        if param.grad is None:
            # The parameter took no part in the last backward pass.
            continue
        logger.scalar_summary(prefix + str(i) + str(tuple(param.size())),
                              _to_float(param.grad.data.norm() / param.data.norm()), step)


# Runs until interrupted: every iteration trains on one randomly chosen
# recorded game and then writes a checkpoint.
while True:

    game_path = './game_dataset/' + random.choice(os.listdir('./game_dataset'))
    # The file only needs to stay open while it is being read.
    # NOTE(review): pickle.load can execute arbitrary code -- only use trusted recordings.
    with open(game_path, 'rb') as gamescreen_file:
        gamescreen_list = pickle.load(gamescreen_file)

    for gamescreen, _ in gamescreen_list:

        is_last = gamescreen is gamescreen_list[-1][0]

        # Scale to [0, 1] and add batch and channel axes: (1, 1, H, W).
        gamescreen = np2var(gamescreen).float() / 255
        gamescreen = gamescreen.unsqueeze(0).unsqueeze(0)

        train_queue.append(gamescreen)

        # Train on a full buffer, or on whatever is left (at least two frames)
        # when the game ends.
        if len(train_queue) == buffer_size or (is_last and len(train_queue) > 1):
            train_steps += 1

            # Down-weight shorter batches; float() keeps this a true division
            # on Python 2 as well.
            batch_weight = len(train_queue) / float(buffer_size)

            # encode
            train_batch = tch.cat(list(train_queue), dim=0)
            coded_batch = encoder(train_batch)
            # autoencode
            decoded_batch = decoder(coded_batch)
            loss_ae = bce_criterion(decoded_batch, train_batch.detach()) * batch_weight

            # predict the code of frame t+1 from the code of frame t; the codes
            # are detached so this loss does not reach the encoder
            pred_list = []
            for code in coded_batch[:-1]:
                code = code.unsqueeze(0)
                hidden, pred_code = predictor(hidden, code.detach())
                pred_list.append(pred_code)

            # Truncate back-propagation through time at the batch boundary.
            hidden = tuple(h.detach() for h in hidden)

            pred_coded_batch = tch.cat(pred_list, dim=0)
            loss_pred = criterion(pred_coded_batch, coded_batch[1:].detach()) * batch_weight

            true_loss = loss_ae + loss_pred

            #### here only for comparison with other runs (not optimised)
            pred_decoded_batch = decoder(pred_coded_batch)
            loss = bce_criterion(pred_decoded_batch.detach(), train_batch[1:].detach()) * batch_weight

            train_queue.clear()
            train_queue.append(gamescreen) # add last gamescreen as the first of the next batch

            # take step
            step_counter += 1

            optimizer.zero_grad()
            true_loss.backward()
            optimizer.step()

            if step_counter % 32 == 1:
                _log_grad_ratios('encoder/ratio_', encoder.parameters(), step_counter)
                _log_grad_ratios('decoder/ratio_', decoder.parameters(), step_counter)
                _log_grad_ratios('rnn/ratio_', rnn_params, step_counter)

            # accumulate
            total_loss += _to_float(loss)
            true_total_loss_ae += _to_float(loss_ae)
            true_total_loss_pred += _to_float(loss_pred)

    # End of game: log the per-step averages and reset the accumulators.
    if train_steps > 0:
        logger.scalar_summary('loss', total_loss / train_steps, step_counter)
        logger.scalar_summary('true_loss_naive/ae', true_total_loss_ae / train_steps, step_counter)
        logger.scalar_summary('true_loss_naive/pred', true_total_loss_pred / train_steps, step_counter)

    total_loss = 0
    true_total_loss_ae = 0
    true_total_loss_pred = 0
    train_steps = 0

    # Games are independent: start the next one with a fresh state and buffer.
    hidden = predictor.zero_hidden()
    train_queue.clear()

    # Checkpoint after every game.
    tch.save(encoder, nb_id + 'encoder.pkl')
    tch.save(decoder, nb_id + 'decoder.pkl')
    tch.save(predictor, nb_id + 'predictor.pkl')
    tch.save(dummy, nb_id + 'dummy.pkl')
    # The step counter belongs to the checkpoints written just above, so it is
    # stored under the same `nb_id` prefix (it used to go under `load_id`), and
    # through a `with` block so the file is flushed and closed.
    with open(nb_id + 'tsc.pickle', 'wb') as counter_file:
        pickle.dump(step_counter, counter_file)

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


KeyboardInterrupt: 