<a href="https://colab.research.google.com/github/ij264/Corpus-Drawing-Project/blob/master/sketchRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
!pip install ipython-autotime

%load_ext autotime

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 2.33 s


In [14]:
# imports
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.autograd import Variable

time: 2.36 ms


In [19]:
# hyperparameters
# UPDATE

hp = {
    'location': '/content/drive/Shared drives/Corpus Drawing Project/data/sketchrnn_airplane.npz',
    'Nz': 128,
    'batch_size': 1,
    'encoder_hidden_size': 256,
    'decoder_hidden_size': 512,
    'temperature': 0.9,
    'gradient_clipping': 1.0,
    'lr': 1e-4,
    'KL_min': 0.2,
    'R': 0.9999,
    'WKL': 1.0,
    'dropout_keep': 0.9
}

time: 5.72 ms


In [20]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
time: 8.18 ms


In [21]:
# DONE
# returns maximum sequence length in stroke sequences in data
def max_size(data):
    sequences = [len(seq) for seq in data]
    return max(sequences)

time: 1.32 ms


In [22]:
data = np.load(hp['location'], encoding='latin1', allow_pickle=True)
training_data = data['train']
testing_data = data['test']

Nmax = max_size(training_data)
Nmax

99

time: 1.25 s


In [None]:
# UPDATE
# encoder RNN
class EncoderRNN(nn.Module):
    def __init__(self):
        super(EncoderRNN, self).__init__()

        # bidirectional LSTM 
        self.LSTM = nn.LSTM(input_size=5, # input vector is 5x1
                            hp['encoder_hidden_size'],
                            hp['decoder_hidden_size'],
                            num_layers=1,
                            bias=True,
                            batch_first=False,
                            bidirectional=True
                            )
        
        self.mu = nn.Linear(in_features=hp['encoder_hidden_size'],
                            hp.Nz)
        self.sigma = nn.Linear(in_features=hp['encoder_hidden_size'],
                               hp['Nz'])
        
        self.train()

    def forward(self, inputs, batch_size, hidden_cell=None):
        mu = self.mu(h)
        sigma_hat = self.sigma(h)
        sigma = torch.exp(sigma_hat/2)

In [25]:
# returns batches of size batch_size
def get_batch(data, batch_size):
    batch_idx = np.random.choice(len(data), batch_size) # creates array of random indices of length batch_size
    batch_sequence = [data[idx] for idx in batch_idx]
    strokes = []
    lengths = []

    for sequence in batch_sequence:
        sequence_len = len(sequence[:, 0]) # length of first column
        new_sequence = np.zeros((Nmax, 5)) # initalises empty sequence to store strokes. each row is in the form DeltaX, DeltaY, p1, p2, p3
        print(new_sequence)
        new_sequence[:sequence_len, :2] = sequence[:, :2] # initalises DeltaX, DeltaY
        print(new_sequence)
        new_sequence[:sequence_len - 1, 2] = 1-sequence[:-1, 2] # initalises p1
        print(new_sequence)
        new_sequence[:sequence_len, 3] = sequence[:, 2]
        print(new_sequence)
        new_sequence[(sequence_len - 1):, 4] = 1 # initialises p3
        print(new_sequence)
        new_sequence[sequence_len - 1, 2:4] = 0
        lengths.append(len(sequence[:, 0]))
        strokes.append(new_sequence)
    
    batch = Variable(torch.from_numpy(np.stack(strokes,1)).float())
    
    return batch, lengths

time: 13.7 ms


In [26]:
batch, lengths = get_batch(training_data, hp['batch_size'])
print(batch)

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0.

In [None]:
# NN model: a bidirectional NN with LSTM
class Model():
    def __init__(self):

        # forward encoder
        self.encoder() = EncoderRNN()

        # backward encoder
        self.decoder() = DecoderRNN()

        # TODO: implement gradient clipping
        self.encoder_optimiser() = optim.Adam(self.encoder.parameters(), hp['lr'])
        self.decoder_optimiser() = optim.Adam(self.decoder.parameters(), hp['lr'])

    # bivariate normal distribution probability distribution function
    def bivariate_normal_PDF(Dx, Dy):

        z = (Dx - self.mu_x)**2/self.sigma_x**2 \
        - 2 * self.rho_xy * (Dx - self.mu_x) * (Dy - self.mu_y)/(self.sigma_x * self.sigma_y) \
        + (Dy - self.mu_y)**2/self.sigma_y**2
        prefactor = 1/(2 * np.pi * self.sigma_x * self.sigma_y * torch.sqrt(1 - self.rho_xy**2))

        return prefactor * torch.exp(-z/(2 * (1 - self.rho_xy**2)))

    # reconstruction loss
    def LR(self, Dx, Dy, p):
        PDF = bivariate_normal_PDF(Dx, Dy)
        LS = -1/float(N_max) * torch.sum(
            torch.log(
                torch.sum(self.Pi * PDF)
            )
        )
        LP = -1/float(N_max) * torch.sum(
            p * torch.log(self.q)
        )
        return LS + LP

    # KL divergence loss 
    def KL(self, Dx, Dy):
        return -1/(2 * float(hp['Nz'])) * torch.sum(1 + sigma_hat - torch.square(mu))

# EQ. 7 USE SOFTMAX IN PYTORCH
    q = nn.Softmax(self.q_hat) 

    def train(self, epoch):
        self.encoder.train()
        self.decoder.train()

        batch, lengths = get_batch(hp['batch_size'])
        
        total_loss = 0
        for i, (data, _) in enumerate(training_set):

        # `clip_grad_norm` helps prevent the exploding gradient problem 
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)