In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import PIL
import math

from torch import optim
from torch.autograd import Variable

# check the system we're on, gpu
# Module-level flag: later cells read `use_cuda` to decide whether modules and
# tensors are moved to the GPU with `.cuda()`.
use_cuda = torch.cuda.is_available()

# NOTE(review): this overwrites the default argument values of `np.load` for the
# whole process. The four values presumably line up with
# (mmap_mode, allow_pickle, fix_imports, encoding) — verify against the installed
# NumPy signature. With allow_pickle defaulting to True, every later `np.load`
# call will unpickle object arrays, which can execute code from untrusted files.
# `DataLoader.load_npz` already passes `allow_pickle=True` explicitly.
np.load.__defaults__=(None, True, True, 'ASCII')

^C


ModuleNotFoundError: No module named 'torch'

In [None]:
"""
Notes on HyperParameters
"""
class HyperParameters():
    def __init__(self):
        self.data = '../Datasets/sketchrnn_chair.npz'
        self.encoder_hidden_size = 64
        self.decoder_hidden_size = 128
        # latent vector
        self.Nz = 32
        
        #return
        self.M = 20
        self.dropout = 0.9
        self.batch_size = 100
        self.eta_min = 0.01
        self.R = 0.99995
        self.KL_min = 0.2
        self.wKL = 0.5
        self.lr = 0.001
        self.lr_decay = 0.9999
        self.min_lr = 0.00001
        self.grad_clip = 1.
        self.temperature = 0.4
        self.max_seq_length = 200

hp = HyperParameters()

In [None]:
class DataLoader():
    """Remembers the path of an ``.npz`` archive and opens it on request."""

    def __init__(self, file):
        # Path (or file-like object) later handed to np.load unchanged.
        self.file = file

    def load_npz(self):
        """Open ``self.file`` with ``np.load`` and return whatever it yields.

        Returns:
            The object produced by ``np.load`` for this file; nothing is read
            eagerly or converted here.
        """
        # SECURITY: allow_pickle=True unpickles object arrays, which can run
        # arbitrary code — only use this on dataset files you trust.
        return np.load(self.file, allow_pickle=True, encoding='latin1')

# Open the dataset archive whose path is configured in hp.data; `data` is the
# object returned by np.load and is consumed by DataHandler in a later cell.
dataLoader = DataLoader(hp.data)
data = dataLoader.load_npz()

In [None]:
"""
Notes on Data Handler
"""
class DataHandler:
    """Turns the raw per-sketch stroke arrays in ``data`` into fixed-size tensors.

    ``data`` is indexed by split name (``'train'``, ``'test'``, ``'valid'``) and
    iterating it must yield the split names; each split is iterated as a
    sequence of 2-D arrays with (at least) three columns.
    """

    def __init__(self, data):
        self.data = data

    def padding(self, strokedata, max_len):
        """Expand one ``(n, 3)`` stroke array into a ``(max_len, 5)`` float tensor.

        Output columns: 0-1 are copied from input columns 0-1, column 3 is input
        column 2, column 2 is its complement, and column 4 flags padding rows.
        (Input columns are presumably dx, dy, pen-lifted — TODO confirm.)

        Args:
            strokedata: NumPy array of shape ``(n, 3)`` with ``n <= max_len``.
            max_len: number of rows in the returned tensor.

        Returns:
            ``torch.Tensor`` of shape ``(max_len, 5)`` in which the last three
            columns of every row contain exactly one 1.
        """
        s = torch.from_numpy(strokedata)
        s_len = s.shape[0]
        result = torch.zeros((max_len, 5))
        result[0:s_len, 0:2] = s[:, 0:2]
        result[0:s_len, 3] = s[:, 2]
        result[0:s_len, 2] = 1 - result[0:s_len, 3]    # 1 to 0, 0 to 1
        # The end flag starts AFTER the last real row. Starting at s_len - 1 left
        # that row with two flags set, and for an empty sketch (-1:) only
        # flagged the final row. The slice is empty when s_len == max_len.
        result[s_len:, 4] = 1
        return result

    def handle(self):
        """Pad every sketch of every split to a common length.

        Returns:
            ``(data_train, data_test, data_valid, max_len)`` where each tensor is
            float64 with shape ``(num_sketches, max_len, 5)`` and ``max_len`` is
            the largest row count found in any split of ``self.data``.
        """
        # Longest sketch across ALL splits present in the archive.
        max_len = 0
        for split in self.data:
            for strokedata in self.data[split]:
                max_len = max(max_len, strokedata.shape[0])

        def pad_split(split):
            # Fetch the split once, then pad sketch by sketch into one tensor.
            sketches = self.data[split]
            padded = torch.zeros((sketches.shape[0], max_len, 5), dtype=torch.float64)
            for row, strokedata in enumerate(sketches):
                padded[row] = self.padding(strokedata, max_len)
            return padded

        return pad_split('train'), pad_split('test'), pad_split('valid'), max_len

    def make_batch(self, data, batch_size):
        """Flatten ``data`` to rows of 5 values and cut them into fixed-size batches.

        Args:
            data: tensor of shape ``(num_sketches, seq_len, 5)``.
            batch_size: number of rows per batch.

        Returns:
            ``(batches, stroke_lengths)``. ``batches`` is float64 with shape
            ``(ceil(rows / batch_size), batch_size, 5)``; a short final batch is
            zero-filled at its tail (previously this raised a shape error).
            ``stroke_lengths`` keeps the original contract: the total row count,
            once per batch except the last.
            NOTE(review): that value is not a per-sketch length, and batches cut
            across sketch boundaries — confirm what callers expect.
        """
        data_len = data.shape[0] * data.shape[1]
        reshaped_data = data.reshape(data_len, 5)
        batch_num = math.ceil(data_len / batch_size)
        batches = torch.zeros((batch_num, batch_size, 5), dtype=torch.float64)
        stroke_lengths = []
        for index in range(batch_num):
            start = index * batch_size
            chunk = reshaped_data[start:start + batch_size]
            # Copy only as many rows as the chunk holds, so a partial final
            # batch no longer has to match the full batch shape.
            batches[index, :chunk.shape[0]] = chunk
            if index < batch_num - 1:
                stroke_lengths.append(data_len)
        if use_cuda:
            batches = batches.cuda()
        return batches, stroke_lengths
    
        
# Pad every split once. handle() returns (train, test, valid, max_len).
datahandler = DataHandler(data)
data_train, data_test, data_valid, max_length = datahandler.handle()
# Nmax is the global name the Model cell reads for the padded sequence length.
Nmax = max_length
# batches_test, stroke_lengths = datahandler.make_batch(data_test, hp.batch_size)
# print(batches_test.shape)

In [None]:
class Encoder(nn.Module):
    """Bidirectional LSTM that maps a batch of sequences to a sampled latent vector.

    Layer sizes come from the global ``hp`` (``encoder_hidden_size``, ``Nz``).
    """

    def __init__(self):
        super().__init__()
        # bidirectional lstm over rows of 5 features:
        self.lstm = nn.LSTM(5, hp.encoder_hidden_size, bidirectional=True)
        # create mu and sigma from lstm's last hidden state (both directions):
        self.fc_mu = nn.Linear(2*hp.encoder_hidden_size, hp.Nz)
        self.fc_sigma = nn.Linear(2*hp.encoder_hidden_size, hp.Nz)
        # Start in training mode. No dropout layer is configured on this module,
        # so this only sets the `training` flag.
        self.train()

    def forward(self, inputs, batch_size, hidden_cell=None):
        """Encode ``inputs`` and draw ``z = mu + sigma * noise``.

        Args:
            inputs: tensor of shape ``(seq_len, batch_size, 5)``; it is cast to
                float32 and moved to the device this module's weights live on.
            batch_size: size of dimension 1 of ``inputs``; used to build the
                zero initial state when ``hidden_cell`` is None.
            hidden_cell: optional ``(hidden, cell)`` pair, each of shape
                ``(2, batch_size, encoder_hidden_size)``.

        Returns:
            ``(z, mu, sigma_hat)``, each of shape ``(batch_size, Nz)``.
            ``sigma_hat`` is the raw linear output; ``sigma = exp(sigma_hat / 2)``.
        """
        # Use the module's own device instead of the global use_cuda flag, so a
        # CPU model still works on a machine that happens to have a GPU.
        device = self.fc_mu.weight.device
        inputs = inputs.to(device=device, dtype=torch.float32)
        if hidden_cell is None:
            # then must init with zeros
            hidden = torch.zeros(2, batch_size, hp.encoder_hidden_size, device=device)
            hidden_cell = (hidden, torch.zeros_like(hidden))
        _, (hidden, _) = self.lstm(inputs, hidden_cell)
        # hidden is (2, batch_size, hidden_size), we want (batch_size, 2*hidden_size):
        hidden_cat = torch.cat([hidden[0], hidden[1]], dim=1)
        # mu and sigma:
        mu = self.fc_mu(hidden_cat)
        sigma_hat = self.fc_sigma(hidden_cat)
        sigma = torch.exp(sigma_hat/2.)
        # N ~ N(0,1), created directly on mu's device with mu's dtype
        noise = torch.randn_like(mu)
        z = mu + sigma*noise
        # mu and sigma_hat are needed for LKL loss
        return z, mu, sigma_hat
    
# encoder = Encoder()
# encoder.train()
# z, _, _ = encoder(batches_test, hp.batch_size)
# print(z.shape)

In [None]:
# a = torch.rand((3,1,5))
# b = torch.tensor([[1,2], 
#                   [3,4]])
# c = torch.tensor([[5,6],
#                   [7,8]])
# d = torch.stack([b.unsqueeze(0),c.unsqueeze(0)], 0)
# e = (b.unsqueeze(0).contiguous(),c.unsqueeze(0).contiguous())
# print(d.is_contiguous())
# print(d)
# print(e)
# # print(a.squeeze(0).shape)

In [18]:
"""
Notes on Decoder
"""
class Decoder(nn.Module):
    """LSTM decoder emitting mixture and pen-state parameters for each step.

    Layer sizes come from the global ``hp`` (``Nz``, ``decoder_hidden_size``,
    ``M``). The output layer produces ``6 * M + 3`` values per step: six per
    mixture component plus three pen-state logits.
    """

    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(hp.Nz + 5, hp.decoder_hidden_size)
        self.fc_hc = nn.Linear(hp.Nz, 2*hp.decoder_hidden_size)
        self.fc_output = nn.Linear(hp.decoder_hidden_size, 6 * hp.M + 3)

    def forward(self, inputs, z, hidden_cell=None):
        """Run the LSTM and split its projection into distribution parameters.

        Args:
            inputs: tensor of shape ``(seq_len, batch, Nz + 5)``.
            z: tensor of shape ``(batch, Nz)``; only used to build the initial
                state when ``hidden_cell`` is None.
            hidden_cell: optional ``(hidden, cell)`` pair, each of shape
                ``(1, batch, decoder_hidden_size)``.

        Returns:
            ``(pi, mu_x, mu_y, sigma_x, sigma_y, rho, q, hidden, cell)``.
            The six mixture tensors have shape ``(len_out, batch, M)`` and ``q``
            has shape ``(len_out, batch, 3)``. ``len_out`` is ``seq_len`` in
            training mode and 1 otherwise (only the final hidden state is
            projected). ``pi`` and ``q`` are softmax-normalised over their last
            dimension.
        """
        if hidden_cell is None:
            # First half of the projection seeds the hidden state, second half the cell.
            hidden, cell = torch.split(torch.tanh(self.fc_hc(z)), hp.decoder_hidden_size, 1)
            # split() returns views; make them contiguous before handing to the LSTM.
            hidden_cell = (hidden.unsqueeze(0).contiguous(), cell.unsqueeze(0).contiguous())
        outputs, (hidden, cell) = self.lstm(inputs, hidden_cell)
        if self.training:
            y = self.fc_output(outputs)
        else:
            y = self.fc_output(hidden)
        # y is (len_out, batch, 6*M + 3). Slice the FEATURE axis (the last one):
        # the final 3 values are pen-state logits, the rest are M groups of 6.
        # Slicing axis 1 cut into the batch, and "-2" dropped one pen logit.
        q = y[..., -3:].softmax(-1)
        params = y[..., :-3].reshape(y.shape[0], y.shape[1], hp.M, 6)
        pi = params[..., 0].softmax(-1)
        mu_x = params[..., 1]
        mu_y = params[..., 2]
        sigma_x = params[..., 3].exp()    # exp keeps the scales strictly positive
        sigma_y = params[..., 4].exp()
        rho = params[..., 5].tanh()       # tanh keeps the correlation inside (-1, 1)
        return pi, mu_x, mu_y, sigma_x, sigma_y, rho, q, hidden, cell

In [23]:
"""
Notes on Model
"""
class Model():
    """Owns the encoder/decoder pair, their optimizers, the losses and sampling.

    Reads the globals ``hp``, ``use_cuda`` and ``Nmax`` and, for generation,
    ``data_train``, ``sample_bivariate_normal`` and ``draw_sketch``.
    """

    def __init__(self):
        if use_cuda:
            self.encoder = Encoder().cuda()
            self.decoder = Decoder().cuda()
        else:
            self.encoder = Encoder()
            self.decoder = Decoder()
        # Built for BOTH branches; previously these lines sat inside the `else`
        # and a CUDA model ended up without optimizers or eta_step.
        self.encoder_optimizer = optim.Adam(self.encoder.parameters(), hp.lr)
        self.decoder_optimizer = optim.Adam(self.decoder.parameters(), hp.lr)
        self.eta_step = hp.eta_min

    def make_target(self, batch, lengths):
        """Build the loss targets for a batch.

        Args:
            batch: tensor of shape ``(seq_len, batch, 5)``; ``seq_len`` is
                expected to equal ``Nmax`` so that the mask lines up.
            lengths: one length per batch column.

        Returns:
            ``(mask, dx, dy, p)``: ``mask`` is ``(Nmax + 1, batch)`` with ones on
            the first ``length`` rows of each column; ``dx`` / ``dy`` are columns
            0 / 1 repeated ``hp.M`` times along a new last axis; ``p`` holds
            columns 2-4.
        """
        n_columns = batch.size()[1]
        # One end-of-sketch row per column, matching batch's dtype and device so
        # the concatenation cannot fail on a float32/float64 or CPU/GPU mismatch.
        eos = batch.new_tensor([0, 0, 0, 0, 1]).repeat(1, n_columns, 1)
        batch = torch.cat([batch, eos], 0)
        mask = torch.zeros(Nmax+1, n_columns, device=batch.device)
        for indice, length in enumerate(lengths):
            mask[:length, indice] = 1

        dx = torch.stack([batch.data[:,:,0]]*hp.M, 2)
        dy = torch.stack([batch.data[:,:,1]]*hp.M, 2)
        p = torch.stack([batch.data[:,:,2], batch.data[:,:,3], batch.data[:,:,4]], 2)
        return mask, dx, dy, p

    def train(self, epoch):
        """Put both networks into training mode. ``epoch`` is currently unused."""
        # Was `self.encode().train` — a missing attribute, and never called.
        self.encoder.train()
        self.decoder.train()

    def bivariate_normal_pdf(self, dx, dy):
        """Density of ``(dx, dy)`` under every mixture component stored on ``self``."""
        z_x = ((dx-self.mu_x)/self.sigma_x)**2
        z_y = ((dy-self.mu_y)/self.sigma_y)**2
        z_xy = (dx-self.mu_x)*(dy-self.mu_y)/(self.sigma_x*self.sigma_y)
        z = z_x + z_y - 2*self.rho_xy*z_xy
        exp = torch.exp(-z/(2*(1-self.rho_xy**2)))
        norm = 2*np.pi*self.sigma_x*self.sigma_y*torch.sqrt(1-self.rho_xy**2)
        return exp/norm

    def reconstruction_loss(self, mask, dx, dy, p, epoch):
        """Sum of the offset loss and the pen-state loss. ``epoch`` is unused."""
        pdf = self.bivariate_normal_pdf(dx, dy)
        scale = float(Nmax*hp.batch_size)
        # loss for x and y (1e-5 guards log(0))
        LS = -torch.sum(mask*torch.log(1e-5+torch.sum(self.pi * pdf, 2)))/scale
        # loss for pen states
        LP = -torch.sum(p*torch.log(self.q))/scale
        return LS+LP

    def kullback_leibler_loss(self):
        """Weighted KL term, floored at ``hp.KL_min``.

        Reads ``self.mu`` and ``self.sigma``. Returns a tensor of shape ``(1,)``.
        """
        LKL = -0.5*torch.sum(1+self.sigma-self.mu**2-torch.exp(self.sigma))\
            /float(hp.Nz*hp.batch_size)
        # Floor lives on the same device/dtype as the loss; no use_cuda branch needed.
        KL_min = LKL.new_tensor([hp.KL_min])
        return hp.wKL*self.eta_step * torch.max(LKL, KL_min)

    def save(self, epoch):
        """Write both state dicts under ``./outputs`` with a random model number."""
        import os
        os.makedirs('./outputs', exist_ok=True)   # torch.save does not create folders
        model_number = np.random.rand()
        # NOTE(review): '%3f' means minimum width 3 with the default six decimals;
        # '%.3f' may have been intended. Left unchanged so file names stay the same.
        torch.save(self.encoder.state_dict(), './outputs/encoderRNN_model_number_%3f_epoch_%d.pth' % (model_number,epoch))
        torch.save(self.decoder.state_dict(), './outputs/decoderRNN_model_number_%3f_epoch_%d.pth' % (model_number,epoch))

    def load(self, encoder_name, decoder_name):
        """Restore both networks from files written by ``save``."""
        # Load to CPU first; load_state_dict copies into the existing parameters,
        # so a GPU-saved checkpoint also loads on a CPU-only machine.
        saved_encoder = torch.load(encoder_name, map_location='cpu')
        saved_decoder = torch.load(decoder_name, map_location='cpu')
        self.encoder.load_state_dict(saved_encoder)
        self.decoder.load_state_dict(saved_decoder)

    def conditional_generation(self, epoch, batch=None):
        """Encode one sketch, decode step by step, and draw the result.

        Args:
            epoch: forwarded to ``draw_sketch`` for the output file name.
            batch: optional tensor of shape ``(seq_len, 1, 5)`` to condition on.
                When omitted, a random row of the global ``data_train`` is used.
                (The original called an undefined ``make_batch(1)`` here —
                confirm this is the intended source.)
        """
        if batch is None:
            index = np.random.randint(data_train.shape[0])
            batch = data_train[index].unsqueeze(1)
        if use_cuda:
            batch = batch.cuda()
        # should remove dropouts:
        self.encoder.train(False)
        self.decoder.train(False)
        seq_x = []
        seq_y = []
        seq_z = []
        with torch.no_grad():   # sampling only, no graph needed
            # encode:
            z, _, _ = self.encoder(batch, 1)
            # start-of-sequence row
            s = torch.tensor([0., 0., 1., 0., 0.], device=z.device).view(1, 1, -1)
            hidden_cell = None
            for i in range(Nmax):
                decoder_input = torch.cat([s, z.unsqueeze(0)], 2)
                # decode:
                self.pi, self.mu_x, self.mu_y, self.sigma_x, self.sigma_y, \
                    self.rho_xy, self.q, hidden, cell = \
                        self.decoder(decoder_input, z, hidden_cell)
                hidden_cell = (hidden, cell)
                # sample from parameters (was the non-existent sample_next_state):
                s, dx, dy, pen_down, eos = self.sample_sketch_state()
                seq_x.append(dx)
                seq_y.append(dy)
                seq_z.append(pen_down)
                if eos:
                    print(i)
                    break
        # visualize result:
        x_sample = np.cumsum(seq_x, 0)
        y_sample = np.cumsum(seq_y, 0)
        z_sample = np.array(seq_z)
        sequence = np.stack([x_sample, y_sample, z_sample]).T
        draw_sketch(sequence, epoch)

    #Used for conditional generation...
    def tweak_temperature(self, pi_pdf):
        """Sharpen a probability vector: softmax of ``log(p) / hp.temperature``."""
        with np.errstate(divide='ignore'):   # log(0) -> -inf is fine, it maps back to 0
            pi_pdf = np.log(pi_pdf)/hp.temperature
        pi_pdf -= pi_pdf.max()
        pi_pdf = np.exp(pi_pdf)
        pi_pdf /= pi_pdf.sum()
        return pi_pdf

    def sample_sketch_state(self):
        """Sample the next step from the decoder outputs stored on ``self``.

        Only element ``[0, 0]`` of each stored tensor is read.

        Returns:
            ``(next_state, x, y, pen_flag, eos_flag)``: ``next_state`` is a
            ``(1, 1, 5)`` tensor with the sampled offsets in slots 0-1 and a
            single 1 in slot ``q_idx + 2``; the flags are ``q_idx == 1`` and
            ``q_idx == 2``.
        """
        # Get GMM index for mixture distribution
        pi = self.pi.data[0,0,:].cpu().numpy().astype(np.float64)
        pi = self.tweak_temperature(pi)
        pi_idx = np.random.choice(hp.M, p=pi)

        # Get the pen state, or sketch state (was assigned to `pi`, leaving `q` undefined)
        q = self.q.data[0,0,:].cpu().numpy().astype(np.float64)
        q = self.tweak_temperature(q)
        q_idx = np.random.choice(3, p=q)

        # Get mixture parameters as plain floats, so the sampler cannot modify
        # the stored tensors through views.
        mu_x = self.mu_x.data[0,0,pi_idx].item()
        mu_y = self.mu_y.data[0,0,pi_idx].item()
        sigma_x = self.sigma_x.data[0,0,pi_idx].item()
        sigma_y = self.sigma_y.data[0,0,pi_idx].item()
        rho_xy = self.rho_xy.data[0,0,pi_idx].item()
        x, y = sample_bivariate_normal(mu_x, mu_y, sigma_x, sigma_y, rho_xy, greedy=False)
        next_state = torch.zeros(5, device=self.pi.device)
        next_state[0] = x
        next_state[1] = y
        next_state[q_idx+2] = 1
        return next_state.view(1,1,-1), x, y, q_idx==1, q_idx==2

In [20]:
# take the parameters to form the normal distribution and sample from it
def sample_bivariate_normal(mu_x,mu_y,sigma_x,sigma_y,rho_xy, greedy=False):
    """Draw one ``(x, y)`` sample from a bivariate normal distribution.

    Both standard deviations are multiplied by ``sqrt(hp.temperature)`` before
    sampling.

    Args:
        mu_x, mu_y: means.
        sigma_x, sigma_y: standard deviations.
        rho_xy: correlation coefficient.
            All five may be Python numbers or single-element tensors/arrays.
        greedy: when True, skip sampling and return ``(mu_x, mu_y)`` unchanged.

    Returns:
        Tuple ``(x, y)``.
    """
    if greedy:
        return mu_x, mu_y
    # https://mathworld.wolfram.com/BivariateNormalDistribution.html
    # to read more about bivariate normal and its parameters
    mean = [float(mu_x), float(mu_y)]
    # Scale COPIES. The previous in-place `sigma_x *= ...` wrote back into the
    # caller's tensor whenever a tensor element view was passed in.
    scale = np.sqrt(hp.temperature)
    scaled_x = float(sigma_x) * scale
    scaled_y = float(sigma_y) * scale
    cross = float(rho_xy) * scaled_x * scaled_y
    covariance = [[scaled_x * scaled_x, cross], [cross, scaled_y * scaled_y]]
    x = np.random.multivariate_normal(mean, covariance, 1)
    return x[0][0], x[0][1]

# Will sketch the image out using different colours for the strokes
def draw_sketch(sequence, epoch, name='_output_'):
    """Plot a sketch stroke by stroke and save it as a JPEG in the working directory.

    Args:
        sequence: 2-D array whose columns 0 and 1 are absolute x / y positions;
            a value > 0 in column 2 ends the current stroke after that row.
        epoch: used only to build the file name.
        name: suffix inserted before the ``.jpg`` extension.

    The file is written to ``'sketch_after_<epoch>epochs<name>.jpg'``.
    """
    # Cut the point list after every row whose third column is > 0.
    strokes = np.split(sequence, np.where(sequence[:,2]>0)[0]+1)
    fig = plt.figure()
    axis_1 = fig.add_subplot(1,1,1)

    for stroke in strokes:
        # y is negated before plotting; each stroke gets its own random colour.
        # (Was `stoke` and `numpy.random`, both undefined names.)
        axis_1.plot(stroke[:,0], -stroke[:,1], color=np.random.rand(3,))

    name = 'sketch_after_'+str(epoch)+'epochs'+name+'.jpg'
    # Let Matplotlib write the JPEG itself. The previous route went through
    # canvas.tostring_rgb(), which newer Matplotlib releases no longer provide,
    # and saved an undefined `pil_image`.
    fig.savefig(name, format='jpeg')
    plt.close(fig)

In [21]:
"""
Note on conditional generation:

This is for after training: the paper says we can sample sketches from the model.
During the sampling process we generate parameters for both GMM and categorical
distributions at each time step, and sample an outcome S'i for that time step.

GMM ->
Categorical distributions -> calculated using outputs as logit values

We continue to sample until p3, so the end of sketch, is 1 or the number of iterations
has reached Nmax (length of longest sketch in training dataset). 
Sampling the output is not deterministic, it's a random sequence,
conditioned on the input latent vector z. We can control the level of randomness we'd like
our samples to have during the sampling process by using a temperature parameter.

- q_hat_k (pen-state logits) becomes q_hat_k / temperature
- Pi_hat_k (mixture-weight logits) becomes Pi_hat_k / temperature
- sigma_x^2 becomes sigma_x^2 * temperature
- sigma_y^2 becomes sigma_y^2 * temperature

- the softmax parameters can be scaled, of the categorical distribution and also sigma
parameters of the bivariate normal distribution by a temperature parameter temperature.
To control the level of randomness in our samples.
- temperature is usually between 0 and 1, so limit T -> 0, model becomes deterministic and
samples will consist of the most likely point in the probability density function.

- Unconditional generation uses this temperature methodology to control the randomness of the
distribution

Unconditional generation -> train the model to generate sketches unconditionally, where only the
decoder RNN module is trained
-> The initial hidden and cell states of the decoder RNN are initialized to zero; the input x_i
of the decoder RNN at each step is only S_{i-1} or S'_{i-1}, so there is no need to concatenate the latent vector z

"""

"\nNote on conditional generation:\n\nThis for after training, the paper says we can sample sketches for the model.\nDuring the sampling process we generate parameters for both GMM and categorical\ndistributions at each time step, and sample an outcome S'i for that time step.\n\nGMM ->\nCategorical distributions -> calculated using outputs as logit values\n\nWe continue to sample until p3, so the end of sketch, is 1 or the number of iterations\nhas reached Nmax (length of longest sketch in training dataset). \nSampling the output is not deterministic, it's a random sequence,\nconditioned on the input latent vector z. We can control the level of randomness we'd like\nour samples to have during the sampling process by using a temperature parameter.\n\n- state_q_hat tends to state_q_hat / temperature\n- prime_k tends to prime_k / temperature\n- sigma_squared_x tends to sigma_squared_temperature_x\n- sigma_squared_y tends to sigma_squared_temperature_y\n\n- the softmax parameters can be sc

In [None]:
"""
Note on training iteration:
Epoch - the number of complete passes over the entire training dataset that the model makes.
The dataset is grouped into batches; if the batch size equals the whole training dataset, the number of epochs equals the number of iterations.
With dataset size d, epochs e, number of iterations i and batch size b: d*e = i*b
- data size * epochs = number of iterations * batch size
"""

def sample_sketch_state(self):
    """Sample the next step from the decoder outputs stored on ``self``.

    Draft copy of the ``Model`` method of the same name. ``self`` must provide
    ``pi``, ``q``, ``mu_x``, ``mu_y``, ``sigma_x``, ``sigma_y`` and ``rho_xy``
    tensors (only element ``[0, 0]`` is read) and a ``tweak_temperature`` method.

    Returns:
        ``(next_state, x, y, pen_flag, eos_flag)``: ``next_state`` is a
        ``(1, 1, 5)`` tensor with the sampled offsets in slots 0-1 and a single
        1 in slot ``q_idx + 2``; the flags are ``q_idx == 1`` and ``q_idx == 2``.
    """
    # Get GMM index for mixture distribution
    pi = self.pi.data[0,0,:].cpu().numpy().astype(np.float64)
    pi = self.tweak_temperature(pi)       # it is a method, not a global function
    pi_idx = np.random.choice(hp.M, p=pi)

    # Get the pen state, or sketch state (was assigned to `pi`, leaving `q` undefined)
    q = self.q.data[0,0,:].cpu().numpy().astype(np.float64)
    q = self.tweak_temperature(q)
    q_idx = np.random.choice(3, p=q)

    # Get mixture parameters as plain floats, so the sampler cannot modify the
    # stored tensors through views.
    mu_x = self.mu_x.data[0,0,pi_idx].item()
    mu_y = self.mu_y.data[0,0,pi_idx].item()
    sigma_x = self.sigma_x.data[0,0,pi_idx].item()
    sigma_y = self.sigma_y.data[0,0,pi_idx].item()
    rho_xy = self.rho_xy.data[0,0,pi_idx].item()
    x, y = sample_bivariate_normal(mu_x, mu_y, sigma_x, sigma_y, rho_xy, greedy=False)
    next_state = torch.zeros(5, device=self.pi.device)
    next_state[0] = x
    next_state[1] = y
    next_state[q_idx+2] = 1
    return next_state.view(1,1,-1), x, y, q_idx==1, q_idx==2

In [None]:
class Model():
    """Reduced model wrapper: builds the encoder/decoder pair and offers a sampler.

    NOTE(review): this cell defines ``Model`` a second time. Once it runs, the
    name refers to this class, which has none of the loss/generation methods of
    the larger ``Model`` defined in an earlier cell.
    """

    def __init__(self):
        # The notebook defines its networks as `Encoder` / `Decoder`;
        # `EncoderRNN` / `DecoderRNN` are not defined anywhere in it.
        if use_cuda:
            self.encoder = Encoder().cuda()
            self.decoder = Decoder().cuda()
        else:
            self.encoder = Encoder()
            self.decoder = Decoder()
        #self.encoder_optimizer = optim.Adam(self.encoder.parameters(), hp.lr)
        #self.decoder_optimizer = optim.Adam(self.decoder.parameters(), hp.lr)
        #self.eta_step = hp.eta_min

    # take the parameters to form the normal distribution and sample from it
    @staticmethod
    def sample_bivariate_normal(mu_x,mu_y,sigma_x,sigma_y,rho_xy, greedy=False):
        """Draw one ``(x, y)`` sample from a bivariate normal distribution.

        Both standard deviations are multiplied by ``sqrt(hp.temperature)``
        before sampling. Declared static because it takes no ``self``; without
        the decorator, calling it on an instance shifted every argument by one.

        Args:
            mu_x, mu_y: means.
            sigma_x, sigma_y: standard deviations.
            rho_xy: correlation coefficient.
            greedy: when True, return ``(mu_x, mu_y)`` without sampling.

        Returns:
            Tuple ``(x, y)``.
        """
        if greedy:
            return mu_x, mu_y
        # https://mathworld.wolfram.com/BivariateNormalDistribution.html
        # to read more about bivariate normal and its parameters
        mean = [float(mu_x), float(mu_y)]
        # Scale copies rather than `*=`, which wrote into caller-owned tensors.
        scale = np.sqrt(hp.temperature)
        scaled_x = float(sigma_x) * scale
        scaled_y = float(sigma_y) * scale
        cross = float(rho_xy) * scaled_x * scaled_y
        covariance = [[scaled_x * scaled_x, cross], [cross, scaled_y * scaled_y]]
        x = np.random.multivariate_normal(mean, covariance, 1)
        return x[0][0], x[0][1]




In [None]:
# Will sketch the image out using different colours for the strokes
def draw_sketch(sequence, epoch, name='_output_'):
    """Plot a sketch stroke by stroke and save it as a JPEG in the working directory.

    Args:
        sequence: 2-D array whose columns 0 and 1 are absolute x / y positions;
            a value > 0 in column 2 ends the current stroke after that row.
        epoch: used only to build the file name.
        name: suffix inserted before the ``.jpg`` extension.

    The file is written to ``'sketch_after_<epoch>epochs<name>.jpg'``.
    """
    # Cut the point list after every row whose third column is > 0.
    strokes = np.split(sequence, np.where(sequence[:,2]>0)[0]+1)
    fig = plt.figure()
    axis_1 = fig.add_subplot(1,1,1)

    for stroke in strokes:
        # y is negated before plotting; each stroke gets its own random colour.
        # (Was `stoke` and `numpy.random`, both undefined names.)
        axis_1.plot(stroke[:,0], -stroke[:,1], color=np.random.rand(3,))

    name = 'sketch_after_'+str(epoch)+'epochs'+name+'.jpg'
    # Let Matplotlib write the JPEG itself. The previous route went through
    # canvas.tostring_rgb(), which newer Matplotlib releases no longer provide,
    # and saved an undefined `pil_image`.
    fig.savefig(name, format='jpeg')
    plt.close(fig)

In [None]:
    def reconstruction_loss(self, mask, dx, dy, p, epoch):
        """Return the offset loss plus the pen-state loss.

        Reads ``self.pi`` and ``self.q`` and calls ``self.bivariate_normal_pdf``.
        Both terms are divided by ``Nmax * hp.batch_size``. ``epoch`` is accepted
        but not used.
        """
        component_density = self.bivariate_normal_pdf(dx, dy)
        normaliser = float(Nmax*hp.batch_size)

        # Offset term: masked log-likelihood of the mixture (1e-5 guards log(0)).
        mixture_density = torch.sum(self.pi * component_density, 2)
        offset_loss = -torch.sum(mask*torch.log(1e-5+mixture_density))/normaliser

        # Pen-state term: cross-entropy between targets p and predictions q.
        pen_loss = -torch.sum(p*torch.log(self.q))/normaliser

        return offset_loss+pen_loss

    def kullback_leibler_loss(self):
        """Return the weighted KL term, floored at ``hp.KL_min``.

        Reads ``self.mu``, ``self.sigma`` and ``self.eta_step``. The raw KL value
        is divided by ``hp.Nz * hp.batch_size``; the result is scaled by
        ``hp.wKL * self.eta_step`` and has shape ``(1,)``.
        """
        normaliser = float(hp.Nz*hp.batch_size)
        kl_sum = torch.sum(1+self.sigma-self.mu**2-torch.exp(self.sigma))
        LKL = -0.5*kl_sum/normaliser

        # One-element floor tensor, moved to the GPU when the notebook runs on CUDA.
        floor = torch.Tensor([hp.KL_min])
        if use_cuda:
            floor = floor.cuda()
        floor = floor.detach()

        weight = hp.wKL*self.eta_step
        return weight * torch.max(LKL, floor)
    
    def train_temp(self, epoch):
        """Draft training step: combine both losses and back-propagate.

        NOTE(review): ``mask``, ``dx``, ``dy`` and ``p`` are not parameters or
        locals here, so they must exist as globals when this runs. The larger
        Model class has a ``make_target`` that returns values with these names —
        TODO confirm that is the intended source.
        """
        # compute losses:
        LKL = self.kullback_leibler_loss()
        LR = self.reconstruction_loss(mask,dx,dy,p,epoch)
        loss = LR + LKL
        # gradient step
        # Only back-propagation is visible in this cell; no optimizer step or
        # gradient reset appears here.
        loss.backward()