In [2]:
!pip install torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import PIL

from torch.autograd import Variable

# Record whether PyTorch can see a CUDA device.
use_cuda = torch.cuda.is_available()

# NOTE(review): this replaces np.load's default positional arguments for the whole
# process; the second entry presumably turns allow_pickle on by default (confirm
# against the installed NumPy signature). DataLoader.load_npz already passes
# allow_pickle=True explicitly, so this global patch looks redundant. Unpickling can
# execute arbitrary code, so only load trusted files while it is in place.
np.load.__defaults__=(None, True, True, 'ASCII')



In [3]:
class HyperParameters():
    """Dataset path, model sizes and training/sampling settings in one object.

    Every setting is a keyword-only constructor argument whose default is the
    value that used to be hard-coded, so ``HyperParameters()`` behaves exactly
    as before while ``HyperParameters(batch_size=32)`` overrides one value
    without editing the class.
    """

    def __init__(self, *,
                 data='../Datasets/sketchrnn_chair.npz',
                 encoder_hidden_size=64,
                 decoder_hidden_size=128,
                 Nz=32,
                 M=20,
                 dropout=0.9,
                 batch_size=100,
                 eta_min=0.01,
                 R=0.99995,
                 KL_min=0.2,
                 wKL=0.5,
                 lr=0.001,
                 lr_decay=0.9999,
                 min_lr=0.00001,
                 grad_clip=1.,
                 temperature=0.4,
                 max_seq_length=200):
        # Path of the .npz dataset handed to DataLoader.
        self.data = data
        # Hidden sizes of the encoder and decoder LSTMs.
        self.encoder_hidden_size = encoder_hidden_size
        self.decoder_hidden_size = decoder_hidden_size
        # Size of the latent vector z.
        self.Nz = Nz

        # NOTE(review): presumably the number of mixture components in the output
        # GMM, as in the Sketch-RNN paper -- nothing in this file uses it yet.
        self.M = M
        # NOTE(review): as an nn.Dropout probability 0.9 would drop 90% of the
        # activations; if this was meant as a keep probability the PyTorch value
        # is 0.1 -- confirm before wiring it into a layer.
        self.dropout = dropout
        self.batch_size = batch_size
        # NOTE(review): eta_min, R, KL_min and wKL look like the KL-loss annealing
        # and weighting constants from the Sketch-RNN paper -- confirm when the
        # loss is implemented.
        self.eta_min = eta_min
        self.R = R
        self.KL_min = KL_min
        self.wKL = wKL
        # Optimiser settings: initial learning rate, its decay factor and floor,
        # and the gradient clipping threshold (by name; not used in this file yet).
        self.lr = lr
        self.lr_decay = lr_decay
        self.min_lr = min_lr
        self.grad_clip = grad_clip
        # Sampling temperature read by tweak_temperature().
        self.temperature = temperature
        self.max_seq_length = max_seq_length

# Shared settings object; later cells read their values from it as hp.<name>.
hp = HyperParameters()

In [4]:
class DataLoader():
    """Thin wrapper around ``np.load`` for one dataset file."""

    def __init__(self, file):
        # Only the location is remembered here; nothing is read until load_npz().
        self.file = file

    def load_npz(self):
        """Open ``self.file`` with NumPy and return whatever ``np.load`` produces.

        Pickled object arrays are allowed and decoded with the latin1 encoding.
        For a .npz file NumPy returns a lazily-read archive that keeps the file
        open; this method does not close it.

        NOTE(review): unpickling can execute arbitrary code -- only point this
        at trusted files.
        """
        return np.load(self.file, encoding='latin1', allow_pickle=True)

# Open the dataset file named by hp.data; `data` is what DataHandler consumes below.
dataLoader = DataLoader(hp.data)
data = dataLoader.load_npz()

In [5]:
class DataHandler:
    """Turns variable-length three-column sketches into fixed-length five-column tensors.

    ``data`` maps a split name to a sequence of per-sketch NumPy arrays;
    handle() requires the 'train', 'test' and 'valid' splits. Each sketch array
    has one row per stroke point and three columns. In the Sketch-RNN dataset
    format these are (dx, dy, pen_lifted) -- assumed here, confirm against the
    dataset in use.
    """

    def __init__(self, data):
        self.data = data

    def padding(self, strokedata, max_len):
        """Expand one sketch to ``max_len`` rows of five columns.

        Args:
            strokedata: NumPy array of shape (n, 3) with n <= max_len.
            max_len: number of rows in the returned tensor.

        Returns:
            Tensor of shape (max_len, 5) in torch's default float dtype.
            For the first n rows, columns 0-1 are copied from the input,
            column 3 is input column 2, column 2 is its complement and
            column 4 is 0. Every later (padding) row is all zeros except
            column 4, which is 1.
        """
        s = torch.from_numpy(strokedata)
        s_len = s.shape[0]
        result = torch.zeros((max_len, 5))
        result[0:s_len, 0:2] = s[:, 0:2]
        result[0:s_len, 3] = s[:, 2]
        result[0:s_len, 2] = 1 - result[0:s_len, 3]    # 1 to 0, 0 to 1
        # Only padding rows get the end flag. Starting the slice at s_len - 1
        # (as before) also flagged the last real point, so columns 2-4 were no
        # longer one-hot there, and for an empty sketch the slice became -1:
        # and flagged just the final row.
        result[s_len:, 4] = 1
        return result

    def _pad_split(self, sketches, max_len):
        """Pad every sketch of one split into a (num_sketches, max_len, 5) tensor."""
        padded = torch.zeros((sketches.shape[0], max_len, 5), dtype=torch.float64)
        for idx, strokedata in enumerate(sketches):
            padded[idx] = self.padding(strokedata, max_len)
        return padded

    def handle(self):
        """Pad all sketches to the longest sketch found in any split.

        Returns:
            (data_train, data_test, data_valid): float64 tensors of shape
            (num_sketches, max_len, 5), one per split.

        Raises:
            KeyError: if the 'train', 'test' or 'valid' split is missing.
        """
        # Fetch each split once: indexing a lazily loaded .npz archive re-reads
        # the array from disk on every access.
        splits = {name: self.data[name] for name in self.data}
        # max length of a sketch - max number of stroke vectors in a sketch
        max_len = max(
            (strokedata.shape[0]
             for sketches in splits.values()
             for strokedata in sketches),
            default=0,
        )
        data_train, data_test, data_valid = (
            self._pad_split(splits[name], max_len)
            for name in ('train', 'test', 'valid')
        )
        return data_train, data_test, data_valid
    
        
# Pad every sketch up front so each split becomes one dense tensor.
datahandler = DataHandler(data)
data_train, data_test, data_valid = datahandler.handle()  

In [6]:
# class Encoder(nn.Module):
#     def __init__(self):
#         super().__init__()
#         # bidirectional lstm:
#         self.lstm = nn.LSTM(5, hp.encoder_hidden_size, bidirectional=True)
#         # create mu and sigma from lstm's last output:
#         self.fc_mu = nn.Linear(2*hp.encoder_hidden_size, hp.Nz)
#         self.fc_sigma = nn.Linear(2*hp.encoder_hidden_size, hp.Nz)
#         # active dropout:
#         # self.train()

#     def forward(self, inputs, batch_size, hidden_cell=None):
#         if hidden_cell is None:
#             # then must init with zeros
#             if use_cuda:
#                 hidden = torch.zeros(2, batch_size, hp.encoder_hidden_size).cuda()
#                 cell = torch.zeros(2, batch_size, hp.encoder_hidden_size).cuda()
#             else:
#                 hidden = torch.zeros(2, batch_size, hp.encoder_hidden_size)
#                 cell = torch.zeros(2, batch_size, hp.encoder_hidden_size)
#             hidden_cell = (hidden, cell)
#         _, (hidden,cell) = self.lstm(inputs.float(), hidden_cell)
#         # hidden is (2, batch_size, hidden_size), we want (batch_size, 2*hidden_size):
#         hidden_forward, hidden_backward = torch.split(hidden,1,0)
#         hidden_cat = torch.cat([hidden_forward.squeeze(0), hidden_backward.squeeze(0)],1)
#         # mu and sigma:
#         mu = self.fc_mu(hidden_cat)
#         sigma_hat = self.fc_sigma(hidden_cat)
#         sigma = torch.exp(sigma_hat/2.)
#         # N ~ N(0,1)
#         z_size = mu.size()
                                   
#         if use_cuda:
#             N = torch.normal(torch.zeros(z_size),torch.ones(z_size)).cuda()
#         else:
#             N = torch.normal(torch.zeros(z_size),torch.ones(z_size))
#         z = mu + sigma*N
#         # mu and sigma_hat are needed for LKL loss
#         return z, mu, sigma_hat
    
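# NOTE: the call below is what raised the RuntimeError printed under this cell.
# nn.LSTM (without batch_first) reads its input as (seq_len, batch, features), so
# data_train, laid out as (num_sketches, max_len, 5), is taken to be a batch of
# max_len (presumably the 66 in the message) sequences, while the initial hidden
# state is built for hp.batch_size = 100. Pass one batch in (max_len, batch, 5)
# layout instead, e.g. data_train[:hp.batch_size].transpose(0, 1).
# encoder = Encoder()
# encoder.train()
# z, _, _ = encoder(data_train, hp.batch_size)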

RuntimeError: Expected hidden[0] size (2, 66, 64), got [2, 100, 64]

In [15]:
# Scratch check: is a stack of two tensors that each gained a leading axis
# contiguous, and how does it compare with keeping the two tensors in a tuple?
# a = torch.rand((3,1,5))
b = torch.tensor([[1, 2], [3, 4]])
c = torch.tensor([[5, 6], [7, 8]])
# One tensor with a new outermost axis: shape (2, 1, 2, 2).
d = torch.stack((b.unsqueeze(0), c.unsqueeze(0)), dim=0)
# Two separate (1, 2, 2) tensors, each made contiguous.
e = tuple(t.unsqueeze(0).contiguous() for t in (b, c))
print(d.is_contiguous(), d, e, sep='\n')
# print(a.squeeze(0).shape)

True
tensor([[[[1, 2],
          [3, 4]]],


        [[[5, 6],
          [7, 8]]]])
(tensor([[[1, 2],
         [3, 4]]]), tensor([[[5, 6],
         [7, 8]]]))


In [None]:
class Decoder(nn.Module):
    """LSTM decoder whose initial state is derived from a latent vector ``z``.

    NOTE(review): only the recurrent part exists so far. There is no layer yet
    that maps the LSTM outputs to the mixture / pen-state parameters described
    in the sampling notes further down this notebook.
    """

    def __init__(self):
        super().__init__()
        # Input width is Nz + 5 -- presumably one five-column stroke row
        # concatenated with z at every step (confirm against the caller).
        self.lstm = nn.LSTM(hp.Nz + 5, hp.decoder_hidden_size)
        # Projects z to the concatenated initial hidden and cell states.
        self.fc_hc = nn.Linear(hp.Nz, 2*hp.decoder_hidden_size)

    def forward(self, inputs, z, hidden_cell=None):
        """Run the decoder LSTM over ``inputs``.

        Args:
            inputs: tensor of shape (seq_len, batch, hp.Nz + 5), the layout
                nn.LSTM expects when batch_first is not set.
            z: tensor of shape (batch, hp.Nz); only read when ``hidden_cell``
                is None.
            hidden_cell: optional (hidden, cell) pair, each of shape
                (1, batch, hp.decoder_hidden_size), to continue from.

        Returns:
            (outputs, (hidden, cell)) exactly as returned by nn.LSTM.
            Previously the method built the initial state and then fell off
            the end, returning None.
        """
        if hidden_cell is None:
            # torch.tanh is the non-deprecated spelling of F.tanh.
            hidden, cell = torch.split(
                torch.tanh(self.fc_hc(z)), hp.decoder_hidden_size, 1)
            # torch.split returns views into the projected tensor; the LSTM
            # (notably its cuDNN path) requires contiguous state tensors.
            hidden_cell = (hidden.unsqueeze(0).contiguous(),
                           cell.unsqueeze(0).contiguous())
        # DataHandler.handle() yields float64 tensors while the LSTM weights are
        # float32, so cast like the commented-out encoder above does.
        outputs, hidden_cell = self.lstm(inputs.float(), hidden_cell)
        return outputs, hidden_cell
            

In [None]:
# Forms part of the Model class

"""
This is used after training: the paper says we can sample sketches from the model.
During the sampling process we generate the parameters for both the GMM and the categorical
distribution at each time step, and sample an outcome S'i for that time step.

GMM -> models the pen offsets (dx, dy)
Categorical distribution -> models the pen state (p1, p2, p3), calculated using outputs as logit values

We continue to sample until p3, so the end of sketch, is 1 or the number of iterations
has reached Nmax (length of longest sketch in training dataset). 
Sampling the output is not deterministic, it's a random sequence,
conditioned on the input latent vector z. We can control the level of randomness we'd like
our samples to have during the sampling process by using a temperature parameter.

- state_q_hat -> state_q_hat / temperature
- prime_k -> prime_k / temperature
- sigma_squared_x -> sigma_squared_x * temperature
- sigma_squared_y -> sigma_squared_y * temperature

- The softmax parameters of the categorical distribution, and the sigma parameters of the
bivariate normal distribution, can be scaled by a temperature parameter to control the
level of randomness in our samples.
- temperature is usually between 0 and 1; in the limit T -> 0 the model becomes deterministic
and samples consist of the most likely point in the probability density function.

- Unconditional generation uses this temperature methodology to control the randomness of
the distribution

Unconditional generation -> train the model to generate sketches unconditionally, where only
the decoder RNN module is trained
-> Initial hidden states and cell states of the decoder RNN are initialized to zero; the input xi
of the decoder RNN at each step is only Si-1 or Si-1', no need to concatenate the latent vector z

"""

def sample_sketch_state():
    """Placeholder for sampling the next sketch state; not implemented yet.

    The definition previously had no body, which is a syntax error that stops
    the whole cell from running. Raising keeps the cell importable while making
    any accidental call fail loudly.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("sample_sketch_state is not implemented yet")

def tweak_temperature(pi_pdf, temperature=None):
    """Sharpen or flatten a probability vector with a sampling temperature.

    The probabilities are turned back into logits with a log, divided by the
    temperature and renormalised with a softmax. Previously the scaled logits
    were computed and then discarded, so the function always returned None.

    Args:
        pi_pdf: 1-D array-like of probabilities.
        temperature: positive scaling factor; defaults to ``hp.temperature``.
            A value of 1 returns the input distribution; values below 1
            concentrate mass on the most likely entries.

    Returns:
        NumPy array with the same shape as ``pi_pdf`` that sums to 1.
    """
    if temperature is None:
        temperature = hp.temperature
    # Zero probabilities become -inf logits and end up as zero again after the
    # exponential, so silence the expected divide-by-zero warning from log(0).
    with np.errstate(divide='ignore'):
        logits = np.log(np.asarray(pi_pdf, dtype=float)) / temperature
    # Subtract the maximum before exponentiating for numerical stability.
    weights = np.exp(logits - logits.max())
    return weights / weights.sum()
    
def sample_bivariate_normal():
    """Placeholder for drawing from a bivariate normal; not implemented yet.

    The definition previously had no body, which is a syntax error that stops
    the whole cell from running. Raising keeps the cell importable while making
    any accidental call fail loudly.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("sample_bivariate_normal is not implemented yet")
    
def make_image():