In [1]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sklearn.model_selection import train_test_split
import time
from math import log10, floor
import os

from relational_rnn_models import RelationalMemoryGenerator
from discriminator import RelGANDiscriminator

In [2]:
# Configuration shared by the cells below

# Vocabulary of the mock data: n_vars variables named 'var0', 'var1', ...
n_vars = 3
var_ids = [idx for idx in range(n_vars)]
var_names = ['var{}'.format(idx) for idx in var_ids]

# Sampling probability of each variable in the mock data (same order as var_names)
var_weights = [0.1, 0.6, 0.3]

# Size of the mock data set
n_time_steps = 6  # number of draws per individual
n_individuals = 1000  # number of generated sequences

# NOTE(review): presumably the length of a generator noise input - TODO confirm
noise_length = 2

In [3]:
# Helper function(s)

# Round a number to n significant digits
def round_to_n(x, n = 2):
    """Return `x` rounded to `n` significant digits.

    Zero has no leading digit to anchor on, so it is returned as the
    integer 0 without any rounding.
    """
    if x == 0:
        return 0
    # Position of the leading digit: 0 for 1 <= |x| < 10, -1 for 0.1 <= |x| < 1, ...
    magnitude = int(floor(log10(abs(x))))
    return round(x, (n - 1) - magnitude)

# visualize the output of the generator
def visualize_output(generator, z, n = 2):
    """Print the generator output for input `z`, one row per time step.

    The result of `generator(z)` is reshaped to (n_time_steps, n_vars) using
    the notebook-level settings, and every entry is rounded to `n`
    significant digits before the row is printed.

    Args:
        generator: callable whose result for `z` is a tensor holding
            n_time_steps * n_vars elements.
        z: input forwarded unchanged to `generator`.
        n: number of significant digits to keep.
    """
    p = generator(z).view(n_time_steps, n_vars)
    # Convert to plain Python numbers first so that round_to_n and print
    # operate on numbers rather than on 0-d tensors.
    for row in p.detach().tolist():
        print([round_to_n(value, n) for value in row])
        
# Change a sequence of tokens into a one-hot representation
def tokens_to_one_hot(tokens, vocab_size):
    """Convert integer token ids into one-hot float vectors.

    Args:
        tokens: integer tensor of token ids, shape (sequence_length,) or
            (batch_size, sequence_length). A 1-D input is treated as a
            batch containing a single sequence.
        vocab_size: size of the one-hot dimension; every id must lie in
            [0, vocab_size).

    Returns:
        float32 tensor of shape (batch_size, sequence_length, vocab_size)
        located on the same device as `tokens`, with a 1 at each token id
        and 0 elsewhere.
    """
    if tokens.dim() == 1:
        tokens = tokens.reshape(1, -1)
    batch_size = tokens.shape[0]
    sequence_length = tokens.shape[1]

    # scatter_ needs int64 indices of the same rank as the target; reshape
    # (unlike view) also accepts non-contiguous slices.
    indices = tokens.reshape(batch_size, sequence_length, -1).long()

    # Allocate the zero-filled buffer on the tokens' device so that the
    # scatter below does not mix devices.
    tokens_onehot = torch.zeros(
        batch_size, sequence_length, vocab_size,
        dtype=torch.float32, device=tokens.device,
    )
    tokens_onehot.scatter_(2, indices, 1)

    return tokens_onehot

#y = data[:5, :]
#print(y)
#print(tokens_to_one_hot(y, n_vars))

In [4]:
# Generate mock data
# Every individual gets n_time_steps draws from var_names. A draw that directly
# follows 'var2' uses its own probabilities; every other draw uses var_weights.

events = []
after_var2_weights = [0.7, 0.2, 0.1]

start_time = time.time()

for indv in range(n_individuals):
    sequence = []
    for t in range(n_time_steps):
        follows_var2 = t > 0 and sequence[-1] == 'var2'
        probs = after_var2_weights if follows_var2 else var_weights
        sequence.append(np.random.choice(var_names, p=probs))
    events.append(sequence)

elapsed = time.time() - start_time
print('time taken:', round_to_n(elapsed), 'seconds')

# Show the first ten sequences
for row in range(10):
    print(events[row])

time taken: 0.12 seconds
['var2', 'var0', 'var1', 'var2', 'var0', 'var1']
['var1', 'var2', 'var1', 'var1', 'var2', 'var0']
['var0', 'var2', 'var0', 'var1', 'var1', 'var2']
['var1', 'var2', 'var0', 'var2', 'var0', 'var1']
['var1', 'var1', 'var1', 'var2', 'var0', 'var1']
['var1', 'var1', 'var2', 'var0', 'var0', 'var1']
['var2', 'var0', 'var0', 'var1', 'var1', 'var0']
['var1', 'var2', 'var0', 'var2', 'var0', 'var1']
['var1', 'var1', 'var1', 'var1', 'var2', 'var0']
['var2', 'var2', 'var0', 'var2', 'var0', 'var1']


In [5]:
# Map every variable name to its position in var_names, then encode the
# string events as a tensor of those indices.
vars_to_indices = {name: idx for idx, name in enumerate(var_names)}
print(vars_to_indices)
data = torch.tensor([[vars_to_indices[name] for name in sequence] for sequence in events])
print(data[:10])

{'var0': 0, 'var1': 1, 'var2': 2}
tensor([[2, 0, 1, 2, 0, 1],
        [1, 2, 1, 1, 2, 0],
        [0, 2, 0, 1, 1, 2],
        [1, 2, 0, 2, 0, 1],
        [1, 1, 1, 2, 0, 1],
        [1, 1, 2, 0, 0, 1],
        [2, 0, 0, 1, 1, 0],
        [1, 2, 0, 2, 0, 1],
        [1, 1, 1, 1, 2, 0],
        [2, 2, 0, 2, 0, 1]])


In [6]:
# Test generator output
# Smoke test: build a small RelationalMemoryGenerator, sample one sequence from
# it and print the memory before and after together with the generated output.

# Generator settings, passed positionally to the constructor below.
# NOTE(review): their exact meaning is defined in relational_rnn_models, which
# is not shown here.
mem_slots = 4
head_size = 2
embed_size = 2
num_tokens = n_vars # one token per mock variable
temperature = 1
num_heads = 1

G = RelationalMemoryGenerator(mem_slots, head_size, embed_size, num_tokens, temperature, num_heads)

# A batch of one sequence, started from token 0 and run for n_time_steps steps
start_token = torch.tensor([[0]])
sequence_length = n_time_steps
memory = G.initial_state(batch_size = 1)
print(memory)
# The call returns four values, of which the third is discarded here.
# NOTE(review): the meaning of the trailing None argument is not visible in
# this notebook - confirm against the generator's forward signature.
logits, tokens, _, memory = G(start_token, memory, sequence_length, None)
print(logits)
print(tokens)
print(memory)


tensor([[[1., 0.],
         [0., 1.],
         [0., 0.],
         [0., 0.]]])
tensor([[[1., 0., 0.],
         [0., 0., 1.],
         [0., 0., 1.],
         [1., 0., 0.],
         [1., 0., 0.],
         [0., 0., 1.]]], grad_fn=<CatBackward>)
tensor([[[0],
         [2],
         [2],
         [0],
         [0],
         [2]]])
tensor([[[-0.1105,  1.4202],
         [-0.7943,  2.1630],
         [-1.0249,  0.3890],
         [-1.0249,  0.3890]]], grad_fn=<AddBackward0>)


In [7]:
# Define generator evaluation function

def eval_generator(G, data, vocab_size):
    """Compare per-position token frequencies of real and generated sequences.

    The generator is started from the first token of every row in `data` and
    asked for the remaining `data.shape[1] - 1` tokens. For each token id the
    frequency at every sequence position is computed over the batch, once for
    `data` and once for the generated sequences.

    Args:
        G: generator. It is called as G(start_tokens, memory, length) and must
            return four values, the second of which holds the generated token
            ids; it must also provide initial_state(batch_size=...).
        data: integer tensor of token ids, shape (batch, sequence_length).
        vocab_size: number of token ids to score (ids 0 .. vocab_size - 1).

    Returns:
        Float tensor of shape (vocab_size,): for each token id, the Euclidean
        distance between its real and its generated per-position frequencies.
    """
    def token_frequencies(sequences):
        # Row i holds, per sequence position, the share of sequences whose
        # token at that position equals i.
        return torch.stack([
            torch.mean((sequences == i).type(torch.FloatTensor), dim = 0)
            for i in range(vocab_size)
        ])

    # The generated tokens are only compared as discrete ids, so no autograd
    # graph is needed for the forward pass.
    with torch.no_grad():
        _, data_fake, _, _ = G(data[:, :1], G.initial_state(batch_size = data.shape[0]), data.shape[1] - 1)

    # Flatten to (batch, length - 1) and put the shared start token back in front
    data_fake = data_fake.view(data.shape[0], -1)
    data_fake = torch.cat([data[:, :1], data_fake], dim = 1)

    word_means = token_frequencies(data)
    word_means_fake = token_frequencies(data_fake)

    scores = torch.sqrt(torch.sum((word_means - word_means_fake) ** 2, dim = 1))

    return scores

#print(data[:50, :])
# Evaluate G on the first 500 sequences
eval_batch = data[:500]
scores = eval_generator(G, eval_batch, n_vars)
print(scores)
# The same scores divided element-wise by the mock-data variable weights
print(scores / torch.tensor(var_weights))




tensor([0.1904, 0.3508, 0.1858])
tensor([1.9037, 0.5846, 0.6192])


In [8]:
# Shape exploration: follow a (1, 1) tensor of token ids through an embedding
# lookup, indexing, view and unsqueeze, printing the shape after each step.
test = torch.tensor([[2]])
print(test.shape)
embed = nn.Embedding(10, 4) # 10 embeddings of size 4
test_embed = embed(test)
print(test_embed)
print(test_embed.shape)
test_inp = test_embed[:, 0] # select index 0 along dim 1
print(test_inp.shape)
test_inp = test_inp.view(test_inp.shape[0], -1) # keep dim 0, flatten the rest
print(test_inp.shape)
test_inp.unsqueeze(dim=1).shape # insert a size-1 dim at position 1; displayed as the cell result



torch.Size([1, 1])
tensor([[[-0.8607,  0.0029,  0.7291,  0.8888]]], grad_fn=<EmbeddingBackward>)
torch.Size([1, 1, 4])
torch.Size([1, 4])
torch.Size([1, 4])


torch.Size([1, 1, 4])

In [9]:
# Shape exploration: view() with the original shape plus a trailing 1 appends
# a singleton dimension; the resulting shape is displayed as the cell result.
t = torch.rand(2,3,4)
t.view(*t.shape, 1).shape

torch.Size([2, 3, 4, 1])

In [10]:
# Build the discriminator.
# NOTE(review): the meaning of the constructor arguments is defined in
# discriminator.py, which is not shown here - confirm before changing them.
n_embeddings = 2
vocab_size = n_vars # the vocabulary is the set of mock variables
embed_size = 2

D = RelGANDiscriminator(n_embeddings, vocab_size, embed_size)

In [12]:
# Run the discriminator on the generator output.
# `logits` must already exist in the kernel: it is assigned by the cell that
# calls G for the first time.
inp = logits
print(inp)
print(D(inp))

tensor([[[1., 0., 0.],
         [0., 0., 1.],
         [0., 0., 1.],
         [1., 0., 0.],
         [1., 0., 0.],
         [0., 0., 1.]]], grad_fn=<CatBackward>)
tensor([[[0],
         [2],
         [2],
         [0],
         [0],
         [2]]])
