In [1]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from sklearn.model_selection import train_test_split
import time
from math import log10, floor
import os

from relational_rnn_models import RelationalMemoryGenerator
from discriminator import RelGANDiscriminator

In [2]:
# Notebook-wide settings for the mock-data experiment
n_vars = 3                                   # number of distinct variables (tokens)
var_ids = [var_id for var_id in range(n_vars)]
var_names = ['var%d' % var_id for var_id in var_ids]
var_weights = [0.1, 0.6, 0.3]                # default sampling distribution of the mock data
n_time_steps = 6                             # events per individual
n_individuals = 1000                         # number of mock sequences

noise_length = 2  # NOTE(review): not referenced in the visible cells - confirm it is still needed

In [3]:
# Helper function(s)

# round a number to n significant digits
def round_to_n(x, n = 2):
    """Return `x` rounded to `n` significant digits (0 is returned for x == 0)."""
    if x == 0:
        return 0
    # Position of the leading digit: 0 for 1 <= |x| < 10, -1 for 0.1 <= |x| < 1, ...
    magnitude = int(floor(log10(abs(x))))
    return round(x, (n - 1) - magnitude)

# visualize the output of the generator
def visualize_output(generator, z, n = 2):
    """Print the generator output for `z`, one row of rounded values per time step.

    Args:
        generator: callable whose result for `z` can be reshaped with
            `.view(n_time_steps, n_vars)`.
        z: input passed unchanged to `generator`.
        n: number of significant digits kept for each printed value.

    Relies on the notebook globals `n_time_steps`, `n_vars` and on `round_to_n`.
    """
    p = generator(z).view(n_time_steps, n_vars)
    for t in range(p.shape[0]):
        # .item() turns each 0-d tensor into a plain Python number so that the
        # built-in round() inside round_to_n operates on a float and the printed
        # row contains numbers instead of tensor reprs.
        print([round_to_n(p[t, f].item(), n) for f in range(p.shape[1])])

#y = data[:5, :]
#print(y)
#print(F.one_hot(y, n_vars))

In [4]:
# Generate mock data
#
# Every individual gets a sequence of n_time_steps events. An event is drawn
# from var_weights, except that the event directly after a trigger_var event
# is drawn from alternative_weights instead.

trigger_var = 'var2'
alternative_weights = [0.7, 0.2, 0.1]
alternatve_weights = alternative_weights # misspelled legacy name, kept in case another cell still reads it

# Dedicated seeded generator: re-running this cell reproduces the same data
# set without touching NumPy's global random state.
mock_data_rng = np.random.RandomState(0)

events = []

start_time = time.time()

for indv in range(n_individuals):
    tmp = []
    for t in range(n_time_steps):
        follows_trigger = t > 0 and tmp[t - 1] == trigger_var
        weights = alternative_weights if follows_trigger else var_weights
        var = mock_data_rng.choice(var_names, p=weights)
        tmp.append(var)
    events.append(tmp)

print('time taken:', round_to_n(time.time() - start_time), 'seconds')

# Preview the first sequences; slicing stays valid when n_individuals < 10
for event in events[:10]:
    print(event)

time taken: 0.12 seconds
['var1', 'var2', 'var2', 'var2', 'var1', 'var2']
['var1', 'var2', 'var0', 'var2', 'var0', 'var0']
['var1', 'var2', 'var0', 'var0', 'var1', 'var2']
['var0', 'var1', 'var2', 'var0', 'var1', 'var1']
['var2', 'var0', 'var1', 'var1', 'var1', 'var1']
['var2', 'var0', 'var1', 'var1', 'var0', 'var0']
['var1', 'var2', 'var0', 'var2', 'var0', 'var2']
['var0', 'var2', 'var0', 'var0', 'var2', 'var2']
['var1', 'var2', 'var0', 'var1', 'var0', 'var1']
['var2', 'var0', 'var1', 'var0', 'var1', 'var2']


In [5]:
# Map every variable name to its integer id and encode the event sequences as a tensor
vars_to_indices = {name: index for index, name in enumerate(var_names)}
print(vars_to_indices)
data = torch.tensor([[vars_to_indices[name] for name in sequence] for sequence in events])
print(data[:10])

{'var0': 0, 'var1': 1, 'var2': 2}
tensor([[1, 2, 2, 2, 1, 2],
        [1, 2, 0, 2, 0, 0],
        [1, 2, 0, 0, 1, 2],
        [0, 1, 2, 0, 1, 1],
        [2, 0, 1, 1, 1, 1],
        [2, 0, 1, 1, 0, 0],
        [1, 2, 0, 2, 0, 2],
        [0, 2, 0, 0, 2, 2],
        [1, 2, 0, 1, 0, 1],
        [2, 0, 1, 0, 1, 2]])


In [6]:
# Test generator output

# Small hyperparameters so an untrained generator can be inspected quickly
mem_slots = 4
head_size = 2
embed_size = 2
num_tokens = n_vars # vocabulary size: one token per mock variable
temperature = 1
num_heads = 2

G = RelationalMemoryGenerator(mem_slots, head_size, embed_size, num_tokens, temperature, num_heads)

start_token = torch.tensor([[0]]) # batch of one sequence, starting with token 0
sequence_length = n_time_steps
memory = G.initial_state(batch_size = 1)
print(memory)
# NOTE(review): the 4th positional argument (None) is omitted at every other
# call site in this notebook; its meaning is defined in relational_rnn_models - confirm
logits, tokens, _, memory = G(start_token, memory, sequence_length, None)
print(logits) # reused by the discriminator test cell below
print(tokens)
print(memory)


tensor([[[1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.]]])
tensor([[[1., 0., 0.],
         [0., 0., 1.],
         [0., 1., 0.],
         [0., 1., 0.],
         [0., 0., 1.],
         [0., 0., 1.]]], grad_fn=<CatBackward>)
tensor([[0, 2, 1, 1, 2, 2]])
tensor([[[-0.5250,  0.2673, -0.5040,  1.1574],
         [-1.1904,  0.4248, -0.5250,  1.1045],
         [-1.1649,  0.2547, -0.2111,  0.9070],
         [-1.1335,  0.2477, -0.4634,  1.3450]]], grad_fn=<AddBackward0>)


In [7]:
# Define generator evaluation functions

# TODO: make a test function which tests the actual word frequencies
def eval_generator(G, data, vocab_size):
    """Compare per-time-step token frequencies of real and generated sequences.

    Args:
        G: generator exposing `initial_state(batch_size=...)` and a call
            `G(start_tokens, memory, sequence_length)` whose second return
            value holds the sampled tokens.
        data: integer tensor of real sequences, one row per sequence.
        vocab_size: number of distinct tokens; tokens 0..vocab_size-1 are scored.

    Returns:
        Tensor with one score per token: the absolute difference between the
        real and the generated frequency of that token, summed over time
        steps. Lower is better.
    """
    def frequencies_per_step(tokens):
        # Row i holds, for every time step, the fraction of sequences showing token i
        return torch.stack([
            torch.mean((tokens == i).type(torch.FloatTensor), dim = 0)
            for i in range(vocab_size)
        ])

    # Sampling for evaluation needs no gradients, so skip building the autograd graph
    with torch.no_grad():
        _, data_fake, _, _ = G(data[:, :1], G.initial_state(batch_size = data.shape[0]), data.shape[1])

    word_means = frequencies_per_step(data)
    word_means_fake = frequencies_per_step(data_fake)

    scores = torch.sum(torch.abs(word_means - word_means_fake), dim = 1)

    return scores # for each word; the lower the better

def count_special_cases(data, vocab_size, special_token = 2):
    """Count which tokens directly follow `special_token`.

    Args:
        data: integer tensor of sequences, one row per sequence. Tokens are
            assumed to lie in [0, vocab_size).
        vocab_size: number of distinct tokens (length of the result).
        special_token: token whose successors are counted. Defaults to 2, the
            token that was previously hard-coded here.

    Returns:
        Float tensor of length `vocab_size`; entry k is the number of
        positions where token k comes immediately after `special_token`.
        The last position of a sequence has no successor and is ignored.
    """
    previous_tokens = data[:, :-1]
    next_tokens = data[:, 1:]
    # Successors of every occurrence of the special token, as a flat vector
    followers = next_tokens[previous_tokens == special_token]
    return torch.bincount(followers, minlength = vocab_size).type(torch.FloatTensor)

def test_special_case(G, data, vocab_size):
    """Score how well the generator reproduces the successor distribution of the special token.

    The successor counts come from `count_special_cases`, once for the real
    `data` and once for sequences sampled from `G` (same start tokens, same
    length, same number of sequences).

    Args:
        G: generator exposing `initial_state(batch_size=...)` and a call
            `G(start_tokens, memory, sequence_length)` whose second return
            value holds the sampled tokens.
        data: integer tensor of real sequences, one row per sequence.
        vocab_size: number of distinct tokens.

    Returns:
        Tensor with the absolute difference between the real and generated
        successor frequencies, one entry per token. Lower is better.
    """
    def to_frequencies(counts):
        # Counts are whole numbers, so the total is either 0 or >= 1. Clamping
        # only changes the "no occurrence" case, which now yields all-zero
        # frequencies instead of 0/0 = NaN.
        return counts / torch.sum(counts).clamp(min = 1)

    counts_real = count_special_cases(data, vocab_size)

    # Sampling for evaluation needs no gradients
    with torch.no_grad():
        _, data_fake, _, _ = G(data[:, :1], G.initial_state(batch_size = data.shape[0]), data.shape[1])

    counts_fake = count_special_cases(data_fake, vocab_size)

    scores = torch.abs(to_frequencies(counts_real) - to_frequencies(counts_fake))

    return scores

# Baseline: evaluate the untrained generator G from the cell above
#print(data[:5, :])
scores = eval_generator(G, data, n_vars)
print(scores)
print(scores / torch.tensor(var_weights)) # adjusted to the words' frequencies

# Successor distribution of the special token (real vs. generated)
scores = test_special_case(G, data, n_vars)
print(scores)



tensor([0.5960, 0.6060, 0.0860])
tensor([5.9600, 1.0100, 0.2867])
tensor([0.3799, 0.1848, 0.1950])


In [8]:
# Test Discriminator output

# Discriminator hyperparameters (note: embed_size and sequence_length
# overwrite the globals set in the generator test cell with the same values)
n_embeddings = 2
vocab_size = n_vars
embed_size = 2
sequence_length = n_time_steps
out_channels = 5 
filter_sizes = [2, 3] # values can be at most the sequence_length

D = RelGANDiscriminator(n_embeddings, vocab_size, embed_size, sequence_length, out_channels, filter_sizes)

# Feed the generator output from the generator test cell to the discriminator
inp = logits
print(inp)
# In the recorded output the call with False prints one value per embedding
# (n_embeddings = 2) and the default call prints a single value per sequence.
# NOTE(review): the second argument's exact meaning lives in discriminator.py - confirm
print(D(inp, False))
print(D(inp))


tensor([[[1., 0., 0.],
         [0., 0., 1.],
         [0., 1., 0.],
         [0., 1., 0.],
         [0., 0., 1.],
         [0., 0., 1.]]], grad_fn=<CatBackward>)
tensor([[0.4774, 0.4754]], grad_fn=<SqueezeBackward1>)
tensor([0.4764], grad_fn=<MeanBackward2>)


In [9]:
# Define the generator pre-train function

def pretrain_generator(G, train_data, vocab_size, n_epochs, lr, print_step = 10):
    """Pre-train the generator by fitting its output to the one-hot training sequences.

    Every epoch is one full-batch Adam step: the generator is started from the
    first token of each training sequence, unrolled for the full sequence
    length, and its first return value is compared with the one-hot encoded
    training data using binary cross-entropy.

    Args:
        G: generator module exposing `parameters()`, `initial_state(batch_size=...)`
            and a call `G(start_tokens, memory, sequence_length)`.
        train_data: integer tensor of training sequences, one row per sequence.
        vocab_size: number of distinct tokens (width of the one-hot encoding).
        n_epochs: number of optimisation steps.
        lr: Adam learning rate.
        print_step: log the loss every `print_step` epochs; 0 or None disables
            logging (0 used to raise ZeroDivisionError).

    NOTE(review): nn.BCELoss expects probabilities in [0, 1], while the value
    it receives here is named `logits`; the recorded outputs show one-hot rows
    for it - confirm this is the intended pre-training signal.
    """
    loss_function = nn.BCELoss()
    optimizer = torch.optim.Adam(G.parameters(), lr=lr)

    train_data_one_hot = F.one_hot(train_data, vocab_size).type(torch.FloatTensor)
    start_token = train_data[:, :1]
    sequence_length = train_data.shape[1]
    batch_size = train_data.shape[0]

    for e in range(n_epochs):
        optimizer.zero_grad()

        memory = G.initial_state(batch_size = batch_size)

        logits, _, _, _ = G(start_token, memory, sequence_length)

        loss = loss_function(logits, train_data_one_hot)

        loss.backward()
        optimizer.step()

        # A falsy print_step switches logging off instead of dividing by zero
        if print_step and e % print_step == 0:
            print(
                "[Epoch %d/%d] [G loss: %f]"
                % (e, n_epochs, loss.item())
            )

In [10]:
# Pre-training experiment: fresh generator, scored before and after pre-training

# Same generator hyperparameters as in the generator test cell
mem_slots = 4
head_size = 2
embed_size = 2
num_tokens = n_vars
temperature = 1
num_heads = 2

G = RelationalMemoryGenerator(mem_slots, head_size, embed_size, num_tokens, temperature, num_heads)

# Scores are printed raw and divided by each variable's base frequency
scores = eval_generator(G, data, n_vars)
print(scores, scores / torch.tensor(var_weights))
# Positional arguments: n_epochs = 10, lr = 0.001, print_step = 2
pretrain_generator(G, data, n_vars, 10, 0.001, 2)
scores = eval_generator(G, data, n_vars)
print(scores, scores / torch.tensor(var_weights))

tensor([0.4860, 0.1620, 0.5720]) tensor([4.8600, 0.2700, 1.9067])
[Epoch 0/10] [G loss: 10.051292]
[Epoch 2/10] [G loss: 9.741279]
[Epoch 4/10] [G loss: 9.689098]
[Epoch 6/10] [G loss: 9.649196]
[Epoch 8/10] [G loss: 9.732070]
tensor([0.3790, 0.2710, 0.1120]) tensor([3.7900, 0.4517, 0.3733])


In [11]:
# Define the training function

def train(G, D, train_data, vocab_size, n_epochs, lr, print_step = 10):
    """Pre-train the generator, then train generator and discriminator adversarially.

    Pre-training runs `pretrain_generator` for `n_epochs // 10` epochs. Each
    adversarial epoch is one full-batch generator step followed by one
    full-batch discriminator step, both using binary cross-entropy and Adam.

    Args:
        G: generator module exposing `parameters()`, `initial_state(batch_size=...)`
            and a call `G(start_tokens, memory, sequence_length)` whose first
            return value is fed to the discriminator.
        D: discriminator module; `D(x)` must return values in [0, 1].
        train_data: integer tensor of real sequences, one row per sequence.
        vocab_size: number of distinct tokens (width of the one-hot encoding).
        n_epochs: number of adversarial epochs.
        lr: learning rate for both Adam optimisers (and for pre-training).
        print_step: log the losses every `print_step` epochs; 0 or None
            disables logging.
    """
    print('pretraining generator...')
    pretrain_generator(G, train_data, vocab_size, n_epochs // 10, lr, n_epochs // 10)
    print('pretraining complete')

    adversarial_loss = torch.nn.BCELoss()

    optimizer_G = torch.optim.Adam(G.parameters(), lr=lr)
    optimizer_D = torch.optim.Adam(D.parameters(), lr=lr)

    train_data_one_hot = F.one_hot(train_data, vocab_size).type(torch.FloatTensor)

    start_token = train_data[:, :1]
    sequence_length = train_data.shape[1]
    batch_size = train_data.shape[0]

    for e in range(n_epochs):
        # ---- Generator step ----
        optimizer_G.zero_grad()

        # Generate a batch of sequences
        memory = G.initial_state(batch_size = batch_size)
        fake_one_hot, _, _, _ = G(start_token, memory, sequence_length)

        # Loss measures generator's ability to fool the discriminator.
        # Targets are created with ones_like / zeros_like so they always have
        # exactly the discriminator's output shape; BCELoss expects input and
        # target of the same size.
        judged_fake = D(fake_one_hot)
        g_loss = adversarial_loss(judged_fake, torch.ones_like(judged_fake))

        g_loss.backward()
        optimizer_G.step()

        # ---- Discriminator step ----
        # zero_grad also discards the gradients that g_loss.backward() left on D
        optimizer_D.zero_grad()

        # Measure discriminator's ability to classify real from generated samples;
        # detach() keeps this step from back-propagating into the generator
        judged_real = D(train_data_one_hot)
        judged_detached_fake = D(fake_one_hot.detach())
        real_loss = adversarial_loss(judged_real, torch.ones_like(judged_real))
        fake_loss = adversarial_loss(judged_detached_fake, torch.zeros_like(judged_detached_fake))
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        # A falsy print_step switches logging off instead of dividing by zero
        if print_step and e % print_step == 0:
            print(
                "[Epoch %d/%d] [D loss: %f] [G loss: %f]"
                % (e, n_epochs, d_loss.item(), g_loss.item())
            )


In [12]:
# Full experiment: build a fresh generator and discriminator, score the
# generator, train the GAN, then score the generator again.

# Generator params
mem_slots = 4
head_size = 2
embed_size = 2
num_tokens = n_vars
temperature = 1
num_heads = 2

G = RelationalMemoryGenerator(mem_slots, head_size, embed_size, num_tokens, temperature, num_heads)

# Discriminator params
n_embeddings = 2
vocab_size = n_vars
embed_size = 2
sequence_length = n_time_steps
out_channels = 5 
filter_sizes = [2, 3] # values can be at most the sequence_length

D = RelGANDiscriminator(n_embeddings, vocab_size, embed_size, sequence_length, out_channels, filter_sizes)

# Printed values: raw scores, scores divided by base frequencies, special-case scores
scores = eval_generator(G, data, n_vars)
scores_special_case = test_special_case(G, data, n_vars)
print('score before training:', scores, scores / torch.tensor(var_weights), scores_special_case)

# Train the GAN
# Positional arguments: n_epochs = 1000, lr = 0.001, print_step = 100
train(G, D, data, n_vars, 1000, 0.001, 100)

scores = eval_generator(G, data, n_vars)
scores_special_case = test_special_case(G, data, n_vars)
print('score after training:', scores, scores / torch.tensor(var_weights), scores_special_case)

score before training: tensor([0.0740, 0.3160, 0.2420]) tensor([0.7400, 0.5267, 0.8067]) tensor([0.4358, 0.2935, 0.1424])
pretraining generator...
[Epoch 0/100] [G loss: 9.477306]
pretraining complete
[Epoch 0/1000] [D loss: 0.695251] [G loss: 0.754841]


  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


[Epoch 100/1000] [D loss: 0.692670] [G loss: 0.647893]
[Epoch 200/1000] [D loss: 0.675506] [G loss: 0.765716]
[Epoch 300/1000] [D loss: 0.673358] [G loss: 0.745164]
[Epoch 400/1000] [D loss: 0.632296] [G loss: 0.761753]
[Epoch 500/1000] [D loss: 0.631214] [G loss: 0.781789]
[Epoch 600/1000] [D loss: 0.631901] [G loss: 0.797003]
[Epoch 700/1000] [D loss: 0.643933] [G loss: 0.803101]
[Epoch 800/1000] [D loss: 0.668263] [G loss: 0.789163]
[Epoch 900/1000] [D loss: 0.700564] [G loss: 0.735136]
score after training: tensor([0.1790, 0.3910, 0.4560]) tensor([1.7900, 0.6517, 1.5200]) tensor([0.1594, 0.0934, 0.0660])
