In [1]:
import pandas as pd
import torch
import numpy as np
import os
import pickle
from custom_model.model import SimpleNet, SAttendedSimpleNet


# Prefer the GPU whenever PyTorch reports one; otherwise run on the CPU.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Device: ', device)

def read_pickle(fname):
    """Load and return the object stored in the pickle file at `fname`.

    The file is opened in binary mode and closed again before returning.
    Only use this on trusted files: unpickling can execute arbitrary code.
    """
    with open(fname, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

# Load the preprocessed WikiQA frames.
# NOTE: pickle files can execute arbitrary code when loaded; only read trusted files.
df_train = pd.read_pickle('./data/processed/wikiqa_df_train.pickle')
df_test_full = pd.read_pickle('./data/processed/wikiqa_df_test.pickle')

# Split the held-out frame by position: first half -> test, second half -> validation.
# Positional slicing keeps the original index labels and, unlike an equal-division
# np.split, does not raise when the number of rows is odd (the extra row goes to
# the validation half).
n_test = len(df_test_full) // 2
df_test = df_test_full.iloc[:n_test]
df_val = df_test_full.iloc[n_test:]

voc = read_pickle('./data/processed/vocabulary.pickle')

Device:  cuda


In [2]:
# df_train = df_train.iloc[:100]

# Report the size of each split (the label of the last entry used to be printed
# as "Val shape (...): " because the colon sat after the placeholder).
print('Train shape: {}\nTest shape: {}\nVal shape: {}'.format(
    df_train.shape, df_test.shape, df_val.shape))

# Instantiate both models. The first argument is the vocabulary size stored in
# `voc`; the meaning of the remaining positional arguments is defined in
# custom_model.model, which is not shown here.
net_simple = SimpleNet(voc['voc_len'], 16, 16)
net_att = SAttendedSimpleNet(voc['voc_len'], 128, 128, 64, 3)

# Turn the encoded question / sentence columns and the labels of the training
# frame into tensors (list -> ndarray -> tensor).
Xq = torch.from_numpy(np.array(df_train.Question_encoded.values.tolist()))
Xa = torch.from_numpy(np.array(df_train.Sentence_encoded.values.tolist()))
t = torch.from_numpy(np.array(df_train.Label.values.tolist()))

# Training hyper-parameters handed to fit() further down.
batch_size = 256
epochs = 20

Train shape: (20347, 9) 
Test shape: (3058, 9) 
Val shape (3058, 9): 


In [3]:
# Validation split: encoded questions, encoded sentences and labels as arrays.
Xq_val = np.array(df_val['Question_encoded'].values.tolist())
Xa_val = np.array(df_val['Sentence_encoded'].values.tolist())
t_val = np.array(df_val['Label'].values.tolist())

# Layout passed to fit(): [(question_tensor, sentence_tensor), label_tensor].
val_data = [
    tuple(torch.from_numpy(arr) for arr in (Xq_val, Xa_val)),
    torch.from_numpy(t_val),
]

In [4]:
# The optimizer *class* (not an instance) is handed to fit().
optimizer = torch.optim.Adam

# Cross-entropy with per-class weights 0.05 (class 0) and 1.0 (class 1),
# with the weight tensor placed on the selected device.
loss_func = torch.nn.CrossEntropyLoss(
    weight=torch.tensor([0.05, 1.]).to(device),
)

# Train both models with identical settings, the simple one first.
for _model in (net_simple, net_att):
    _model.fit(Xq, Xa, t, batch_size, epochs, loss_func, optimizer, device, val_data)

Training...
Epoch: 0, loss: 0.72153. 5.9 [s] per epoch. Val loss: 0.69855
Epoch: 1, loss: 0.68594. 5.4 [s] per epoch. Val loss: 0.69301
Epoch: 2, loss: 0.65495. 5.2 [s] per epoch. Val loss: 0.73162
Epoch: 3, loss: 0.60546. 5.3 [s] per epoch. Val loss: 0.68717
Epoch: 4, loss: 0.55918. 5.2 [s] per epoch. Val loss: 0.67085
Epoch: 5, loss: 0.54345. 5.3 [s] per epoch. Val loss: 0.65700
Epoch: 6, loss: 0.47130. 5.1 [s] per epoch. Val loss: 0.65441
Epoch: 7, loss: 0.41212. 5.2 [s] per epoch. Val loss: 0.68178
Epoch: 8, loss: 0.48990. 5.3 [s] per epoch. Val loss: 0.66767
Epoch: 9, loss: 0.47798. 5.1 [s] per epoch. Val loss: 0.67524
Epoch: 10, loss: 0.38176. 5.6 [s] per epoch. Val loss: 0.66803
Epoch: 11, loss: 0.39876. 5.3 [s] per epoch. Val loss: 0.67515
Epoch: 12, loss: 0.37616. 5.3 [s] per epoch. Val loss: 0.67172
Epoch: 13, loss: 0.40056. 5.4 [s] per epoch. Val loss: 0.66503
Epoch: 14, loss: 0.40066. 5.5 [s] per epoch. Val loss: 0.65908
Epoch: 15, loss: 0.37227. 5.4 [s] per epoch. Val loss

In [5]:
import matplotlib.pyplot as plt
# Plot the recorded `losses` against `steps` for both models side by side.
# Each panel is titled and labelled so the figure can be read on its own.
fig, (ax_simple, ax_att) = plt.subplots(1, 2, figsize=(17, 8))

ax_simple.plot(net_simple.steps, net_simple.losses)
ax_simple.set(title='SimpleNet', xlabel='step', ylabel='loss')

ax_att.plot(net_att.steps, net_att.losses)
ax_att.set(title='SAttendedSimpleNet', xlabel='step', ylabel='loss')

plt.show()

<matplotlib.figure.Figure at 0x7f77c4078ac8>

In [None]:
# Persist both trained models to the working directory. The whole model
# objects are passed to torch.save, not just their state_dicts.
torch.save(net_simple, 'net_simple.torch')
torch.save(net_att, 'net_att.torch')

# Reload the models from the files written above, rebinding the same names.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects fully pickled model objects — confirm against the installed version.
net_simple = torch.load('net_simple.torch')
net_att = torch.load('net_att.torch')

In [None]:
# Test split: encoded questions, encoded sentences and labels as arrays.
Xq_test = np.array(df_test.Question_encoded.values.tolist())
Xa_test = np.array(df_test.Sentence_encoded.values.tolist())
t_test = np.array(df_test.Label.values.tolist())

# Move the tensors to the device selected at the top of the notebook rather
# than calling .cuda() unconditionally, so the cell also runs on CPU-only hosts.
Xq_test = torch.from_numpy(Xq_test).to(device)
Xa_test = torch.from_numpy(Xa_test).to(device)
t_test = torch.from_numpy(t_test).to(device)

In [None]:
# Score the test set with the simple model. Gradients are not needed for
# inference, so autograd tracking is switched off to avoid keeping the graph
# for the whole test set in memory.
# NOTE(review): if the model contains dropout/batch-norm layers it should also
# be put in eval mode before predicting — confirm in custom_model.model.
with torch.no_grad():
    net_simple_pred = net_simple(Xq_test, Xa_test)

In [None]:
# Score the test set with the attention model on the CPU. `net_att.to('cpu')`
# is called on the model object itself, so (assuming it is a torch.nn.Module)
# net_att stays on the CPU after this cell. Autograd tracking is disabled
# because only the forward outputs are needed.
with torch.no_grad():
    net_att_pred = net_att.to('cpu')(Xq_test.to('cpu'), Xa_test.to('cpu'))

In [None]:
# Convert the simple model's predictions to a NumPy array (via nested Python
# lists) and display it.
y_pred_simple = np.array(net_simple_pred.tolist())
y_pred_simple

In [None]:
# Convert the attention model's predictions to a NumPy array (via nested
# Python lists) and display it.
y_pred_att = np.array(net_att_pred.tolist())
y_pred_att

In [None]:
from sklearn.metrics import roc_auc_score

In [None]:
# ROC AUC of the simple model: column 1 of the prediction array is used as the
# score for the positive class.
roc_auc_score(t_test.tolist(), y_pred_simple[:, 1])

In [None]:
# ROC AUC of the attention model: column 1 of the prediction array is used as
# the score for the positive class.
roc_auc_score(t_test.tolist(), y_pred_att[:, 1])

In [None]:
# Shape of the first entry of net_att.l_probas.
# NOTE(review): presumably the attention probabilities recorded during the last
# forward pass, one entry per attention layer — confirm in custom_model.model.
net_att.l_probas[0].size()

In [None]:
# 'Question' value of the test row whose index label is 0.
df_test.loc[0, 'Question']

In [None]:
# 'Sentence' value of the test row whose index label is 0.
df_test.loc[0, 'Sentence']

In [None]:
# For every entry of l_probas, add up the rows of its element 0, then add the
# per-entry results together.
sum(sum(layer_probas[0]) for layer_probas in net_att.l_probas)

In [None]:
# Question text of the test row labelled 0.
string = df_test.loc[0, 'Question']

# Row-sum of element 0 of every l_probas entry, accumulated over all entries
# and converted to a plain Python list.
score = sum(sum(layer_probas[0]) for layer_probas in net_att.l_probas).tolist()

In [None]:
# Pair every whitespace-separated token of the question with its score and
# sort ascending by score (single column labelled 0, tokens as the index).
# The rows are built from (token, score) pairs instead of a dict so that a
# word occurring more than once in the question keeps one row per occurrence
# rather than being collapsed into a single entry. zip() stops at the shorter
# of the two sequences, so surplus scores are ignored.
token_scores = list(zip(string.split(), score))
df = pd.DataFrame(
    [value for _, value in token_scores],
    index=[token for token, _ in token_scores],
).sort_values(0)
df

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
# Draw one heat map per inspected l_probas entry for element 0 of the batch.
tokens = string.split()
n_tokens = len(tokens)

# 3 entries are inspected. NOTE(review): presumably this matches the last
# SAttendedSimpleNet constructor argument — confirm.
for i in range(3):
    plt.figure()
    attn = net_att.l_probas[i][0].detach().numpy()
    # Crop the matrix to the number of question tokens so it always matches
    # the tick labels (a fixed 8x8 crop only fits an 8-token question).
    sns.heatmap(attn[:n_tokens, :n_tokens], xticklabels=tokens, yticklabels=tokens)
    plt.title('l_probas[{}]'.format(i))
    plt.show()

In [None]:
# Row 5 of element 0 of the second l_probas entry, as a NumPy array.
net_att.l_probas[1][0].detach().numpy()[5]

In [None]:
# Shape of the second entry of net_att.l_probas.
net_att.l_probas[1].size()

In [None]:
# Total number of scalar elements across all parameters of net_att.
sum(p.numel() for p in net_att.parameters())

In [None]:
# Total number of scalar elements across all parameters of net_simple.
sum(p.numel() for p in net_simple.parameters())

In [None]:
# (rows, columns) of the training frame.
df_train.shape

In [None]:
# Scratch arithmetic; the notebook does not say what these constants stand for.
# TODO(review): name the quantities or delete this cell.
250*60*4 * 0.1524 / 2.

In [None]:
# Scratch arithmetic; the trailing "/ 60 / 60" looks like a seconds-to-hours
# conversion, but the constants are undocumented.
# TODO(review): name the quantities or delete this cell.
99000/0.1524 * 15 / 2 / 60 / 60

In [None]:
import copy
def copy_layer(layer, n):
    """Return a ModuleList holding `n` independent, re-initialized copies of `layer`.

    Each entry is a deep copy of `layer`, so the entries own separate
    parameter tensors, and `reset_parameters()` is called on every copy so
    that each one gets its own initialization. Repeating the same module
    object `n` times would instead make every entry share one set of weights.
    The `layer` passed in is left untouched.

    Args:
        layer: module to replicate; it must provide `reset_parameters()`
            (e.g. `torch.nn.Linear`).
        n: number of copies to create.

    Returns:
        torch.nn.ModuleList with `n` distinct modules.

    Raises:
        AttributeError: if `layer` has no `reset_parameters` method.
    """
    clones = [copy.deepcopy(layer) for _ in range(n)]
    for clone in clones:
        clone.reset_parameters()
    return torch.nn.ModuleList(clones)

In [None]:
# def clones(module, N):
#     return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

In [None]:
# Build a 3-entry ModuleList from a Linear(10, 10) layer with copy_layer; the
# following cells display the weights of entries 0 and 1 for comparison.
linear_query = copy_layer(torch.nn.Linear(10, 10), 3)

In [None]:
# Weight matrix of the first entry returned by copy_layer.
linear_query[0].weight


In [None]:
# Weight matrix of the second entry returned by copy_layer.
linear_query[1].weight

In [None]:
# Reference construction: the Linear constructor is called three times, so the
# list holds three separately constructed layers.
a = [torch.nn.Linear(10,10) for _ in range(3)]

In [None]:
# Weight matrix of the first separately constructed layer.
a[0].weight

In [None]:
# Weight matrix of the second separately constructed layer.
a[1].weight

In [None]:
# Three separately constructed Linear(10, 10) layers collected in a list.
z = [torch.nn.Linear(10,10) for _ in range(3)]

In [None]:
# Weight matrix of the first layer in z.
z[0].weight

In [None]:
# Weight matrix of the second layer in z.
z[1].weight

In [None]:
# `a` is rebound to the same object as z[1] (no copy is made), so calling
# reset_parameters() here re-initializes z[1] in place; the new weights are
# then displayed.
a = z[1]
a.reset_parameters()
a.weight

In [None]:
# Scratch arithmetic; the constants are undocumented.
# TODO(review): name the quantities or delete this cell.
4500./7

In [None]:
# Scratch arithmetic; the trailing "/ 60 / 60" looks like a seconds-to-hours
# conversion, but the constants are undocumented.
# TODO(review): name the quantities or delete this cell.
110 * 650 / 60 / 60