In [1]:
import pandas as pd
import torch
import numpy as np
import os
import pickle
import matplotlib.pyplot as plt
from custom_model.model import SimpleNet, SAttendedSimpleNet, SAttendedNet, CrossAttentionNet
import seaborn as sns
from sklearn.metrics import roc_auc_score


# Prefer the GPU whenever torch can see one; otherwise everything runs on the CPU.
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda") if USE_CUDA else torch.device("cpu")
print('Device: ', device)

def read_pickle(fname):
    """Unpickle and return the single object stored in the file at ``fname``.

    The file is opened in binary mode and closed again before returning.
    Only use this on trusted files: unpickling can execute arbitrary code.
    """
    with open(fname, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload

# Load the pre-processed train / held-out frames and the embedding matrix.
df_train = pd.read_pickle('./data/processed/wikiqa_df_train.pickle')
df_test_full = pd.read_pickle('./data/processed/wikiqa_df_test.pickle')

# Shuffle the held-out frame with a fixed seed and cut it into two halves:
# the first becomes the test split, the second the validation split.
# Plain positional slicing is used instead of np.split(DataFrame, 2), which
# goes through the deprecated DataFrame.swapaxes and raises when the row
# count is odd. For an even row count the two halves are identical to what
# np.split produced.
df_holdout = df_test_full.sample(frac=1., random_state=42)
n_test = len(df_holdout) // 2
df_test, df_val = df_holdout.iloc[:n_test], df_holdout.iloc[n_test:]

emb_weights = np.load('./data/processed/index2vector.npy')

# Rows of the embedding matrix = vocabulary entries, columns = vector size.
vocab_size, embed_dim = emb_weights.shape[0], emb_weights.shape[1]

# Debug switch: uncomment to train on the first 100 rows only.
# df_train = df_train.iloc[:100]

print('Train shape: {} \n\
Test shape: {} \n\
Val shape {}: '.format(df_train.shape, df_test.shape, df_val.shape))

# Both attention variants are constructed from the same positional arguments.
# NOTE(review): the meaning of 64, 32, 1, 22 and 287 is defined by the
# constructors in custom_model.model (presumably layer sizes and maximum
# question / answer lengths) — TODO confirm and name them.
attention_args = (vocab_size, embed_dim, 64, 32, 1, 22, 287, emb_weights)

net_simple = SimpleNet(vocab_size, embed_dim, 64, emb_weights)
# Stale experiment (refers to a `voc` object that this notebook never defines):
# net_att = SAttendedSimpleNet(voc['voc_len'], 128, 128, 64, 3)
net_att = SAttendedNet(*attention_args)
net_crossover = CrossAttentionNet(*attention_args)

# Encoded questions, encoded answer sentences and labels of the training
# frame, each converted column -> nested list -> ndarray -> tensor.
Xq = torch.from_numpy(np.array(df_train.Question_encoded.values.tolist()))
Xa = torch.from_numpy(np.array(df_train.Sentence_encoded.values.tolist()))
t = torch.from_numpy(np.array(df_train.Label.values.tolist()))

# Training hyper-parameters handed to the models' fit() calls.
batch_size, epochs = 256, 30

Device:  cuda
Train shape: (20347, 9) 
Test shape: (3058, 9) 
Val shape (3058, 9): 


In [2]:
# Same column -> ndarray conversion as for the training frame, applied to the
# validation split.
Xq_val, Xa_val, t_val = (
    np.array(df_val[column].values.tolist())
    for column in ('Question_encoded', 'Sentence_encoded', 'Label')
)

# Layout handed to fit(): [(question tensor, answer tensor), label tensor].
val_inputs = (torch.from_numpy(Xq_val), torch.from_numpy(Xa_val))
val_data = [val_inputs, torch.from_numpy(t_val)]

In [3]:
# The optimizer *class* (not an instance) is handed to fit().
optimizer = torch.optim.Adam
# Per-class weights for the loss: class 0 -> 0.05, class 1 -> 0.95.
class_weights = torch.tensor([0.05, 0.95]).to(device)
loss_func = torch.nn.CrossEntropyLoss(weight=class_weights)

# Arguments shared by every fit() call below.
fit_common = (Xq, Xa, t, batch_size, epochs, loss_func, optimizer, device)

# NOTE(review): the argument after `device` looks like a gradient-clipping
# threshold (the recorded output shows a clip_grad_norm warning for the run
# that passes 90.) — TODO confirm against custom_model.model.
# net_crossover.fit(Xq, Xa, t, batch_size, epochs, loss_func, optimizer, device, 90., val_data)
net_simple.fit(*fit_common, None, val_data)
net_att.fit(*fit_common, 90., val_data)

Training...
Epoch: 0, loss: 0.69408. 1.7 [s] per epoch. Val loss: 0.69094
Epoch: 1, loss: 0.68852. 1.3 [s] per epoch. Val loss: 0.68543
Epoch: 2, loss: 0.68257. 1.2 [s] per epoch. Val loss: 0.68406
Epoch: 3, loss: 0.68027. 1.3 [s] per epoch. Val loss: 0.69411
Epoch: 4, loss: 0.68101. 1.1 [s] per epoch. Val loss: 0.71053
Epoch: 5, loss: 0.67971. 1.2 [s] per epoch. Val loss: 0.69788
Epoch: 6, loss: 0.67778. 1.2 [s] per epoch. Val loss: 0.69499
Epoch: 7, loss: 0.67528. 1.2 [s] per epoch. Val loss: 0.69671
Epoch: 8, loss: 0.67343. 1.2 [s] per epoch. Val loss: 0.69802
Epoch: 9, loss: 0.67170. 1.1 [s] per epoch. Val loss: 0.70017
Epoch: 10, loss: 0.66893. 1.2 [s] per epoch. Val loss: 0.70496
Epoch: 11, loss: 0.66667. 1.2 [s] per epoch. Val loss: 0.71250
Epoch: 12, loss: 0.66427. 1.2 [s] per epoch. Val loss: 0.73847
Epoch: 13, loss: 0.66852. 1.2 [s] per epoch. Val loss: 0.70482
Epoch: 14, loss: 0.66748. 1.1 [s] per epoch. Val loss: 0.71612
Epoch: 15, loss: 0.65892. 1.2 [s] per epoch. Val loss

  _ = nn.utils.clip_grad_norm(self.parameters(), clip)


Epoch: 0, loss: 0.69328. 2.5 [s] per epoch. Val loss: 0.69287
Epoch: 1, loss: 0.67387. 2.3 [s] per epoch. Val loss: 0.68654
Epoch: 2, loss: 0.63553. 2.5 [s] per epoch. Val loss: 0.67835
Epoch: 3, loss: 0.63525. 2.5 [s] per epoch. Val loss: 0.68375
Epoch: 4, loss: 0.62463. 2.6 [s] per epoch. Val loss: 0.68132
Epoch: 5, loss: 0.61742. 2.5 [s] per epoch. Val loss: 0.67970
Epoch: 6, loss: 0.61202. 2.5 [s] per epoch. Val loss: 0.67865
Epoch: 7, loss: 0.60492. 2.4 [s] per epoch. Val loss: 0.67642
Epoch: 8, loss: 0.59527. 2.4 [s] per epoch. Val loss: 0.67066
Epoch: 9, loss: 0.58445. 2.6 [s] per epoch. Val loss: 0.66582
Epoch: 10, loss: 0.58251. 2.4 [s] per epoch. Val loss: 0.66804
Epoch: 11, loss: 0.57160. 2.5 [s] per epoch. Val loss: 0.66353
Epoch: 12, loss: 0.56954. 2.5 [s] per epoch. Val loss: 0.66801
Epoch: 13, loss: 0.56216. 2.5 [s] per epoch. Val loss: 0.67381
Epoch: 14, loss: 0.56479. 2.3 [s] per epoch. Val loss: 0.67360
Epoch: 15, loss: 0.56759. 2.3 [s] per epoch. Val loss: 0.67434
Ep

In [4]:
# Encode the test split the same way as the training data:
# column -> nested list -> ndarray -> tensor.
Xq_test = torch.from_numpy(np.array(df_test.Question_encoded.values.tolist()))
Xa_test = torch.from_numpy(np.array(df_test.Sentence_encoded.values.tolist()))
t_test = torch.from_numpy(np.array(df_test.Label.values.tolist()))

# Switch both networks to evaluation mode before scoring.
net_simple.eval()
net_att.eval()

# Inference only: no_grad() stops autograd from recording a graph for the
# whole test set, which the original forward pass built and never used.
# The models are moved to the CPU because the input tensors live there.
with torch.no_grad():
    y_simple = net_simple.to('cpu')(Xq_test, Xa_test)
    y_attent = net_att.to('cpu')(Xq_test, Xa_test)

In [5]:
# roc_auc_score needs a score that is monotone in P(label == 1).
# The raw class-1 column is only such a score if the networks already return
# normalised probabilities; for plain logits the ranking depends on the
# difference between the two columns. Taking the softmax over the class axis
# is correct for logits and log-probabilities, and keeps the ranking unchanged
# if the outputs are already probabilities.
# Assumes the outputs are (n_samples, 2) — TODO confirm against the model code.
p_simple = torch.softmax(y_simple.detach(), dim=1)[:, 1].numpy()
p_attent = torch.softmax(y_attent.detach(), dim=1)[:, 1].numpy()

print('ROC_AUC simple_net: ', roc_auc_score(t_test, p_simple))
print('ROC_AUC attent_net: ', roc_auc_score(t_test, p_attent))

ROC_AUC simple_net:  0.5973219350437431
ROC_AUC attent_net:  0.6716465464032166


In [8]:
# Scratch tensor of shape (3, 5, 8) for the view-vs-permute comparison below.
# NOTE: this rebinds `t`, which the first cell used for the training labels;
# re-running the training cell after this one would pass random data as labels.
t = torch.rand((3,5,8))

In [22]:
# Show the original (3, 5, 8) tensor as the reference for the two cells below.
t

tensor([[[ 0.4287,  0.3942,  0.0604,  0.1245,  0.8909,  0.4458,  0.8706,
           0.9773],
         [ 0.1155,  0.2640,  0.1278,  0.8110,  0.4001,  0.1504,  0.0421,
           0.0091],
         [ 0.5653,  0.9873,  0.3048,  0.9034,  0.1659,  0.4719,  0.5392,
           0.5871],
         [ 0.4730,  0.2721,  0.5361,  0.7076,  0.8989,  0.2514,  0.0404,
           0.8865],
         [ 0.2321,  0.1911,  0.3464,  0.8846,  0.2220,  0.1765,  0.3063,
           0.4712]],

        [[ 0.6435,  0.3662,  0.1579,  0.2816,  0.7345,  0.9305,  0.7109,
           0.6003],
         [ 0.3210,  0.6264,  0.5718,  0.4135,  0.7088,  0.5434,  0.4712,
           0.1546],
         [ 0.2150,  0.2433,  0.5993,  0.1116,  0.5238,  0.6924,  0.9444,
           0.3336],
         [ 0.5061,  0.9467,  0.7286,  0.7167,  0.9050,  0.0789,  0.3827,
           0.0125],
         [ 0.9621,  0.0949,  0.8025,  0.5345,  0.5717,  0.5203,  0.8513,
           0.0037]],

        [[ 0.5968,  0.2068,  0.9622,  0.5538,  0.4449,  0.1390,  0

In [21]:
# view() keeps the elements in their original order and only regroups them:
# in the output, the first new block is rows 0-2 of the original first block.
# It does NOT swap the first two axes.
t.view(5,3,8)

tensor([[[ 0.4287,  0.3942,  0.0604,  0.1245,  0.8909,  0.4458,  0.8706,
           0.9773],
         [ 0.1155,  0.2640,  0.1278,  0.8110,  0.4001,  0.1504,  0.0421,
           0.0091],
         [ 0.5653,  0.9873,  0.3048,  0.9034,  0.1659,  0.4719,  0.5392,
           0.5871]],

        [[ 0.4730,  0.2721,  0.5361,  0.7076,  0.8989,  0.2514,  0.0404,
           0.8865],
         [ 0.2321,  0.1911,  0.3464,  0.8846,  0.2220,  0.1765,  0.3063,
           0.4712],
         [ 0.6435,  0.3662,  0.1579,  0.2816,  0.7345,  0.9305,  0.7109,
           0.6003]],

        [[ 0.3210,  0.6264,  0.5718,  0.4135,  0.7088,  0.5434,  0.4712,
           0.1546],
         [ 0.2150,  0.2433,  0.5993,  0.1116,  0.5238,  0.6924,  0.9444,
           0.3336],
         [ 0.5061,  0.9467,  0.7286,  0.7167,  0.9050,  0.0789,  0.3827,
           0.0125]],

        [[ 0.9621,  0.0949,  0.8025,  0.5345,  0.5717,  0.5203,  0.8513,
           0.0037],
         [ 0.5968,  0.2068,  0.9622,  0.5538,  0.4449,  0.1390, 

In [16]:
# permute() really swaps the first two axes: in the output, the first new
# block is made of row 0 taken from each of the three original blocks.
t.permute(1,0,2)

tensor([[[ 0.4287,  0.3942,  0.0604,  0.1245,  0.8909,  0.4458,  0.8706,
           0.9773],
         [ 0.6435,  0.3662,  0.1579,  0.2816,  0.7345,  0.9305,  0.7109,
           0.6003],
         [ 0.5968,  0.2068,  0.9622,  0.5538,  0.4449,  0.1390,  0.5541,
           0.4626]],

        [[ 0.1155,  0.2640,  0.1278,  0.8110,  0.4001,  0.1504,  0.0421,
           0.0091],
         [ 0.3210,  0.6264,  0.5718,  0.4135,  0.7088,  0.5434,  0.4712,
           0.1546],
         [ 0.9480,  0.1144,  0.7738,  0.3222,  0.0057,  0.8268,  0.2813,
           0.8141]],

        [[ 0.5653,  0.9873,  0.3048,  0.9034,  0.1659,  0.4719,  0.5392,
           0.5871],
         [ 0.2150,  0.2433,  0.5993,  0.1116,  0.5238,  0.6924,  0.9444,
           0.3336],
         [ 0.8755,  0.3231,  0.2724,  0.3659,  0.3079,  0.3166,  0.5770,
           0.4247]],

        [[ 0.4730,  0.2721,  0.5361,  0.7076,  0.8989,  0.2514,  0.0404,
           0.8865],
         [ 0.5061,  0.9467,  0.7286,  0.7167,  0.9050,  0.0789, 

In [None]:
# Loss curves recorded on the attention net; the validation curve is red.
# With no explicit x, plt.plot uses 0..len-1 as the x axis.
train_curve, val_curve = net_att.losses, net_att.val_losses
plt.plot(train_curve)
plt.plot(val_curve, c='r')
plt.show()

In [None]:
# Put the attention net in evaluation mode before the forward pass below.
net_att.eval()

In [None]:
# First five training questions / answers as tensors.
xq_1 = torch.from_numpy(np.array(df_train.Question_encoded.values[:5].tolist()))
xa_1 = torch.from_numpy(np.array(df_train.Sentence_encoded.values[:5].tolist()))

# Forward pass on the CPU.
# NOTE(review): later cells read net_att.l_scores / r_scores, which this call
# presumably refreshes — TODO confirm in the model code.
pr = net_att.to('cpu')(xq_1, xa_1)

In [None]:
# Raw text of the same five questions that were fed through the net above.
df_train.Question.values[:5]

In [None]:
# Raw text of the same five answer sentences that were fed through the net above.
df_train.Sentence.values[:5]

In [None]:
def score_sent(sent, scores):
    """Pair each whitespace-separated token of ``sent`` with its score.

    Parameters
    ----------
    sent : str
        Raw sentence; tokenised with ``str.split()``.
    scores : torch.Tensor
        Scores of any shape; flattened before use. Position ``i`` of the
        flattened scores is paired with token ``i`` (assumes the model's
        tokenisation matches a whitespace split — TODO confirm).

    Returns
    -------
    pandas.Series
        Scores indexed by token. Its length is the smaller of the token count
        and the score count, so surplus scores or surplus tokens are dropped
        instead of raising a length-mismatch error.
    """
    tokens = sent.split()
    # .cpu() is a no-op for CPU tensors and lets CUDA tensors be converted too.
    values = scores.detach().cpu().numpy().flatten()
    # Trim both sides to the common length; the original only trimmed the
    # scores and crashed when the sentence had more tokens than scores.
    n_common = min(len(tokens), len(values))
    return pd.Series(index=tokens[:n_common], data=values[:n_common])

In [None]:
# Per-token score Series for the first five questions (net_att.l_scores)...
scoring_q = [
    score_sent(question, weights)
    for question, weights in zip(df_train.Question.values[:5], net_att.l_scores[:5])
]

# ...and for the first five answer sentences (net_att.r_scores).
scoring_a = [
    score_sent(answer, weights)
    for answer, weights in zip(df_train.Sentence.values[:5], net_att.r_scores[:5])
]

In [None]:
# Horizontal bar chart of the per-token scores of the first question.
scoring_q[0].plot(kind='barh')

In [None]:
# Fourth answer sentence: print its tokens in reading order, then plot the
# scores sorted in ascending order.
fourth_answer = scoring_a[3]
print(' '.join(fourth_answer.index))
fourth_answer.sort_values().plot(kind='barh')

In [None]:
# Full text of the sentence with index label 3.
# NOTE(review): .loc is label-based, while scoring_a[3] was built from the
# fourth row by position; they refer to the same row only if df_train has the
# default 0..n-1 index — TODO confirm.
df_train.loc[3, 'Sentence']

In [None]:
# One figure per answer sentence. Without an explicit `ax`, successive
# Series.plot calls in the same cell reuse the current axes, so the bar charts
# of all sentences end up drawn on top of each other.
for answer_scores in scoring_a:
    fig, ax = plt.subplots()
    answer_scores.plot(kind='barh', ax=ax)
plt.show()

In [None]:
# NOTE(review): scoring_a holds pandas Series (built by score_sent), so
# `scoring_a[3][0]` selects a single element rather than a list of words, and
# len() of that element would presumably fail. The indexing looks left over
# from an earlier (words, scores) layout, and it mixes sentence 3 with
# sentence 0 — TODO confirm the intent before re-running.
plt.barh(scoring_a[3][0], scoring_a[0][1][:len(scoring_a[3][0])])

In [None]:
# NOTE(review): scoring_a / scoring_q hold pandas Series (built by score_sent),
# so `scoring_a[3][0]` and `scoring_q[0][0]` each select a single element;
# slicing that element with [:5] would presumably fail. Looks left over from
# an earlier (words, scores) layout — TODO confirm the intent before re-running.
plt.barh(scoring_a[3][0], scoring_q[0][0][:5])

In [None]:

# Side-by-side loss curves of the two trained networks. Each panel gets a
# title and a legend so the figure can be read on its own; the original drew
# four unlabeled curves.
fig, axes = plt.subplots(1, 2, figsize=(17, 8))
panels = (('net_simple', net_simple), ('net_att', net_att))
for ax, (title, net) in zip(axes, panels):
    # With no explicit x, the curves are plotted against 0..len-1.
    ax.plot(net.losses, label='train')
    ax.plot(net.val_losses, c='r', label='validation')
    ax.set(title=title, ylabel='loss')
    ax.legend()
plt.show()

## Softmax на линейное преобразование от матрицы аттеншна

In [None]:
# Random 7x6 matrix standing in for an attention matrix; each row is
# normalised with a softmax over the last dimension.
a = torch.randn(7, 6)
sm = torch.nn.Softmax(dim=-1)
a = sm(a)
a

In [None]:
# Map each 6-feature row of `a` to one scalar with a (randomly initialised)
# linear layer, then softmax those scalars across the rows (dim 0).
vm = torch.nn.Linear(in_features=6, out_features=1)
sm_2 = torch.nn.Softmax(dim=0)
sm_2(vm(a))

In [None]:
# Reload the embedding file (the same path the first cell loads into
# `emb_weights`) to inspect it. NOTE: this rebinds the name `t` once more.
t = np.load('./data/processed/index2vector.npy')

In [None]:
# Check what kind of object np.load returned.
type(t)

In [None]:
# Same check, written as an isinstance test against np.ndarray.
isinstance(t, np.ndarray)

In [None]:
# Dimensions of the loaded array; the first cell reads vocab_size and
# embed_dim from the shape of the same file.
t.shape

In [23]:
import torchtext

ModuleNotFoundError: No module named 'torchtext'