In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as funcs
import torch.optim as optim
import pandas as pd
from DataCleaner import *
from loader import *
from time import time
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [None]:
# Use the first CUDA GPU when one is visible to PyTorch, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device

In [None]:
# Hyperparameters shared by the dataset, the data loader and the model.
VOCAB_SIZE = 10000  # passed to QuoraDataset as vocab_size; the embedding table has VOCAB_SIZE + 1 rows
BATCH_SIZE = 256  # batch_size of the training DataLoader
MAX_LENGTH = 80  # passed to QuoraDataset as max_length
EMBEDDING_DIM = 300  # width of each embedding vector, i.e. the GRU input size
RNN_UNITS = 64  # GRU hidden size
OOV_TOKEN = '<OOV>'  # passed to QuoraDataset as oov
BI_RNN = True  # whether the GRU is bidirectional
RNN_LAYERS = 2  # number of stacked GRU layers

In [None]:
# Timestamp taken before loading; it is not read again in the visible cells.
start_time = time()
# Path template: '{}' is replaced with the CSV's base name.
project_path = '../../NLP/Kaggle/QuoraQuestionPairs/input/{}.csv'
# Read the training CSV, using its "id" column as the DataFrame index.
train_data = pd.read_csv(project_path.format('train'), index_col="id")

In [None]:
# Build the dataset from the DataFrame loaded from train.csv. The previous
# version passed `train`, a name that no visible cell defines, while the
# freshly loaded `train_data` was never used.
train_dataset = QuoraDataset(train_data, max_length=MAX_LENGTH, vocab_size=VOCAB_SIZE, oov=OOV_TOKEN)

In [None]:
# Shuffled mini-batch loader over the training set. `generate_batch` collates
# the samples of each batch and four worker processes do the loading.
train_data_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=generate_batch,
    num_workers=4,
)

In [None]:
class Net(nn.Module):
    """Siamese GRU classifier for a pair of token-id sequences.

    Both inputs share one embedding table and one GRU. Each input is reduced
    to a single vector by summing the GRU's final hidden states over all
    layers and directions; the two vectors are concatenated and passed through
    three linear layers that end in a single logit.

    Args:
        vocab_size: largest regular token id; the embedding table has
            ``vocab_size + 1`` rows. Defaults to the notebook's ``VOCAB_SIZE``.
        embedding_dim: embedding width. Defaults to ``EMBEDDING_DIM``.
        rnn_units: GRU hidden size. Defaults to ``RNN_UNITS``.
        bidirectional: whether the GRU is bidirectional. Defaults to ``BI_RNN``.
        num_layers: number of stacked GRU layers. Defaults to ``RNN_LAYERS``.
    """

    # defining the structure of the network
    def __init__(self, vocab_size=None, embedding_dim=None, rnn_units=None,
                 bidirectional=None, num_layers=None):
        super(Net, self).__init__()
        # Fall back to the notebook-level hyperparameters when not overridden,
        # so a bare ``Net()`` builds exactly the same model as before.
        vocab_size = VOCAB_SIZE if vocab_size is None else vocab_size
        embedding_dim = EMBEDDING_DIM if embedding_dim is None else embedding_dim
        rnn_units = RNN_UNITS if rnn_units is None else rnn_units
        bidirectional = BI_RNN if bidirectional is None else bidirectional
        num_layers = RNN_LAYERS if num_layers is None else num_layers

        self.embedding = nn.Embedding(vocab_size + 1, embedding_dim)
        self.rnn = nn.GRU(embedding_dim, rnn_units, bidirectional=bidirectional,
                          num_layers=num_layers, batch_first=True)
        # The factor 2 is for the two concatenated question encodings, not for
        # bidirectionality: directions are summed in ``forward``.
        self.lin1 = nn.Linear(rnn_units * 2, 96)
        self.lin2 = nn.Linear(96, 28)
        self.out = nn.Linear(28, 1)

    # defining steps in forward pass
    def forward(self, x1, x2):
        """Return one raw logit per pair.

        Args:
            x1: integer token ids of the first sequences, batch first.
            x2: integer token ids of the second sequences, batch first.

        Returns:
            Tensor of shape ``(batch, 1)`` holding un-normalised logits.

        Raises:
            IndexError: if a token id falls outside the embedding table. The
                previous version printed and returned ``None`` here, which
                only postponed the failure to the loss computation.
        """
        try:
            emb1 = self.embedding(x1)
            emb2 = self.embedding(x2)
        except IndexError as err:
            # Report the range of the *original* id tensors; they are kept in
            # x1/x2 precisely so this message stays meaningful.
            raise IndexError(
                "token id outside embedding table with {} rows: "
                "x1 in [{}, {}], x2 in [{}, {}]".format(
                    self.embedding.num_embeddings,
                    x1.min().item(), x1.max().item(),
                    x2.min().item(), x2.max().item(),
                )
            ) from err

        # rnn(...)[1] is the final hidden state, stacked along dim 0 per layer
        # and direction; summing over dim 0 leaves (batch, rnn_units).
        enc1 = self.rnn(emb1)[1].sum(dim=0)
        enc2 = self.rnn(emb2)[1].sum(dim=0)

        # NOTE(review): there is no activation between lin1 and lin2, so the
        # two layers compose into a single linear map. Left unchanged to keep
        # the model's outputs and existing checkpoints compatible.
        lin = self.lin1(torch.cat((enc1, enc2), 1))
        lin = torch.relu(self.lin2(lin))
        pred = self.out(lin)
        return pred

In [None]:
# Instantiate the model and print its layer structure.
net = Net()
print(net)

# Print how many named parameter tensors the model has, then each tensor's
# name next to its shape.
params = [*net.named_parameters()]
print(len(params))
for name, param in params:
    print('{!r:30} {}'.format(name, param.shape))

In [None]:
def fit(epochs=5, log_every=10):
    """Train the notebook-level ``net`` on ``train_data_loader``.

    Uses the notebook globals ``net``, ``optimizer``, ``loss_fn``,
    ``accuracy``, ``device`` and ``train_data_loader``.

    Args:
        epochs: number of passes over the loader. Defaults to 5, the value
            that used to be hard-coded.
        log_every: the loss and accuracy of the last batch in every group of
            ``log_every`` batches are recorded. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        ``(losses, accs)``: two lists with one inner list per epoch, holding
        the recorded loss values and accuracies respectively.

    Raises:
        ValueError: if ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    losses = []
    accs = []
    for _ in range(epochs):
        loss_vals = []
        acc_vals = []
        for i, batch in enumerate(train_data_loader):
            optimizer.zero_grad()
            q1 = batch[0].to(device)
            q2 = batch[1].to(device)
            # Labels become a float column so they line up with the (batch, 1) logits.
            label = batch[2].view(-1, 1).to(dtype=torch.float, device=device)
            pred = net(q1, q2).to(dtype=torch.float, device=device)
            loss = loss_fn(pred, label)
            # Sample the metrics before this batch's weight update.
            if i % log_every == log_every - 1:
                loss_vals.append(loss.item())
                acc_vals.append(accuracy(pred, label))
            loss.backward()
            optimizer.step()
        losses.append(loss_vals)
        accs.append(acc_vals)
    return losses, accs

In [None]:
# Move the model to the target device before building the optimizer, so the
# optimizer is constructed over parameters that already live where they will
# be trained.
net = net.to(device)
optimizer = optim.Adam(net.parameters())
# This loss takes raw logits and applies the sigmoid internally.
loss_fn = F.binary_cross_entropy_with_logits
# Keep the model as the cell's displayed value, as before.
net

In [None]:
def accuracy(prob, target):
    """Return the fraction of correct binary predictions.

    ``prob`` holds raw logits: they are passed through a sigmoid and
    thresholded at 0.5 (strictly greater counts as class 1). The number of
    entries equal to ``target`` is divided by the size of ``target``'s first
    dimension.
    """
    with torch.no_grad():
        hard_pred = torch.sigmoid(prob).gt(0.5).float()
        n_correct = hard_pred.eq(target).sum().item()
        n_samples = target.size(0)
        return n_correct / n_samples

In [None]:
# Run training; `losses` and `acs` each hold one list of recorded values per epoch.
losses, acs = fit()

In [None]:
# Training curves: loss on top, accuracy below, one line per epoch with later
# epochs drawn more opaque. The x-axis is the index of the recorded sample
# within its epoch.
plt.figure(figsize=(15, 10))
panels = ((211, losses, 'loss'), (212, acs, 'accuracy'))
for position, history, ylabel in panels:
    plt.subplot(position)
    for epoch, values in enumerate(history):
        plt.plot(
            list(range(len(values))),
            values,
            label='epoch {}'.format(epoch),
            # Clamp: matplotlib rejects alpha > 1, which 0.4 + 0.1 * epoch
            # reaches once there are more than six epochs.
            alpha=min(1.0, 0.4 + epoch * 0.1),
            linewidth=2,
        )
    plt.ylabel(ylabel)
    # legend() acts on the current axes only, so it is called once per panel;
    # previously the loss panel's labels were never shown.
    plt.legend()
plt.xlabel('batch no')
plt.show()

In [None]:
# The notebook also supports running on the CPU, where asking for a CUDA
# memory summary fails, so only query it when a GPU is actually available.
if torch.cuda.is_available():
    print(torch.cuda.memory_summary())
else:
    print('CUDA is not available; no GPU memory summary to show.')

In [None]:
# Ask PyTorch's CUDA caching allocator to release its unused cached memory.
torch.cuda.empty_cache()