In [None]:
import os

import numpy as np
import pandas as pd
import torch

from dataloader import *
from model import *
from train import *

In [None]:
# Working directory that holds train.csv and receives the inputs/ folder.
# Resolved with the standard library rather than the IPython-only `%pwd`
# magic so the cell is plain Python and behaves the same on every platform.
PATH = os.getcwd()
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Fixed seed for the row shuffle. The train/valid/test membership is decided
# purely by row position after this shuffle, so an unseeded shuffle would
# produce different splits on every run.
SEED = 42

main_df = pd.read_csv(os.path.join(PATH, 'train.csv'))
print(main_df.shape)
# Keep only the text and label columns, then shuffle every row reproducibly.
main_df = main_df[["question_text", "target"]].sample(frac=1, random_state=SEED)
main_df.head()

In [None]:
# Partition the shuffled frame by label value: rows with target == 0 and
# rows with target == 1.
o_class = main_df[main_df["target"] == 0]
l_class = main_df[main_df["target"] == 1]

In [None]:
# Positional split applied to each class separately:
#   rows [0, 10000)      -> test
#   rows [10000, 20000)  -> validation
#   rows [20000, end)    -> training
test_o, valid_o, train_o = o_class.iloc[:10000], o_class.iloc[10000:20000], o_class.iloc[20000:]
test_l, valid_l, train_l = l_class.iloc[:10000], l_class.iloc[10000:20000], l_class.iloc[20000:]

In [None]:
# Stack the two per-class pieces of every split row-wise (target == 0 rows
# first, then target == 1 rows).
train = pd.concat((train_o, train_l))
valid = pd.concat((valid_o, valid_l))
test = pd.concat((test_o, test_l))

# Report the sizes in the order train, valid, test.
print(train.shape)
print(valid.shape)
print(test.shape)

In [None]:
# Create the output folder next to the notebook. Unlike `!mkdir`, this is
# portable, anchored to PATH, and does not complain when the folder already
# exists (e.g. on a second run of the notebook).
os.makedirs(os.path.join(PATH, "inputs"), exist_ok=True)

In [None]:
# Persist each split under PATH/inputs without writing the index column.
train.to_csv(path_or_buf=os.path.join(PATH, "inputs/train.csv"), index=False)
valid.to_csv(path_or_buf=os.path.join(PATH, "inputs/valid.csv"), index=False)
test.to_csv(path_or_buf=os.path.join(PATH, "inputs/test.csv"), index=False)

In [None]:
# The splits have been written to inputs/, so unbind every in-memory frame.
del main_df
del o_class, l_class
del train_o, train_l, train
del valid_o, valid_l, valid
del test_o, test_l, test

In [None]:
# Build the project's dataset helper for the working directory.
# NOTE(review): CreateDataset arrives through one of the star imports at the
# top (presumably `dataloader`) - confirm where it is defined.
dataset = CreateDataset(PATH)

In [None]:
# getData() is unpacked into three objects, used below as the training,
# validation and test batch iterators (in that order).
train_iterator, valid_iterator, test_iterator = dataset.getData()

In [None]:
# Embedding weights provided by the dataset helper; they are assigned to
# model.embedding further down.
# NOTE(review): presumably a (vocab_size, embedding_dim) tensor - confirm.
pretrained_embeddings = dataset.getEmbeddings()

In [None]:
# Model hyperparameters.
# input_dim is the first element of whatever lengthVocab() returns
# (presumably the text vocabulary size - confirm against the dataset helper).
input_dim = dataset.lengthVocab()[0]
embedding_dim = 300
hidden_dim = 374
# NOTE(review): output_dim and batch_size are not referenced by any other
# cell visible here - confirm whether they are still needed.
output_dim = 2
num_layers = 2
batch_size = 32

In [None]:
# Instantiate the project's LSTM model from the hyperparameters above.
# NOTE(review): output_dim is not passed here - verify against the LSTM
# constructor (defined outside this notebook) that this is intended.
model = LSTM(input_dim, embedding_dim, hidden_dim, num_layers)

In [None]:
# Replace the embedding layer's weights with the pretrained vectors, placed
# on the selected device.
model.embedding.weight.data = pretrained_embeddings.to(device)
# Two-entry weight vector created directly on the selected device
# (presumably [weight for target 0, weight for target 1] - confirm).
class_weights = torch.tensor([1.0, 15.0], device=device)

In [None]:
import torch.optim as optim
import torch.nn as nn

# Plain SGD over every model parameter.
optimizer = optim.SGD(model.parameters(), lr=1e-4)
# `class_weights` was defined above but never reached the loss, so the
# class imbalance was silently ignored. BCEWithLogitsLoss exposes a single
# positive-class weight (`pos_weight`), so pass the ratio of the target == 1
# weight to the target == 0 weight.
criterion = nn.BCEWithLogitsLoss(pos_weight=class_weights[1] / class_weights[0])

In [None]:
# Move the network first, then the loss module, onto the selected device.
model, criterion = (module.to(device) for module in (model, criterion))

In [None]:
# Per-epoch metric histories; train() and evaluate() append to these
# module-level lists. Each name is bound to its own, independent empty list.
epoch_train_losses, epoch_test_losses, epoch_val_losses = [], [], []
accu_train_epoch, accu_test_epoch, accu_val_epoch = [], [], []

In [None]:
import torch.nn.functional as F

def binary_accuracy(preds, y):
    """Fraction of predictions that match the binary targets.

    Args:
        preds: raw model outputs (logits); they are passed through a sigmoid
            and rounded to 0/1 before being compared.
        y: target tensor compared element-wise against the rounded
            predictions (same shape as ``preds``, or broadcastable to it).

    Returns:
        A 0-dim tensor holding ``matches / total elements``.
    """
    rounded = torch.round(torch.sigmoid(preds))
    correct = (rounded == y).float()
    # Divide by the total element count rather than len(), which only counts
    # the first dimension and over-reports accuracy for inputs that are not 1-D.
    return correct.sum() / correct.numel()

In [None]:
import pyprind

def train(model, iterator, optimizer, criterion):
    """Run one optimisation pass over ``iterator`` and record epoch metrics.

    Args:
        model: network to optimise; it is switched to training mode.
        iterator: sized iterable of batches exposing ``.Text`` and ``.Label``.
        optimizer: optimizer stepped once per batch.
        criterion: loss callable applied to ``(predictions, labels)``.

    Returns:
        ``(mean_loss, mean_accuracy)`` for this pass, i.e. the per-batch
        values summed and divided by ``len(iterator)``. The same two values
        are also appended to the module-level ``epoch_train_losses`` and
        ``accu_train_epoch`` lists.
    """
    train_loss_batch = []
    accu_train_batch = []
    model.train()
    bar = pyprind.ProgBar(len(iterator), bar_char='█')
    for batch in iterator:

        optimizer.zero_grad()

        # Call the module itself instead of .forward() so that any registered
        # hooks are executed.
        predictions = model(batch.Text).view(-1)
        # Cast the labels to the prediction dtype/device in a local variable
        # rather than overwriting the attribute on the batch object.
        labels = batch.Label.type_as(predictions)
        train_loss = criterion(predictions, labels)
        acc = binary_accuracy(predictions, labels)

        train_loss.backward()
        optimizer.step()

        # Store a detached loss: keeping the raw tensor would hold on to every
        # batch's autograd graph until the end of the epoch.
        train_loss_batch.append(train_loss.detach())
        accu_train_batch.append(acc)
        bar.update()

    epoch_train_losses.append(sum(train_loss_batch)/len(iterator))
    accu_train_epoch.append(sum(accu_train_batch)/len(iterator))

    return epoch_train_losses[-1], accu_train_epoch[-1]

In [None]:
def evaluate(model, iterator, criterion):
    """Score ``model`` on ``iterator`` without updating its parameters.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        iterator: sized iterable of batches exposing ``.Text`` and ``.Label``.
        criterion: loss callable applied to ``(predictions, labels)``.

    Returns:
        ``(mean_loss, mean_accuracy)`` for this pass, i.e. the per-batch
        values summed and divided by ``len(iterator)``. The same two values
        are also appended to the module-level ``epoch_val_losses`` and
        ``accu_val_epoch`` lists, whichever iterator is passed in.
    """
    val_loss_batch = []
    accu_val_batch = []
    model.eval()

    # No gradients are needed for scoring.
    with torch.no_grad():
        bar = pyprind.ProgBar(len(iterator), bar_char='█')
        for batch in iterator:

            # Call the module itself instead of .forward() so that any
            # registered hooks are executed (matches train()).
            predictions = model(batch.Text).view(-1)
            # Cast the labels in a local variable rather than overwriting the
            # attribute on the batch object.
            labels = batch.Label.type_as(predictions)
            val_loss = criterion(predictions, labels)

            acc = binary_accuracy(predictions, labels)

            val_loss_batch.append(val_loss)
            accu_val_batch.append(acc)
            bar.update()
        epoch_val_losses.append(sum(val_loss_batch)/len(iterator))
        accu_val_epoch.append(sum(accu_val_batch)/len(iterator))
    return epoch_val_losses[-1], accu_val_epoch[-1]

In [None]:
# Number of full passes over the training iterator.
epochs = 2

for epoch in range(epochs):
    # One optimisation pass, followed by a scoring pass on the validation set.
    train_loss, train_acc = train(
        model=model,
        iterator=train_iterator,
        optimizer=optimizer,
        criterion=criterion,
    )
    valid_loss, valid_acc = evaluate(
        model=model,
        iterator=valid_iterator,
        criterion=criterion,
    )

    # One summary line per epoch (epoch numbers are shown 1-based).
    print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}% |')