Import and Initialization

In [None]:
import dataprocess as dp
import transformers
from sklearn.model_selection import train_test_split
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import ExponentialLR
from modelhelper import MSE_Vec_matrix
from modelhelper import Net
from modelhelper import F_score
import torch
from torchvision import models

# Run on the GPU when PyTorch can see a CUDA device, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

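Calculates the model's top-1 and top-2 accuracy, i.e. how often the true class is the most probable prediction and how often it is among the two most probable predictions.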

In [None]:
def accuracy(net, input_id, labels):
    """Compute the top-1 and top-2 classification accuracy of ``net``.

    Args:
        net: Callable (e.g. a ``torch.nn.Module``) that maps ``input_id`` to a
            2-D tensor of per-class scores with one row per sample and at
            least two columns.
        input_id: Batch of inputs that is passed to ``net`` unchanged.
        labels: Tensor holding the true class index of every sample. Any
            shape with one entry per sample is accepted (``(N,)`` or
            ``(N, 1)``); it is flattened before the comparison.

    Returns:
        A tuple ``(top1, top2)`` of Python floats: the fraction of samples
        whose label equals the highest-scoring class, and the fraction whose
        label is one of the two highest-scoring classes. Both values are
        ``nan`` when ``labels`` is empty.
    """
    j = len(labels)
    if j == 0:
        # Nothing to score; keep the "not a number" result instead of dividing by zero.
        return float("nan"), float("nan")

    # Rank the classes of every sample from most to least probable.
    # No gradients are needed for a metric, so do not build an autograd graph.
    with torch.no_grad():
        guesses = torch.argsort(net(input_id), dim=1, descending=True)

    # Flatten the labels so that an (N, 1) column cannot broadcast against the
    # (N,) prediction column into an (N, N) matrix, and compare on one device.
    current_real = labels.clone().detach().reshape(-1).to(guesses.device)

    # Number of samples whose label is the best / the second-best guess.
    top1_hits = torch.sum(current_real == guesses[:, 0]).item()
    top2_hits = torch.sum(current_real == guesses[:, 1]).item()

    # Divide in Python so the result does not depend on tensor integer-division rules.
    return top1_hits / j, (top1_hits + top2_hits) / j

Load the pre-encoded data and extract the encoded review texts and their rating labels.

In [None]:
# Pre-encoded review dataset, read through the project helper `dp.unpickle_data`.
# NOTE(review): presumably this unpickles the file - only load pickles from a trusted source.
ENCODED_REVIEWS_FILE = "reviews_Electronics_5_7_encoded.pickle"
data = dp.unpickle_data(ENCODED_REVIEWS_FILE)

# 'reviewText' provides the model inputs, 'overall' the target labels.
sentences, labels = data['reviewText'], data['overall']

Partitions the data into training (90%), test (5%) and validation (5%) sets.

In [None]:
# Hold out 10% of the data, stratified by label so each split keeps the class mix.
X_train, X_test, y_train, y_test = train_test_split(sentences, labels, test_size=0.1, random_state=42, stratify=labels)
# Split the hold-out in half: one half becomes the test set, the other the validation set.
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=42, stratify=y_test)

X_test = X_test.clone().detach()
# Shift the test labels down by one in a single vectorised operation and place
# them on the selected device (works on CPU-only machines as well as on CUDA).
y_test = torch.tensor(y_test).to(device) - 1

X_train = X_train.clone().detach()
# Training labels are left unshifted in this cell.
y_train = torch.tensor(y_train)

Balances the training data so that every rating class contributes the same number of samples.

In [None]:
# Gather the training rows of each rating (1..5) separately.
by_rating = [X_train[y_train == rating] for rating in (1, 2, 3, 4, 5)]

# The least frequent rating decides how many rows every class may keep.
training_points = min(len(rows) for rows in by_rating)

# Truncate every class to that common size.
rev1, rev2, rev3, rev4, rev5 = (rows[:training_points] for rows in by_rating)

# A single random permutation shuffles inputs and labels in the same way.
indicies = torch.randperm(training_points * 5)

X_train = torch.cat((rev1, rev2, rev3, rev4, rev5), dim=0)[indicies, :]

# Rebuild the labels 0-based (rating r becomes class r - 1), in the same class
# order as the concatenation above, then apply the same permutation.
base = torch.zeros(training_points)
y_train = torch.cat([base + offset for offset in range(5)])[indicies]


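The cell below defines all the MLP hyperparameters. The learning-rate decay factor $\gamma$ is chosen so that the learning rate falls from its starting value to its ending value over the whole run: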
$$
\gamma = \left( \frac{\alpha}{\beta} \right)^{\frac{1}{e}}
$$

- $\gamma$ is the per-epoch decay rate
- $\beta$ is the starting learning rate
- $\alpha$ is the ending learning rate
- $e$ is the number of epochs

In [None]:
# --- Training schedule ---
NUM_EPOCH = 1000
batch_size = 500

# --- Exponential learning-rate decay ---
# Multiplying LR_START by LR_GAMMA NUM_EPOCH times yields LR_END.
LR_START = 1e-1
LR_END = 1e-3
LR_GAMMA = (LR_END / LR_START) ** (1 / NUM_EPOCH)

# --- Network architecture ---
dropout = 0.2
# Passed to Net as h_sizes; presumably the layer widths from input (768) to output (5) - TODO confirm in modelhelper.
hidden_layers = [768, 500, 600, 300, 400, 100, 200, 70, 20, 5]
activation_func = F.relu
batch_norm = True

net = Net(
    h_sizes=hidden_layers,
    dropout=dropout,
    activation=activation_func,
    batch_norm=batch_norm,
)
net = net.to(device)

print(net)

# --- Optimisation: SGD with momentum, exponentially decayed LR, MSE loss ---
optimizer = torch.optim.SGD(net.parameters(), lr=LR_START, momentum=0.9)
scheduler = ExponentialLR(optimizer, gamma=LR_GAMMA)
criterion = nn.MSELoss().to(device)

In [None]:
# Place every tensor used for training and evaluation on the selected device.
# Labels are cast to int64 because they are later used as class indices.
X_train = X_train.to(device)
y_train = y_train.long().to(device)
X_test  = X_test.to(device)
# Use `device` here too (not a hard-coded .cuda()) so the cell also runs on CPU-only machines.
y_test  = y_test.long().to(device)

Training loop

In [None]:
# Early stopping: leave the loop once the rounded top-1 accuracy (in percent) on
# the test split reaches EARLY_STOP_ACC_PERCENT, but only after the epoch index
# has passed EARLY_STOP_MIN_EPOCH.
EARLY_STOP_ACC_PERCENT = 60
EARLY_STOP_MIN_EPOCH = 10

# Evaluation is done on the CPU. These tensors do not change between epochs,
# so copy them once instead of once per epoch.
X_eval = X_test.cpu()
y_eval = y_test.cpu()
# One-hot float targets, matching the dtype of the targets used for training.
y_eval_onehot = F.one_hot(y_eval, num_classes=5).float()

train_losses = []
test_losses = []
accuracy1 = []
accuracy2 = []

# Only full batches are used; a trailing partial batch is skipped.
num_batches = len(y_train) // batch_size

for epoch in range(NUM_EPOCH):
    running_loss = 0.0
    net.train()
    for i in range(num_batches):
        start = batch_size * i
        stop = start + batch_size

        optimizer.zero_grad()
        outputs = net(X_train[start:stop])

        # MSE loss against the one-hot encoding of the class labels.
        targets = F.one_hot(y_train[start:stop], num_classes=5).float()
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        # Sum of the per-batch losses of this epoch.
        running_loss += loss.item()

    net.eval()
    with torch.no_grad():
        # Temporarily move the model to the CPU for the evaluation pass.
        net_test = net.cpu()
        guesses = net_test(X_eval)
        test_loss = criterion(guesses, y_eval_onehot).item()
        ac1, ac2 = accuracy(net_test, X_eval, y_eval)
    # Move the model back to the training device (not a hard-coded .cuda()).
    net.to(device)

    # Record every metric of the epoch before any early exit so that the four
    # history lists always have the same length.
    train_losses.append(running_loss)
    test_losses.append(test_loss)
    accuracy1.append(ac1)
    accuracy2.append(ac2)

    # NOTE(review): the test loss is shown multiplied by 10 while the stored
    # value is unscaled - confirm that this display scaling is intended.
    print('[%d] loss: %.3f \t test loss: %.3f \t val_ac1: %.2f \t val_ac2: %.2f' %
    (epoch + 1, running_loss, test_loss*10,ac1,ac2))

    if round(ac1*100) >= EARLY_STOP_ACC_PERCENT and epoch > EARLY_STOP_MIN_EPOCH:
        break

    scheduler.step()

Displays training results

In [None]:
print('Finished Training')

# One labelled figure per recorded training curve.
curves = [
    (train_losses, "Training loss (summed over batches)", "loss"),
    # torch.tensor(...) accepts the history whether it holds floats or 0-d tensors.
    (torch.tensor(test_losses).cpu().numpy(), "Test loss", "loss"),
    (accuracy1, "Top-1 accuracy on the test split", "accuracy"),
    (accuracy2, "Top-2 accuracy on the test split", "accuracy"),
]

for values, title, ylabel in curves:
    fig, ax = plt.subplots()
    ax.plot(values)
    ax.set(title=title, xlabel="epoch", ylabel=ylabel)

plt.show()

Prints the confusion matrix for the validation set.

In [None]:
# `y_val_new` is not defined by any earlier cell, so build it here from `y_val`.
# The labels are shifted by -1 in the same way as the test labels, so that they
# line up with the 0-based class indices the network is trained on.
# NOTE(review): assumes y_val still holds the unshifted labels from the split cell - confirm.
y_val_new = (y_val.clone().detach() if torch.is_tensor(y_val) else torch.tensor(y_val)).long() - 1

F_score(net.cpu(),X_val.cpu(),y_val_new.cpu())