In [468]:
import numpy as np
import pandas as pd
import os, json 
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import torch
from torch import nn, optim
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.utils.data as data
from torch.autograd import Variable

In [469]:
# Load the pickled problem set and separate the target column from the features.
# NOTE(security): pd.read_pickle can execute arbitrary code while loading;
# only point this at a pickle file you trust.
df = pd.read_pickle('./problemsV7.pkl')
label = df['grade'].values
# From here on `df` is the plain feature array (every column except 'grade').
df = df.drop('grade',axis=1).values

# Stratified 70/30 train/test split. random_state pins the shuffle so the same
# rows land in each split on every Restart & Run All, which keeps the reported
# metrics comparable between runs.
train_dataset, test_dataset, train_label, test_label = train_test_split(
    df, label, test_size=.3, stratify=label, random_state=27)

In [470]:
# Convert dataset to tensor
class Dataset(data.Dataset):
    """Map-style dataset over a feature array and a parallel label array.

    Args:
        df: 2-D feature array whose rows are samples; ``df.shape[0]`` is the
            dataset length and each row must support ``.astype(float)``.
        label: labels, indexable with the same row index as ``df``.
        smote: when True, oversample ``df``/``label`` with SMOTE once, at
            construction time.
        conv: accepted for backward compatibility; it is not used.
    """

    def __init__(self, df, label, smote=False, conv=False):
        self.dataset = df
        self.label = label
        if smote:
            self.__smote__()

    def __getitem__(self, index):
        """Return ``(features, label)`` for row ``index``.

        The features are cast to float and wrapped in a ``torch.Tensor``;
        the label is returned exactly as stored.
        """
        return torch.Tensor(self.dataset[index].astype(float)), self.label[index]

    def __len__(self):
        """Number of rows in the stored (possibly resampled) feature array."""
        return self.dataset.shape[0]

    def __smote__(self):
        """Replace the stored features and labels with SMOTE-resampled versions."""
        sm = SMOTE(random_state=27)
        # `fit_sample` was removed in imbalanced-learn 0.8. Prefer the current
        # `fit_resample` and fall back only on releases that predate it.
        resample = getattr(sm, 'fit_resample', None) or sm.fit_sample
        self.dataset, self.label = resample(self.dataset, self.label)

In [471]:
class LinNet(nn.Module):
    """Fully connected classifier: input_size -> 256 -> 128 -> 64 -> num_classes.

    ReLU follows each of the three hidden layers, dropout (p=0.2) is applied
    after the third, and ``forward`` returns log-probabilities over the classes.
    """

    def __init__(self, input_size, num_classes):
        super(LinNet, self).__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=64)
        self.fc4 = nn.Linear(in_features=64, out_features=num_classes)

        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Flatten each sample to a vector and return per-class log-probabilities."""
        batch = x.shape[0]
        flat = x.view(batch, -1)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        hidden = F.relu(self.fc3(hidden))
        hidden = self.dropout(hidden)

        scores = self.fc4(hidden)
        return F.log_softmax(scores, dim=1)
    
class ConvNet(nn.Module):
    """1-D convolutional classifier: two conv+pool stages, then four linear layers.

    Args:
        num_classes: width of the output layer. Defaults to 13, the value that
            used to be hard-coded.
        input_length: length of each input sequence. When given, the width of
            the first linear layer is derived from it; when None, the former
            hard-coded width of 1536 is used.

    Raises:
        ValueError: if ``input_length`` is too short to leave any features
            after both conv+pool stages.
    """

    def __init__(self, num_classes=13, input_length=None):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv1d(1, 6, 5)
        self.pool = nn.MaxPool1d(2, 2)
        self.conv2 = nn.Conv1d(6, 16, 5)
        if input_length is None:
            flat_features = 1536
        else:
            flat_features = self._flat_features(input_length)
        self.fc1 = nn.Linear(flat_features, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, num_classes)

    @staticmethod
    def _flat_features(input_length):
        """Features per sample left after conv1 -> pool -> conv2 -> pool."""
        length = input_length
        for _stage in range(2):
            # An unpadded kernel-5 convolution trims 4 positions; the
            # width-2, stride-2 max-pool then halves the length, rounding down.
            length = (length - 4) // 2
        if length < 1:
            raise ValueError(
                'input_length=%r is too short for two conv+pool stages' % (input_length,))
        # conv2 produces 16 channels
        return 16 * length

    def forward(self, x):
        """Return raw class scores (no softmax) for ``x``.

        ``x`` is expected to be shaped (batch, 1, length): conv1 takes a
        single input channel.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Collapse channels and positions into one feature vector per sample
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [472]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [473]:
# Sizes derived from the loaded data
input_size = df.shape[1]        # number of feature columns
classes = np.unique(label)      # sorted distinct label values
num_classes = classes.shape[0]

# Training hyperparameters
learning_rate = 1e-3
batch_size = 16
num_epochs = 1

In [474]:
# Wrap the NumPy splits in Dataset objects and batch them.
# Only the training split is SMOTE-oversampled, and only it needs shuffling.
train_set = Dataset(train_dataset, train_label, smote=True)
trainloader = data.DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)

# The test split is left as-is (no oversampling). The evaluation metrics do not
# depend on batch order, so the loader iterates in a fixed order to keep every
# evaluation pass deterministic.
test_set = Dataset(test_dataset, test_label, smote=False)
testloader = data.DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

In [475]:
# Initialize network

# `conv` selects the architecture: True -> ConvNet, False -> LinNet.
# The training and evaluation loops read the same flag to decide whether to
# add a channel dimension to each batch, so the model is built from the flag
# to keep the two from drifting apart.
conv = True

if conv:
    model = ConvNet().to(device)
else:
    model = LinNet(input_size=input_size, num_classes=num_classes).to(device)

In [476]:
# Objective and optimizer: cross-entropy loss, minimized with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [477]:
# Train network
model.train()  # make sure training-mode layers (e.g. dropout) are active
for epoch in range(num_epochs):
    for i, (route, labels) in enumerate(trainloader):
        # The loader yields CPU tensors while the model lives on `device`;
        # without this move the forward pass fails whenever CUDA is selected.
        route = route.to(device)
        labels = labels.to(device)
        if conv:
            # Conv1d wants (batch, channels, length): add a single channel axis
            route = route.unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(route)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            # .item() extracts the Python float of the scalar loss
            print ('Epoch [%d/%d], Iter [%d] Loss: %.4f' %(epoch+1, num_epochs, i+1, loss.item()))

Epoch [1/1], Iter [100] Loss: 2.5259
Epoch [1/1], Iter [200] Loss: 2.1437
Epoch [1/1], Iter [300] Loss: 1.8886
Epoch [1/1], Iter [400] Loss: 2.4822
Epoch [1/1], Iter [500] Loss: 1.9101
Epoch [1/1], Iter [600] Loss: 2.2293
Epoch [1/1], Iter [700] Loss: 1.9798
Epoch [1/1], Iter [800] Loss: 2.1697
Epoch [1/1], Iter [900] Loss: 2.6446
Epoch [1/1], Iter [1000] Loss: 1.9019
Epoch [1/1], Iter [1100] Loss: 2.1571
Epoch [1/1], Iter [1200] Loss: 2.2020
Epoch [1/1], Iter [1300] Loss: 1.5326
Epoch [1/1], Iter [1400] Loss: 1.7589
Epoch [1/1], Iter [1500] Loss: 1.9932
Epoch [1/1], Iter [1600] Loss: 1.9199
Epoch [1/1], Iter [1700] Loss: 2.3902


In [478]:
# Test Network
correct = total = 0
pred_batches, label_batches = [], []

# Score in eval mode (dropout off), then restore whatever mode the model was in
# so re-running the training cell afterwards behaves as before.
was_training = model.training
model.eval()
with torch.no_grad():
    # Nothing in this cell may be named `data`: that name is the
    # torch.utils.data alias the Dataset/DataLoader cells rely on.
    for route, labels in testloader:
        route = route.to(device)
        labels = labels.to(device)
        if conv:
            route = route.unsqueeze(1)
        outputs = model(route)
        predicted = outputs.argmax(dim=1)
        # Tensors must be on the CPU before .numpy() when running on CUDA
        pred_batches.append(predicted.cpu().numpy())
        label_batches.append(labels.cpu().numpy())
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
model.train(was_training)

# Join once at the end instead of re-allocating with np.append on every batch
pred = np.concatenate(pred_batches)
t_label = np.concatenate(label_batches)

print('Accuracy of the network: %d %%' % (
    100 * correct / total))

print(classification_report(t_label, pred))

# Distribution of the absolute error |predicted - true| over the test set
difference = abs(pred - t_label)
diff_values, diff_counts = np.unique(difference, return_counts=True)
pd.DataFrame({'Difference': diff_values, 'Counts': diff_counts})

Accuracy of the network: 34 %
              precision    recall  f1-score   support

         0.0       0.56      0.78      0.65       933
         1.0       0.00      0.00      0.00       178
         2.0       0.19      0.43      0.26       428
         3.0       0.21      0.19      0.20       352
         4.0       0.29      0.03      0.05       133
         5.0       0.17      0.13      0.15       319
         6.0       0.00      0.00      0.00        11
         7.0       0.19      0.09      0.12       422
         8.0       0.00      0.00      0.00         9
         9.0       0.25      0.01      0.01       187
        10.0       0.00      0.00      0.00        53
        11.0       0.00      0.00      0.00        24
        12.0       0.00      0.00      0.00        12

    accuracy                           0.35      3061
   macro avg       0.14      0.13      0.11      3061
weighted avg       0.29      0.35      0.29      3061



  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Difference,Counts
0,0.0,1062.0
1,1.0,374.0
2,2.0,471.0
3,3.0,215.0
4,4.0,86.0
5,5.0,372.0
6,6.0,69.0
7,7.0,336.0
8,8.0,36.0
9,9.0,27.0


In [None]:
# Save model
# MODEL_PATH = 'NN_model.pth'
# torch.save(model, MODEL_PATH)

# model = torch.load(MODEL_PATH)