In [26]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, multilabel_confusion_matrix
import models
import process_data
import analysis

In [2]:
#setup
BATCH_SIZE = 200
LEARNING_RATE = 0.0005
NUM_EPOCHS = 25

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device_count = torch.cuda.device_count()


In [3]:
train_loader, val_loader = process_data.load_data(batch_size=BATCH_SIZE)

In [4]:
cnetmodel = models.get_model("cnet")

In [5]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnetmodel.parameters(), lr=LEARNING_RATE)


In [44]:
def accuracy_per_class(mcm, clsnum):
    return (mcm[clsnum][0][0] + mcm[clsnum][1][1]) / (mcm[clsnum][0][0] + mcm[clsnum][0][1] + mcm[clsnum][1][0] + mcm[clsnum][1][1])

def sensitivity_per_class(mcm, clsnum):
    return mcm[clsnum][0][0] / (mcm[clsnum][0][0] + mcm[clsnum][1][0])

def specificity_per_class(mcm, clsnum):
    return mcm[clsnum][1][1] / (mcm[clsnum][1][1] + mcm[clsnum][0][1])

def pos_pred_val_per_class(mcm, clsnum):
    return mcm[clsnum][0][0] / (mcm[clsnum][0][0] + mcm[clsnum][0][1])

def neg_pred_val_per_class(mcm, clsnum):
    return mcm[clsnum][1][1] / (mcm[clsnum][1][1] + mcm[clsnum][1][0])

def stat_per_class(mcm):
    out = []
    for clsnum in range(5):
         out.append((accuracy_per_class(mcm, clsnum), sensitivity_per_class(mcm, clsnum), specificity_per_class(mcm, clsnum), \
            pos_pred_val_per_class(mcm, clsnum), neg_pred_val_per_class(mcm, clsnum)))
    return out

def avg_stats(mcm):
    avg_acc = 0
    avg_sens = 0
    avg_spec = 0
    avg_pos_pred = 0
    avg_neg_pred = 0
    for clsnum in range(5):
        avg_acc += accuracy_per_class(mcm, clsnum)/5
        avg_sens += sensitivity_per_class(mcm, clsnum)/5
        avg_spec += specificity_per_class(mcm, clsnum)/5
        avg_pos_pred += pos_pred_val_per_class(mcm, clsnum)/5
        avg_neg_pred += neg_pred_val_per_class(mcm, clsnum)/5
    return avg_acc, avg_sens, avg_spec, avg_pos_pred, avg_neg_pred

def eval(model, val_loader):
    model.eval()
    with torch.no_grad():
        val_loss = 0
        y_pred = []
        y_true = []
        for x, y in val_loader:
            x=x.to(device).reshape(-1, 1, 187).float()
            y = y.to(device).long()
            y_hat = model(x)
            val_loss += criterion(y_hat, y).item()
            y_hat = F.softmax(y_hat, dim=1)
            y_hat = torch.argmax(y_hat, dim=1)
            y_pred.extend(y_hat.cpu().numpy())
            y_true.extend(y.long().cpu().numpy())
        
        val_loss = val_loss / len(val_loader)
        f = f1_score(y_pred=y_pred, y_true=y_true, average='macro')
        acc = accuracy_score(y_pred=y_pred, y_true=y_true)
        cm = confusion_matrix(y_true, y_pred)
        mcm = multilabel_confusion_matrix(y_pred, y_true) # input is inverted to get expected output
    return f, acc, cm, mcm, stat_per_class(mcm), avg_stats(mcm)


In [45]:
def train(model, train_loader, val_loader, n_epochs):
    model.train()
    for epoch in range(n_epochs):
        train_loss = 0
        i=0
        for x, y in train_loader:
            x=x.to(device).reshape(-1, 1, 187).float()
            y = y.to(device).long()
            y_hat = model(x)
            loss = criterion(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss = train_loss / len(train_loader)
        print('Epoch: {} \t Training Loss: {:.6f}'.format(epoch+1, train_loss))
        f, acc, cm, mcm, stats, avgstats = eval(model, val_loader)
        print('Epoch: %d \t Validation f: %.2f, acc: %.2f'%(epoch+1, f, acc))
        print('Confusion matrix')
        print(cm)
        print(mcm)
        print('Accuracy, Sensitivity, Specificity, Positive Pred Value, Negative Pred Value')
        print('class 1 :', stats[0])
        print('class 2 :', stats[1])
        print('class 3 :', stats[2])
        print('class 4 :', stats[3])
        print('class 5 :', stats[4])
        print('avg metrics :', avgstats)


In [46]:
train(cnetmodel, train_loader, val_loader, 1) # NUM_EPOCHS


Epoch: 1 	 Training Loss: 0.045329
Epoch: 1 	 Validation f: 0.90, acc: 0.98
Confusion matrix
[[18015    78    19     2     4]
 [  129   416     9     1     1]
 [   52     9  1369    12     6]
 [   35     0    15   112     0]
 [   20     0     4     0  1584]]
[[[ 3538   103]
  [  236 18015]]

 [[21249   140]
  [   87   416]]

 [[20397    79]
  [   47  1369]]

 [[21715    50]
  [   15   112]]

 [[20273    24]
  [   11  1584]]]
Accuracy, Sensitivity, Specificity, Positive Pred Value, Negative Pred Value
class 1 : (0.9845148912844874, 0.9374668786433492, 0.9943150458107959, 0.9717110683878055, 0.9870692016875787)
class 2 : (0.989630915402887, 0.9959223847019123, 0.7482014388489209, 0.9934545794567301, 0.827037773359841)
class 3 : (0.9942444728668006, 0.9977010369790648, 0.9454419889502762, 0.9961418245751124, 0.9668079096045198)
class 4 : (0.9970308788598575, 0.9993097100782329, 0.691358024691358, 0.9977027337468413, 0.8818897637795275)
class 5 : (0.9984012424630002, 0.9994577006507592, 0.

In [None]:
embeds,y = analysis.get_embeddings(cnetmodel, train_loader)
coordinates = analysis.get_embeddings(embeds)
analysis.plot_tsne(coordinates, y, 5)