In [1]:
# Imports

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

import torch
import torchvision
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F  # All functions that don't have any parameters
#from torch.utils.data import (
#    DataLoader,
#)  # Gives easier dataset managment and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset

In [None]:
# Read the feature arrays and label vectors from disk; the same arrays feed the NN.
# NOTE(review): the "train" split is read from the *_test files and the "test"
# split from the *_val files - confirm this mapping is intended.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# only point it at files you trust.
X_train = np.load("alpha94_test.npy", allow_pickle=True)
y_train = np.loadtxt('alpha94_label_test.txt')

# Unpacking into three names also fails fast if the array is not 3-D
nsamples, nx, ny = X_train.shape
print(f"{nsamples} {nx} {ny}")

X_test = np.load("alpha94_val.npy", allow_pickle=True)
y_test = np.loadtxt('alpha94_label_val.txt')

nsamples, nx, ny = X_test.shape
print(f"{nsamples} {nx} {ny}")

In [None]:
# Pair each training sample with its label so the DataLoader can batch them together
train_ds = [[X_train[idx], y_train[idx]] for idx in range(len(X_train))]

In [None]:
# Same [sample, label] pairing for the held-out split
test_ds = [[X_test[idx], y_test[idx]] for idx in range(len(X_test))]

In [None]:
# Mini-batch loader for training; samples are reshuffled on every pass
bat_size = 128
train_ldr = torch.utils.data.DataLoader(
    dataset=train_ds,
    batch_size=bat_size,
    shuffle=True,
)

In [None]:
# Mini-batch loader for the held-out split.
# This data is only scored, never trained on, so shuffling buys nothing;
# a fixed order keeps the collected predictions/targets reproducible.
bat_size = 128
test_ldr = torch.utils.data.DataLoader(test_ds, batch_size=bat_size, shuffle=False)

In [None]:
# Pick the compute device: use CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Model

In [None]:
# Hyperparameters
input_size = 139      # forwarded to Net(input_size=...)
num_classes = 1       # forwarded to Net(num_classes=...): number of output units
learning_rate = 1e-3  # step size given to the optimizer

class Net(nn.Module):
    """1-D CNN classifier: two conv/pool stages followed by two linear layers.

    The input is expected as ``(batch, 21, input_size)`` (21 is fixed by
    ``conv1.in_channels``). Each output unit is passed through a sigmoid, so
    the result has shape ``(batch, num_classes)`` with values in ``[0, 1]``.

    Args:
        input_size: Length of the last axis of the input. It sizes the first
            fully connected layer; ``139`` gives the ``50 * 9`` features the
            network was originally hard-coded for.
        num_classes: Number of output units.

    Raises:
        ValueError: If ``input_size`` is too short to leave any positions
            after the conv/pool stack.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()

        self.conv1 = nn.Conv1d(in_channels=21, out_channels=100, kernel_size=3, stride=2, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv1_bn = nn.BatchNorm1d(100)
        self.conv2 = nn.Conv1d(in_channels=100, out_channels=50, kernel_size=3, stride=2, padding=1)

        # Follow the sequence length through conv1 -> pool -> conv2 -> pool so
        # fc1 matches whatever input_size the caller asked for.
        flat_length = input_size
        for layer in (self.conv1, self.pool, self.conv2, self.pool):
            flat_length = self._out_length(flat_length, layer)
        if flat_length < 1:
            raise ValueError(
                f"input_size={input_size} is too short for the conv/pool stack"
            )

        self.fc1 = nn.Linear(self.conv2.out_channels * flat_length, 10)
        self.fc2 = nn.Linear(10, num_classes)

    @staticmethod
    def _out_length(length, layer):
        """Return the output length of a Conv1d/MaxPool1d layer (dilation 1, floor mode)."""
        # Conv1d stores these as 1-tuples, MaxPool1d as the plain ints it was given.
        kernel, stride, padding = (
            value[0] if isinstance(value, tuple) else value
            for value in (layer.kernel_size, layer.stride, layer.padding)
        )
        return (length + 2 * padding - kernel) // stride + 1

    def forward(self, x):
        """Run the network on a ``(batch, 21, length)`` tensor and return sigmoid outputs."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.conv1_bn(x)
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten (channels, length) into one feature vector per sample
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return torch.sigmoid(self.fc2(x))
    

    
# Build the network, then move its parameters and buffers onto the chosen device
net = Net(input_size, num_classes)
net = net.to(device)

In [None]:
# Loss and optimizer
# Binary cross-entropy on probabilities. reduction="none" keeps one loss value
# per element instead of collapsing the batch to a scalar.
# NOTE(review): backward() needs a scalar, so the loop that consumes this loss
# has to reduce it (sum/mean) first.
criterion = nn.BCELoss(reduction="none")
# Plain SGD over every parameter of the network
optimizer = optim.SGD(params=net.parameters(), lr=learning_rate)



In [None]:
# Training the model
# Each epoch runs one optimisation pass over train_ldr and one scoring pass
# over test_ldr, recording the mean per-sample loss and the accuracy of both.
num_epochs = 5 #100

train_acc, valid_acc = [], []   # accuracy per epoch
losses, val_losses = [], []     # mean per-sample loss per epoch
# Not filled in this cell; kept defined in case later cells rely on them.
train_loss, valid_loss = [], []
valid_acc_test = []

for epoch in range(num_epochs):
    print(epoch+1)

    ### Train
    net.train()
    cur_loss = 0.0
    train_preds, train_targs = [], []
    for data, target in train_ldr:
        # The network lives on `device`, so the batch has to be moved there too
        X_batch = data.float().to(device)
        target_batch = target.float().unsqueeze(1).to(device)

        optimizer.zero_grad()
        output = net(X_batch)
        # The criterion may return one loss per sample (reduction="none");
        # backward() requires a scalar, so reduce explicitly.
        sample_loss = criterion(output, target_batch)
        sample_loss.mean().backward()
        optimizer.step()

        # Detach and bring back to the CPU before handing anything to NumPy
        train_preds += list(torch.round(output.detach()).cpu().numpy())
        train_targs += list(target_batch.cpu().numpy())
        # .item() yields a plain float, so no autograd graph is kept alive
        cur_loss += sample_loss.detach().sum().item()

    losses.append(cur_loss / len(X_train))

    ### Evaluate validation
    net.eval()
    val_loss = 0.0
    val_preds, val_targs = [], []
    with torch.no_grad():
        for data, target in test_ldr:
            x_batch_val = data.float().to(device)
            y_batch_val = target.float().unsqueeze(1).to(device)
            output = net(x_batch_val)
            sample_loss = criterion(output, y_batch_val)

            val_preds += list(torch.round(output).cpu().numpy())
            val_targs += list(y_batch_val.cpu().numpy())
            val_loss += sample_loss.sum().item()

    val_losses.append(val_loss / len(X_test))

    train_acc.append(accuracy_score(train_targs, train_preds))
    valid_acc.append(accuracy_score(val_targs, val_preds))

# Learning curves: one point per completed epoch
epochs = np.arange(1, len(train_acc) + 1)

plt.figure()
plt.plot(epochs, train_acc, 'r', epochs, valid_acc, 'b')
plt.legend(['Train Accuracy','Validation Accuracy'])
plt.xlabel('Epoch')
plt.ylabel('Acc')

plt.figure()
plt.plot(epochs, losses, 'r', epochs, val_losses, 'b')
plt.legend(['Train Loss','Validation Loss'])
plt.xlabel('Epoch')
plt.ylabel('Loss');


In [None]:
# ROC
# roc_curve needs continuous scores: with predictions already rounded to 0/1
# there is only one threshold, so the "curve" is a single operating point.
# Re-score the training set with the network in eval mode and keep the raw
# sigmoid outputs.
net.eval()
train_scores, train_labels = [], []
with torch.no_grad():
    for data, target in train_ldr:
        probs = net(data.float().to(device))
        train_scores.append(probs.cpu().numpy().ravel())
        train_labels.append(target.numpy().ravel())

fpr, tpr, threshold = metrics.roc_curve(
    np.concatenate(train_labels), np.concatenate(train_scores)
)
roc_auc = metrics.auc(fpr, tpr)

# plot ROC
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')  # chance-level diagonal
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

In [None]:
# ROC
# roc_curve needs continuous scores: with predictions already rounded to 0/1
# there is only one threshold, so the "curve" is a single operating point.
# Score the held-out split with the network in eval mode and keep the raw
# sigmoid outputs.
net.eval()
val_scores, val_labels = [], []
with torch.no_grad():
    for data, target in test_ldr:
        probs = net(data.float().to(device))
        val_scores.append(probs.cpu().numpy().ravel())
        val_labels.append(target.numpy().ravel())

fpr, tpr, threshold = metrics.roc_curve(
    np.concatenate(val_labels), np.concatenate(val_scores)
)
roc_auc = metrics.auc(fpr, tpr)

# plot ROC
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')  # chance-level diagonal
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()