In [None]:
import pandas as pd
import numpy as np
from numpy import vstack
import h5py 
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn import metrics 
from sklearn.model_selection import train_test_split, LeaveOneOut, StratifiedKFold
from matplotlib import pyplot as plt
from torch.nn import Linear, Sigmoid, ReLU, BCELoss, Dropout, Module
from torch import Tensor, nn
from torch.optim import SGD
import torch
import random
from tqdm import tnrange
from tqdm import tqdm_notebook as tqdm

# load data

`tensor_inputs` is a 3D tensor of shape (234 patients, 6700 pathways, 12639 genes); `gr_truth` holds, for each patient, whether they have COVID-19.

In [None]:
# Dimensions of each per-patient matrix written into the preallocated tensor.
N_PATHWAYS, N_GENES = 6700, 12639

# Open read-only inside a context manager so the file handle is released even
# if loading fails part-way through. Everything needed later is copied into
# `gr_truth` / `tensor_inputs` before the file is closed.
with h5py.File("pathway_data.h5", "r") as f:
    gr_truth = list(f['gr_truth']) # whether patients have COVID-19 or not

    # One (pathways x genes) matrix per patient, stacked along dim 0. The
    # leading dimension follows the number of labels instead of a literal 234
    # so the tensor and the loop below can never disagree.
    # NOTE(review): with torch's default float32 dtype and 234 patients this
    # preallocation is on the order of 79 GB - confirm the host has that much RAM.
    tensor_inputs = torch.zeros((len(gr_truth), N_PATHWAYS, N_GENES))
    for n in tnrange(len(gr_truth)):
        # `torch` is a module and cannot be called; read the dataset into a
        # NumPy array first and wrap that as a tensor.
        mat = torch.from_numpy(np.asarray(f['sample_' + str(n)]))
        tensor_inputs[n] = mat

In [None]:
# Train-test split
# `list_inputs` is not defined by any cell above; the loaded samples live in
# `tensor_inputs`. scikit-learn is handed a NumPy view of that tensor (no copy
# for a CPU tensor) so the resulting X_* splits are NumPy arrays that can be
# indexed per patient along axis 0.
X_train, X_test, y_train, y_test = train_test_split(
    tensor_inputs.numpy(), gr_truth, test_size=0.33, random_state=42
)

# neural network

In [None]:
class DropoutNet(Module):
    """Two-stage feed-forward classifier over a 2-D (rows x columns) sample.

    Stage one (fc1 -> fc2) collapses the column axis of every row to a single
    value; the result is transposed and stage two (fc3 -> fc4) collapses the
    row axis, followed by a sigmoid so the output lies in [0, 1].

    Args:
        shape: Shape whose last two entries are (rows, columns) of one sample.
            Any leading entries (e.g. a dataset's sample count) are ignored, so
            both a per-sample shape and a whole-dataset shape are accepted.
        p: Dropout probability applied after the fc1 and fc3 activations.
    """

    def __init__(self, shape, p):
        super().__init__()
        # Index from the end so a leading sample/batch dimension does not shift
        # which sizes are picked up. For a 2-D shape this is identical to
        # shape[0], shape[1].
        n_rows, n_cols = shape[-2], shape[-1]
        self.fc1 = Linear(n_cols, 128)
        self.fc2 = Linear(128, 1)
        self.fc3 = Linear(n_rows, 32)
        self.fc4 = Linear(32, 1)
        self.dropout = Dropout(p=p)
        self.sigmoid = Sigmoid()
        self.ReLU = ReLU()

    def forward(self, x):
        """Return the sigmoid score for one sample (or a batch of samples).

        Args:
            x: NumPy array or torch tensor of shape (..., rows, columns).

        Returns:
            Tensor of shape (..., 1, 1) with values in [0, 1].
        """
        # as_tensor accepts both ndarrays and tensors (from_numpy rejects the
        # latter) and aligns dtype/device with the layer weights so float64
        # input does not trip the Linear layers.
        weight = self.fc1.weight
        y = torch.as_tensor(x, dtype=weight.dtype, device=weight.device)
        y = self.dropout(self.ReLU(self.fc1(y)))
        y = self.ReLU(self.fc2(y))
        # Swap only the last two axes: (..., rows, 1) -> (..., 1, rows). For a
        # 2-D sample this is the same as `.T`, but it also works on batches.
        y = self.dropout(self.ReLU(self.fc3(y.transpose(-2, -1))))
        y = self.fc4(y)
        return self.sigmoid(y)

In [None]:
# train model 
def train_model(X_train, y_train, model, lr=1e-4, momentum=0.6, epochs=100):
    """Fit `model` with per-sample SGD on a binary cross-entropy loss.

    Args:
        X_train: Indexable collection of samples; `X_train[i]` is passed to
            `model` unchanged.
        y_train: Indexable collection of binary labels aligned with `X_train`
            (tensor, array or plain list).
        model: Module whose output, indexed with `[0]`, is a probability.
        lr: SGD learning rate.
        momentum: SGD momentum.
        epochs: Number of full passes over `X_train` (defaults to the
            previously hard-coded 100).

    Returns:
        None. `model` is updated in place.
    """
    # define optimization
    criterion = BCELoss()
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    # Make sure dropout is active even if the model was switched to eval mode
    # by an earlier evaluation.
    model.train()

    n_samples = len(X_train)
    # The context manager closes the progress bar even if a step raises.
    with tqdm(total=epochs * n_samples) as pbar:
        for epoch in range(epochs):
            # one sample per optimisation step
            for i in range(n_samples):
                # clear the gradients
                optimizer.zero_grad()
                # compute the model output
                yhat = model(X_train[i])[0]
                # BCELoss requires input and target to have the same size; the
                # raw label is 0-dim while yhat is not, so reshape it to match.
                targets = torch.as_tensor(
                    y_train[i], dtype=yhat.dtype, device=yhat.device
                ).reshape(yhat.shape)
                # calculate loss
                loss = criterion(yhat, targets)
                # credit assignment
                loss.backward()
                # update model weights
                optimizer.step()
                pbar.update(1)
        
def evaluate_model(X_test, y_test, model, cv=False):
    """Score `model` on a held-out set with ROC AUC and optionally plot the curve.

    Args:
        X_test: Indexable collection of samples; `X_test[i]` is passed to
            `model` unchanged.
        y_test: Indexable collection of binary labels aligned with `X_test`.
        model: Module whose output, indexed with `[0]`, is a probability.
        cv: When True, skip printing/plotting and return the metrics instead.

    Returns:
        `(auc, fpr, tpr)` for the model when `cv` is True, otherwise None
        (the scores are printed and the ROC curve is shown).
    """
    predictions, actuals = [], []
    # Evaluate deterministically: dropout must be disabled and no autograd
    # graph is needed. The previous mode is restored afterwards so the caller's
    # model state is left as it was found.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for i in range(len(X_test)):
                # evaluate model on test set
                yhat = model(X_test[i])[0]
                # retrieve numpy array and store
                predictions.append(yhat.detach().cpu().numpy())
                actuals.append(y_test[i])
    finally:
        model.train(was_training)
    # Flatten to 1-D so scikit-learn receives plain label / score vectors.
    predictions = vstack(predictions).ravel()
    actuals = vstack(actuals).ravel()

    # calculate AUC against a constant "no skill" baseline
    ns_probs = [0 for _ in range(len(X_test))]
    ns_auc = roc_auc_score(actuals, ns_probs)
    lr_auc = roc_auc_score(actuals, predictions)

    # calculate roc curves (same label vector as the AUC above)
    ns_fpr, ns_tpr, _ = roc_curve(actuals, ns_probs)
    lr_fpr, lr_tpr, _ = roc_curve(actuals, predictions)

    if cv:
        return lr_auc, lr_fpr, lr_tpr

    # summarize scores
    print('No skill: ROC AUC=%.3f' % (ns_auc))
    print('Neural network: ROC AUC=%.3f' % (lr_auc))

    # plot the roc curve for the model
    plt.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill')
    plt.plot(lr_fpr, lr_tpr, marker='.', label='Neural network')
    # axis labels
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    # show the legend
    plt.legend()
    # show the plot
    plt.show()

# Train/test model

In [None]:
# Seed torch so weight initialisation and dropout masks are reproducible.
torch.manual_seed(42)

# X_train stacks patients along axis 0, while DropoutNet sizes its layers from
# the (pathways, genes) shape of the single sample it receives in forward(), so
# drop the leading patient axis.
# NOTE: the name `nn` shadows the `torch.nn` module imported at the top of the
# notebook; it is kept so any later cell that refers to the model by this name
# keeps working.
nn = DropoutNet(X_train.shape[1:], 0.2)
train_model(X_train, torch.from_numpy(np.array(y_train)).float(), nn)