In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from sklearn import metrics, preprocessing
import pandas as pd
import numpy as np
import utils_rf

In [2]:
# Define model
class DNN(nn.Module):
    def __init__(self, params):
        super().__init__()
        self.linear1 = nn.Linear(params.dim1, params.dim2)
        self.bn1 = nn.BatchNorm1d(params.dim2)
        self.linear2 = nn.Linear(params.dim2, params.dim3)
        self.bn2 = nn.BatchNorm1d(params.dim3)
        self.linear3 = nn.Linear(params.dim3, params.dim5)
        self.bn3 = nn.BatchNorm1d(params.dim5)
        self.linear4 = nn.Linear(params.dim5, params.dim6)
        self.bn4 = nn.BatchNorm1d(params.dim6)
        self.linear5 = nn.Linear(params.dim6, 1)
    def forward(self, x):
        x = self.linear1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.linear2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.linear3(x)
        x = self.bn3(x)
        x = F.relu(x)
        x = self.linear4(x)
        x = self.bn4(x)
        x = F.relu(x)
        x = self.linear5(x)
        x = torch.sigmoid(x)
        return x

In [3]:
record = pd.DataFrame({'seeds':[], 'aupr':[], 'auc':[], 'precision':[], 'recall': [], 'thresh':[]})
seeds = list(range(9))
for i, seeding in enumerate(seeds):
    
    test = preprocessing.normalize(np.load("../data/temp/traindata_in" + str(i) + "_fold.npy", allow_pickle=True))
    test = torch.tensor(test)
    test_lab = np.load("../data/temp/trainlab_in" + str(i) + "_fold.npy", allow_pickle=True)
    
    model = torch.load("../models/DNN/pytorch_es" + str(i) + ".pt")
    model.eval()
    y_pred = model(test)
    y_pred = y_pred.detach().numpy()
    print(y_pred.shape)
    y_pred = y_pred.reshape((y_pred.shape[0],))
    print("Shape of prediction:", y_pred.shape)
    print(y_pred[0:4])
    
    precision, recall, thresholds = metrics.precision_recall_curve(test_lab, y_pred)
    fpr, tpr, thresholds_r = metrics.roc_curve(test_lab, y_pred)
    pr = np.array([precision, recall]).T
    fp = np.array([fpr, tpr]).T
    utils_rf.mat2csv(pr, ["precision", "recall"], "../temp/pr_dnnwc"+str(seeding)+"train_pytorch.csv")
    utils_rf.mat2csv(fp, ["fpr", "tpr"], "../temp/fp_dnnwc"+str(seeding)+"train_pytorch.csv")
    f1_score, threshold_b, precision_b, recall_b = utils_rf.f1_score_best(precision, recall, thresholds) 
    
    auc_score = metrics.auc(fpr, tpr)
    aupr = metrics.auc(recall, precision)
    #preserved precision and recall are recording under best f1 score
    record = pd.concat([record, pd.DataFrame([[seeding, aupr, auc_score, precision_b, recall_b, threshold_b]], 
                                             columns= ['seeds', 'aupr', 'auc', 'precision', 'recall', 'thresh'])])


(29748, 1)
Shape of prediction: (29748,)
[0.2088386  0.5383126  0.36996752 0.5412351 ]
(29748, 1)
Shape of prediction: (29748,)
[0.368382   0.39628738 0.39628738 0.39628738]
(29748, 1)
Shape of prediction: (29748,)
[0.44821352 0.59125596 0.8542522  0.5427725 ]
(29748, 1)
Shape of prediction: (29748,)
[0.27385315 0.39155966 0.40906507 0.39003605]
(29748, 1)
Shape of prediction: (29748,)
[0.46731064 0.45350474 0.57022107 0.4200658 ]
(29749, 1)
Shape of prediction: (29749,)
[0.411645   0.411645   0.411645   0.36553657]
(29749, 1)
Shape of prediction: (29749,)
[0.2932678  0.7975335  0.11824229 0.6556641 ]
(29749, 1)
Shape of prediction: (29749,)
[0.19057205 0.7205927  0.861785   0.23386937]
(29749, 1)
Shape of prediction: (29749,)
[0.46338424 0.7424124  0.5252438  0.4893403 ]


In [4]:
record

Unnamed: 0,seeds,aupr,auc,precision,recall,thresh
0,0.0,0.773472,0.97507,0.639209,0.810698,0.279273
0,1.0,0.736195,0.973257,0.599589,0.854158,0.271033
0,2.0,0.771548,0.975691,0.654278,0.779449,0.325529
0,3.0,0.672594,0.969786,0.596195,0.824561,0.221726
0,4.0,0.775216,0.976888,0.64335,0.818296,0.289988
0,5.0,0.733803,0.973426,0.603443,0.863826,0.26068
0,6.0,0.793615,0.976659,0.660819,0.802423,0.336017
0,7.0,0.764009,0.972579,0.614567,0.796575,0.303105
0,8.0,0.742346,0.975899,0.635313,0.82665,0.296473


In [5]:
record = record.reset_index(drop=True)
record.to_csv("../temp/repeats10dnnwc_train_pytorch.csv", header = True, index =  True)

In [6]:
print("mean of aupr is {} with standard deviation {}".format(np.mean(record['aupr']), np.std(record['aupr'])))
print("mean of auc is {} with standard deviation {}".format(np.mean(record['auc']), np.std(record['auc'])))
print("mean of precision is {} with standard deviation {}".format(np.mean(record['precision']), np.std(record['precision'])))
print("mean of recall is {} with standard deviation {}".format(np.mean(record['recall']), np.std(record['recall'])))

mean of aupr is 0.7514219799724441 with standard deviation 0.033722020443229035
mean of auc is 0.9743617272383638 with standard deviation 0.0021730673208629615
mean of precision is 0.6274180014650104 with standard deviation 0.023049528095691052
mean of recall is 0.8196261420138078 with standard deviation 0.02526047816381605
