In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from sklearn import metrics, preprocessing
import pandas as pd
import numpy as np
import utils_rf

In [2]:
# Define model
class DNN(nn.Module):
    def __init__(self, params):
        super().__init__()
        self.linear1 = nn.Linear(params.dim1, params.dim2)
        self.bn1 = nn.BatchNorm1d(params.dim2)
        self.linear2 = nn.Linear(params.dim2, params.dim3)
        self.bn2 = nn.BatchNorm1d(params.dim3)
        self.linear3 = nn.Linear(params.dim3, params.dim5)
        self.bn3 = nn.BatchNorm1d(params.dim5)
        self.linear4 = nn.Linear(params.dim5, params.dim6)
        self.bn4 = nn.BatchNorm1d(params.dim6)
        self.linear5 = nn.Linear(params.dim6, 1)
    def forward(self, x):
        x = self.linear1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.linear2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.linear3(x)
        x = self.bn3(x)
        x = F.relu(x)
        x = self.linear4(x)
        x = self.bn4(x)
        x = F.relu(x)
        x = self.linear5(x)
        x = torch.sigmoid(x)
        return x

In [3]:
record = pd.DataFrame({'seeds':[], 'aupr':[], 'auc':[], 'precision':[], 'recall': [], 'thresh':[]})
seeds = list(range(9))
for i, seeding in enumerate(seeds):
    
    test = preprocessing.normalize(np.load("../data/datasets/MACCSKeys/macc_traindata_in" + str(i) + "_fold.npy", allow_pickle=True))
    test = torch.tensor(test)
    test_lab = np.load("../data/datasets/MACCSKeys/macc_trainlab_in" + str(i) + "_fold.npy", allow_pickle=True)
    
    model = torch.load("../models/DNN/MACCSKeys/pytorch_es" + str(i) + "_macc.pt")
    model.eval()
    y_pred = model(test)
    y_pred = y_pred.detach().numpy()
    print(y_pred.shape)
    y_pred = y_pred.reshape((y_pred.shape[0],))
    print("Shape of prediction:", y_pred.shape)
    print(y_pred[0:4])
    
    precision, recall, thresholds = metrics.precision_recall_curve(test_lab, y_pred)
    fpr, tpr, thresholds_r = metrics.roc_curve(test_lab, y_pred)
    pr = np.array([precision, recall]).T
    fp = np.array([fpr, tpr]).T
    utils_rf.mat2csv(pr, ["precision", "recall"], "../temp/MACCSKeys/pr_dnnwc"+str(seeding)+"macc_pytorch.csv")
    utils_rf.mat2csv(fp, ["fpr", "tpr"], "../temp/MACCSKeys/fp_dnnwc"+str(seeding)+"macc_pytorch.csv")
    f1_score, threshold_b, precision_b, recall_b = utils_rf.f1_score_best(precision, recall, thresholds) 
    
    auc_score = metrics.auc(fpr, tpr)
    aupr = metrics.auc(recall, precision)
    #preserved precision and recall are recording under best f1 score
    record = pd.concat([record, pd.DataFrame([[seeding, aupr, auc_score, precision_b, recall_b, threshold_b]], 
                                             columns= ['seeds', 'aupr', 'auc', 'precision', 'recall', 'thresh'])])


(29748, 1)
Shape of prediction: (29748,)
[0.06997786 0.1273372  0.14856794 0.1530163 ]
(29748, 1)
Shape of prediction: (29748,)
[0.07928959 0.20345809 0.09689323 0.11196559]
(29748, 1)
Shape of prediction: (29748,)
[0.1284048  0.15764041 0.14182179 0.13634273]
(29748, 1)
Shape of prediction: (29748,)
[0.07382014 0.19859454 0.13669741 0.14722575]
(29748, 1)
Shape of prediction: (29748,)
[0.0655923  0.18673354 0.13230634 0.08517756]
(29749, 1)
Shape of prediction: (29749,)
[0.1274955  0.16801709 0.1865375  0.19061415]
(29749, 1)
Shape of prediction: (29749,)
[0.06541879 0.18968727 0.11279407 0.15241547]
(29749, 1)
Shape of prediction: (29749,)
[0.06571624 0.2017352  0.11717538 0.1728142 ]
(29749, 1)
Shape of prediction: (29749,)
[0.08628744 0.17240828 0.11763445 0.11698389]


In [4]:
record

Unnamed: 0,seeds,aupr,auc,precision,recall,thresh
0,0.0,0.333851,0.845113,0.308939,0.50982,0.171306
0,1.0,0.337341,0.845343,0.332944,0.476389,0.198801
0,2.0,0.37557,0.867911,0.316092,0.597327,0.175169
0,3.0,0.347113,0.851119,0.351369,0.461153,0.224359
0,4.0,0.330376,0.845441,0.313811,0.512531,0.189047
0,5.0,0.367423,0.867406,0.355491,0.513784,0.202157
0,6.0,0.348859,0.852596,0.322042,0.532164,0.192895
0,7.0,0.328823,0.839919,0.308302,0.519632,0.185236
0,8.0,0.338797,0.849804,0.320062,0.520468,0.187452


In [5]:
record = record.reset_index(drop=True)
record.to_csv("../temp/MACCSKeys/repeats10dnnwc_macc_pytorch.csv", header = True, index =  True)

In [6]:
print("mean of aupr is {} with standard deviation {}".format(np.mean(record['aupr']), np.std(record['aupr'])))
print("mean of auc is {} with standard deviation {}".format(np.mean(record['auc']), np.std(record['auc'])))
print("mean of precision is {} with standard deviation {}".format(np.mean(record['precision']), np.std(record['precision'])))
print("mean of recall is {} with standard deviation {}".format(np.mean(record['recall']), np.std(record['recall'])))

mean of aupr is 0.3453504159738315 with standard deviation 0.015465250895982278
mean of auc is 0.8516280831401791 with standard deviation 0.009285138843581644
mean of precision is 0.325450216447533 with standard deviation 0.016535344807782033
mean of recall is 0.5159187881638596 with standard deviation 0.03580924237507529
