In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from sklearn import metrics, preprocessing
import pandas as pd
import numpy as np
import utils_rf

In [2]:
# Define model
class DNN(nn.Module):
    def __init__(self, params):
        super().__init__()
        self.linear1 = nn.Linear(params.dim1, params.dim2)
        self.bn1 = nn.BatchNorm1d(params.dim2)
        self.linear2 = nn.Linear(params.dim2, params.dim3)
        self.bn2 = nn.BatchNorm1d(params.dim3)
        self.linear3 = nn.Linear(params.dim3, params.dim5)
        self.bn3 = nn.BatchNorm1d(params.dim5)
        self.linear4 = nn.Linear(params.dim5, params.dim6)
        self.bn4 = nn.BatchNorm1d(params.dim6)
        self.linear5 = nn.Linear(params.dim6, 1)
    def forward(self, x):
        x = self.linear1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.linear2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.linear3(x)
        x = self.bn3(x)
        x = F.relu(x)
        x = self.linear4(x)
        x = self.bn4(x)
        x = F.relu(x)
        x = self.linear5(x)
        x = torch.sigmoid(x)
        return x

In [3]:
test = preprocessing.normalize(np.load("../data/datasets/MACCSKeys/test_tensor_csigma005_macc.npy", allow_pickle=True))
test = torch.tensor(test)
test_lab = np.load("../data/datasets/MACCSKeys/test_label_csima005_macc.npy", allow_pickle=True)
record = pd.DataFrame({'seeds':[], 'aupr':[], 'auc':[], 'precision':[], 'recall': [], 'thresh':[]})
seeds = list(range(9))
for i, seeding in enumerate(seeds):
    
    model = torch.load("../models/DNN/MACCSKeys/pytorch_es" + str(i) + "_macc.pt")
    model.eval()
    y_pred = model(test)
    y_pred = y_pred.detach().numpy()
    y_pred = y_pred.reshape((y_pred.shape[0],))
    print("Shape of prediction:", y_pred.shape)
    print(y_pred[0:4])
    
    precision, recall, thresholds = metrics.precision_recall_curve(test_lab, y_pred)
    fpr, tpr, thresholds_r = metrics.roc_curve(test_lab, y_pred)
    pr = np.array([precision, recall]).T
    fp = np.array([fpr, tpr]).T
    utils_rf.mat2csv(pr, ["precision", "recall"], "../temp/MACCSKeys/pr_dnnwc"+str(seeding)+"macc_pytorch_test.csv")
    utils_rf.mat2csv(fp, ["fpr", "tpr"], "../temp/MACCSKeys/fp_dnnwc"+str(seeding)+"macc_pytorch_test.csv")
    f1_score, threshold_b, precision_b, recall_b = utils_rf.f1_score_best(precision, recall, thresholds) 
    
    auc_score = metrics.auc(fpr, tpr)
    aupr = metrics.auc(recall, precision)
    #preserved precision and recall are recording under best f1 score
    record = pd.concat([record, pd.DataFrame([[seeding, aupr, auc_score, precision_b, recall_b, threshold_b]], 
                                             columns= ['seeds', 'aupr', 'auc', 'precision', 'recall', 'thresh'])])


Shape of prediction: (3720,)
[0.38875866 0.6716575  0.36915243 0.0903445 ]
Shape of prediction: (3720,)
[0.3689341  0.62515503 0.38538298 0.07314398]
Shape of prediction: (3720,)
[0.3630861  0.57690144 0.38504177 0.09673954]
Shape of prediction: (3720,)
[0.41130167 0.71128964 0.34486625 0.14596987]


  f1_score = np.divide((2 * np.multiply(precision, recall)),  (precision + recall))
  f1_score = np.divide((2 * np.multiply(precision, recall)),  (precision + recall))


Shape of prediction: (3720,)
[0.37649176 0.6601124  0.33646938 0.07419517]
Shape of prediction: (3720,)
[0.30586568 0.6654436  0.471935   0.05157419]
Shape of prediction: (3720,)
[0.4162195  0.73904115 0.3906235  0.08018388]
Shape of prediction: (3720,)
[0.39383757 0.6329161  0.26851955 0.12061046]
Shape of prediction: (3720,)
[0.35631555 0.67240375 0.4009962  0.09491933]


  f1_score = np.divide((2 * np.multiply(precision, recall)),  (precision + recall))


In [4]:
record

Unnamed: 0,seeds,aupr,auc,precision,recall,thresh
0,0.0,0.320235,0.809869,0.0,0.0,0.679846
0,1.0,0.306248,0.807054,0.0,0.0,0.63772
0,2.0,0.322512,0.839256,0.312016,0.536667,0.186752
0,3.0,0.328937,0.826422,0.331839,0.493333,0.199513
0,4.0,0.318862,0.811139,0.299603,0.503333,0.184234
0,5.0,0.317753,0.814419,0.381703,0.403333,0.232793
0,6.0,0.324702,0.823862,0.0,0.0,0.743416
0,7.0,0.314185,0.811292,0.308793,0.503333,0.187284
0,8.0,0.313534,0.813817,0.323185,0.46,0.198474


In [5]:
record = record.reset_index(drop=True)
record.to_csv("../temp/MACCSKeys/repeats10dnnwc_macc_pytorch_test.csv", header = True, index =  True)

In [6]:
print("mean of aupr is {} with standard deviation {}".format(np.mean(record['aupr']), np.std(record['aupr'])))
print("mean of auc is {} with standard deviation {}".format(np.mean(record['auc']), np.std(record['auc'])))
print("mean of precision is {} with standard deviation {}".format(np.mean(record['precision']), np.std(record['precision'])))
print("mean of recall is {} with standard deviation {}".format(np.mean(record['recall']), np.std(record['recall'])))

mean of aupr is 0.31855196516699164 with standard deviation 0.006329582615447696
mean of auc is 0.8174587935889106 with standard deviation 0.009791045998397557
mean of precision is 0.21745990903060364 with standard deviation 0.15532501806195484
mean of recall is 0.3222222222222222 with standard deviation 0.23043705120466462
