# Evaluation Results of Adverse Drug Reactions (ADRs) in OFFSIDES

Import necessary modules: 

In [19]:
import numpy as np
import pandas as pd
from ADRprofilePrediction import Pairs2Mat, evaluation
from Models import loadHyperpar
import json

In [20]:
import sklearn
# Show which scikit-learn version is installed in the kernel running this notebook.
print(sklearn.__version__)

0.24.2


## Load data

Load the feature data into a dictionary. Drug-target, drug-enzyme, drug-chemical structure fingerprint, drug-gene interaction, drug-transporter, drug-pathway and drug-indication features are included.

In [21]:
# Feature matrices keyed by feature name. Every entry except "Chem" is produced
# by Pairs2Mat from the two named columns of a pairs file (presumably pivoting
# the pairs into a drug x feature matrix -- see ADRprofilePrediction.Pairs2Mat);
# "Chem" is read directly as a tab-separated table with its first column as index.
features_dict = {}
features_dict["target"] = Pairs2Mat(
    path="data/drug_target.tsv", colname1="0", colname2="1"
)
features_dict["enzyme"] = Pairs2Mat(
    path="data/drug_enzyme.tsv", colname1="0", colname2="1"
)
features_dict["Chem"] = pd.read_csv(
    "data/drug_chemsfp.tsv", sep="\t", header=0, index_col=0
)
features_dict["DGI"] = Pairs2Mat(
    path="data/interactions.tsv", colname1="drug_claim_name", colname2="gene_name"
)
features_dict["transporter"] = Pairs2Mat(
    path="data/drug_transporter.tsv", colname1="0", colname2="1"
)
features_dict["pathway"] = Pairs2Mat(
    path="data/drug_pathway.tsv", colname1="0", colname2="1"
)
features_dict["indication"] = Pairs2Mat(
    path="data/drug_indication.tsv", colname1="1_x", colname2="6"
)


Load ADR data from SIDER and OFFSIDES. The variable `SEs` is a dict that stores the ADR data. The variable `filter` controls which ADRs are kept according to their frequency (the column sum of the drug-ADR matrix). When `filter` is "all", only the ADRs with extremely low frequencies (fewer than 5) are removed; when `filter` is "rare", only the ADRs with a frequency of less than 50 are used.


In [22]:
# Frequency filter applied to the ADR columns:
#   "all"  -> keep ADRs whose column sum is at least 5
#   "rare" -> keep ADRs whose column sum is below 50
# NOTE: `filter` shadows the Python builtin of the same name; the name is kept
# because later cells interpolate it into the result file names.
filter = "all"
if filter not in ("all", "rare"):
    # Fail here instead of leaving `SEs` empty and hitting a KeyError much later.
    raise ValueError(f'unknown filter {filter!r}; expected "all" or "rare"')

# The raw matrices are the same for both filter settings, so load them once.
SIDER = Pairs2Mat(path="data/drug_se.tsv", colname1="1_x", colname2="5")
OFFSIDERS = Pairs2Mat(
    path="data/OFFSIDES.csv",
    colname1="drug_concept_name",
    colname2="condition_concept_name",
    sep=",",
)

# SEs maps the data set name to its column-filtered ADR matrix.
SEs = {}
for se_source, se_matrix in (("SIDER", SIDER), ("OFFSIDES", OFFSIDERS)):
    column_sums = np.sum(se_matrix, axis=0)
    if filter == "all":
        keep_columns = column_sums >= 5
    else:  # filter == "rare"
        keep_columns = column_sums < 50
    SEs[se_source] = se_matrix.loc[:, keep_columns]


## Set variables

The variables below include all the options for the code.

- features_names: This variable is the list of all the features, including the target feature, the enzyme feature, the chemical structure fingerprint (Chem), the drug-gene interaction (DGI), the transporter feature, the pathway feature and the indication feature.
- SEs_names: The names of the available ADR data sets, SIDER and OFFSIDES (this variable is commented out in the code below).
- methods: The machine learning methods used for prediction, including Smoothed Kernel Regression (SKR), Kernel Regression (KR), the naïve method (Naive), Linear Neighbourhood Similarity Method using Regularized Linear Neighbour Similarity or Jaccard similarity (LNSM_RLN, LNSM_jaccard), Support Vector Machine (SVM), Random Forest (RF) and Boosted Random Forest (BRF).
- metrice_names: The metrics used to evaluate the performance of the methods: AUPR, AUROC, AUPR per drug, AUROC per drug, AUPR+AUROC and AUPR+AUROC per drug.
- SEs_name: The ADR data set used in this file.
- metrice: The tuning metric; we used AUPR as the tuning metric in Nested CV and CV.

In [25]:
# Feature sets to evaluate; each name is a key of `features_dict`.
features_names = [
    "target",
    "enzyme",
    "Chem",
    "DGI",
    "transporter",
    "pathway",
    "indication",
]

# Methods evaluated in this notebook. Other method names that appear in the
# disabled alternatives: "KR", "SVM", "OCCA", "SCCA", "RF", "BRF".
# The ADR data set names are "SIDER" and "OFFSIDES"; only one is used per run (SEs_name).
methods = [
    "SKR",
    "KRR",
    "VKR",
    "Naive",
    "LNSM_RLN",
    "LNSM_jaccard",
]

# Metric names.
tuning_metrices = ["AUROC", "AUPR", "AUROCperdrug", "AUPRperdrug"]
metrice_names = [
    "AUPR+AUROC",
    "AUPR+AUROCperdrug",
    "AUROC",
    "AUPR",
    "AUROCperdrug",
    "AUPRperdrug",
]

# ADR data set and tuning metric used for this run.
SEs_name = "OFFSIDES"
metrice = "AUPR"

Set the variables for hyperparameters. We summarized 3 types of hyperparameters (SVM, RF and BRF are time-consuming and not competitive, so they were tuned and trained in a separate file, SVM_RF.ipynb): 
 - A: These hyperparameters are tuned on the grid $\dots, 10^{-1}, 10^{0}, 10^{1}, \dots$ ($\sigma_X$ and $\sigma_Y$ do not change during tuning, so they can be fixed at $10$ and $100$ respectively).
 - B: These hyperparameters lie in $[0,1]$ and are tuned on the grid $0.1, 0.2, \dots, 0.9$.
 - C: These hyperparameters are tuned on the grid $5, 10, 15, \dots$.

In [26]:
# Candidate grids.
# A: powers of ten with exponents -2..2.
A = np.power(10, np.arange(-2, 3, 1, dtype=float))
# B: 0.1 to 0.9 in steps of 0.1 (the stop value 1 is excluded by arange).
B = np.arange(0.1, 1, 0.1, dtype=float)
# C: 5, 10, 15.
C = np.arange(5, 20, 5, dtype=int)
# Single-value grids (10 and 100) for hyperparameters that are not tuned.
A10 = np.power(10, np.arange(1, 2, 1, dtype=float))
A100 = np.power(10, np.arange(2, 3, 1, dtype=float))

# Per-method list of grids, in the positional order passed to loadHyperpar.
# Methods not evaluated in this notebook (KR, SVM, OCCA, SCCA, RF, BRF) have no entry.
all_hyperparlist = dict(
    SKR=[A, B, A10, A100],
    KRR=[A, A],
    VKR=[A, A, C],
    Naive=[],
    LNSM_RLN=[B, A],
    LNSM_jaccard=[B],
)

Set dictionaries to store the tuned hyperparameters and the results of CV and Nested CV.

In [28]:
# Containers filled by the evaluation loop, keyed first by validation scheme
# ("nested_cv" / "cv"); deeper levels are added per method and per feature.
hyperparsOut = {"nested_cv": {}, "cv": {}}
results = {"nested_cv": {}, "cv": {}}

## Nested CV and CV

Load tuned hyperparameters. If fully rerunning the tuning step of Nested CV and CV is required, please skip loading variable hyperpars and remove the option `hyperparfixed` of the function `evaluation()`.

In [35]:

# Read the previously tuned hyperparameters. The content is parsed as JSON even
# though the file name ends in ".xml".
with open(f'results/hyperpars_{SEs_name}.xml', 'r') as hyperpar_file:
    hyperpars = json.load(hyperpar_file)

In [36]:
# For every method, evaluate all features under nested CV first and then under
# plain CV. Scores go into `results` and the hyperparameters returned by
# `evaluation` into `hyperparsOut`, both keyed [validation][method][feature].
for method in methods:
    for validation in ("nested_cv", "cv"):
        hyperparsOut[validation][method] = {}
        results[validation][method] = {}
        # The loop variable is deliberately not called `str`: that would shadow
        # the builtin for every cell executed afterwards.
        for feature_name in features_names:
            print(f"using feature {feature_name}")
            # Rebuilt for every feature so the call sequence (and any logging
            # done by loadHyperpar) stays the same as before.
            hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
            # `hyperparfixed` supplies the stored hyperparameters loaded into
            # `hyperpars`; drop that argument to rerun the tuning from scratch.
            fold_results, tuned_hyperpars = evaluation(
                Y=SEs[SEs_name],
                X=features_dict[feature_name],
                method_option=method,
                tuning_metrice=metrice,
                hyperparList=hyperparList,
                hyperparfixed=hyperpars[validation][method][feature_name],
                Validation=validation,
                n_jobs=1,
            )
            results[validation][method][feature_name] = fold_results
            hyperparsOut[validation][method][feature_name] = tuned_hyperpars

using feature target
The SKR requires hyperparameter lambda, c, sigma_X, sigma_Y
---------- nested cv start ----------
Fold: 0
number of hyperpars combination:  45
first few training idx:  [  55   75  219  255  516  563  569  804 1031 1046]
first few testing idx:  [ 260  324  368  553  601  692  712  998 1095 1115]
--- tuning end ---
target size: 226
------ best hyper pars:  [0.01, 0.6, 10, 100] ------
SKR starts:
SKR ends:
-----------
AUPRperdrug: 0.6051062970060217
AUROCperdrug: 0.946256519690964
AUPR+AUROCperdrug: 1.5513628166969857
AUPR: 0.4702759030019825
AUROC: 0.8419127337490575
AUPR+AUROC: 1.31218863675104
-----------
Fold: 1
number of hyperpars combination:  45
first few training idx:  [ 260  324  368  553  601  692  712  998 1095 1115]
first few testing idx:  [  55   75  219  255  516  563  569  804 1031 1046]
--- tuning end ---
target size: 226
------ best hyper pars:  [0.01, 0.6, 10, 100] ------
SKR starts:
SKR ends:
-----------
AUPRperdrug: 0.6317439735330859
AUROCperdrug:



VKR ends:
-----------
AUPRperdrug: 0.6078036806287928
AUROCperdrug: 0.9472844392650953
AUPR+AUROCperdrug: 1.5550881198938882
AUPR: 0.45891078928961593
AUROC: 0.8412037793576326
AUPR+AUROC: 1.3001145686472486
-----------
Fold: 1
number of hyperpars combination:  75
first few training idx:  [ 260  324  368  553  601  692  712  998 1095 1115]
first few testing idx:  [  55   75  219  255  516  563  569  804 1031 1046]
--- tuning end ---
target size: 226
------ best hyper pars:  [10, 0.01, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6364473411577233
AUROCperdrug: 0.9480961734060978
AUPR+AUROCperdrug: 1.584543514563821
AUPR: 0.5138423241832403
AUROC: 0.8634511633169982
AUPR+AUROC: 1.3772934875002385
-----------
Fold: 2
number of hyperpars combination:  75
first few training idx:  [ 260  324  368  553  601  692  712  998 1095 1115]
first few testing idx:  [ 17  27  70 395 511 590 594 742 930 949]
--- tuning end ---
target size: 226
------ best hyper pars:  [10, 0.1, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6127924987733145
AUROCperdrug: 0.9507267416718039
AUPR+AUROCperdrug: 1.5635192404451184
AUPR: 0.47007135573554787
AUROC: 0.8462222921386358
AUPR+AUROC: 1.3162936478741836
-----------
Fold: 3
number of hyperpars combination:  75
first few training idx:  [ 260  324  368  553  601  692  712  998 1095 1115]
first few testing idx:  [  79  211  252  524  566  661  807  844  935 1110]
--- tuning end ---
target size: 226
------ best hyper pars:  [10, 0.1, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6181265513169604
AUROCperdrug: 0.9524451418333105
AUPR+AUROCperdrug: 1.570571693150271
AUPR: 0.48520445672782
AUROC: 0.8724478657363697
AUPR+AUROC: 1.3576523224641897
-----------
Fold: 4
number of hyperpars combination:  75
first few training idx:  [ 260  324  368  553  601  692  712  998 1095 1115]
first few testing idx:  [  48  497  732  739  767  911  986 1002 1025 1056]
--- tuning end ---
target size: 226
------ best hyper pars:  [10, 0.01, 15] ------
VKR starts:
VKR ends:
-----------
AUPRperdrug: 0.6389996655262151
AUROCperdrug: 0.9444826281233873
AUPR+AUROCperdrug: 1.5834822936496025
AUPR: 0.4643241553541004
AUROC: 0.8199242827644557
AUPR+AUROC: 1.2842484381185562
-----------
Mean AUPRperdrug: 0.6228339474806012, std: 0.012613913902969976
Mean AUROCperdrug: 0.948607024859939, std: 0.002765689482492318
Mean AUPR+AUROCperdrug: 1.57144097234054, std: 0.011380644070143685
Mean AUPR: 0.47847061625806486, std: 0.01974719774289101
Mean AUROC: 0.84864



VKR ends:
-----------
AUPRperdrug: 0.6229575177540797
AUROCperdrug: 0.945652057778122
AUPR+AUROCperdrug: 1.5686095755322018
AUPR: 0.5526119169505237
AUROC: 0.8806266891277015
AUPR+AUROC: 1.4332386060782252
-----------
Fold: 1
number of hyperpars combination:  75
first few training idx:  [ 71 182 214 271 349 360 537 542 717 722]
first few testing idx:  [109 156 196 307 336 501 634 683 685 739]
--- tuning end ---
target size: 158
------ best hyper pars:  [10, 0.01, 5] ------
VKR starts:
VKR ends:
-----------
AUPRperdrug: 0.6570915181439249
AUROCperdrug: 0.9456764747280073
AUPR+AUROCperdrug: 1.6027679928719323
AUPR: 0.5248073678249707
AUROC: 0.8541683411517864
AUPR+AUROC: 1.3789757089767571
-----------
Fold: 2
number of hyperpars combination:  75
first few training idx:  [ 71 182 214 271 349 360 537 542 717 722]
first few testing idx:  [  0 114 163 306 314 325 332 484 530 572]
--- tuning end ---
target size: 158
------ best hyper pars:  [10, 0.01, 10] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6535350702336494
AUROCperdrug: 0.9411604375775537
AUPR+AUROCperdrug: 1.594695507811203
AUPR: 0.5194719975657285
AUROC: 0.8342863794431553
AUPR+AUROC: 1.3537583770088837
-----------
Fold: 3
number of hyperpars combination:  75
first few training idx:  [ 71 182 214 271 349 360 537 542 717 722]
first few testing idx:  [  8  59  66 260 305 310 470 666 715 773]
--- tuning end ---
target size: 158
------ best hyper pars:  [10, 1, 10] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.667011154405709
AUROCperdrug: 0.941729118898197
AUPR+AUROCperdrug: 1.608740273303906
AUPR: 0.5432748118537275
AUROC: 0.8423422662755876
AUPR+AUROC: 1.3856170781293151
-----------
Fold: 4
number of hyperpars combination:  75
first few training idx:  [ 71 182 214 271 349 360 537 542 717 722]
first few testing idx:  [  3 169 199 294 383 387 528 574 755 760]
--- tuning end ---
target size: 158
------ best hyper pars:  [10, 1, 10] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6326621357708746
AUROCperdrug: 0.947227282287512
AUPR+AUROCperdrug: 1.5798894180583867
AUPR: 0.5193951669825563
AUROC: 0.8693246051752868
AUPR+AUROC: 1.388719772157843
-----------
Mean AUPRperdrug: 0.6466514792616476, std: 0.016297239960986892
Mean AUROCperdrug: 0.9442890742538784, std: 0.0023982247387351815
Mean AUPR+AUROCperdrug: 1.5909405535155259, std: 0.014775778833081458
Mean AUPR: 0.5319122522355013, std: 0.01356097800571468
Mean AUROC: 0.8561496562347035, std: 0.01700162885041677
Mean AUPR+AUROC: 1.3880619084702048, std: 0.02571627997493548
-----------
using feature Chem
The VKR requires hyperparameter lambda, sigma_X, k
---------- nested cv start ----------
Fold: 0
number of hyperpars combination:  75
first few training idx:  [ 48  53  91 225 284 301 394 465 516 658]
first few testing idx:  [ 22  83 121 162 181 373 456 459 594 637]
--- tuning end ---
target size: 138
------ best hyper pars:  [10, 10, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.7180553814979576
AUROCperdrug: 0.9429664624195588
AUPR+AUROCperdrug: 1.6610218439175164
AUPR: 0.5953364346810043
AUROC: 0.8722205584224462
AUPR+AUROC: 1.4675569931034504
-----------
Fold: 1
number of hyperpars combination:  75
first few training idx:  [ 22  83 121 162 181 373 456 459 594 637]
first few testing idx:  [ 48  53  91 225 284 301 394 465 516 658]
--- tuning end ---
target size: 138
------ best hyper pars:  [10, 1, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6990834224274423
AUROCperdrug: 0.9416459649490374
AUPR+AUROCperdrug: 1.6407293873764797
AUPR: 0.5741642644008382
AUROC: 0.8579319919553998
AUPR+AUROC: 1.432096256356238
-----------
Fold: 2
number of hyperpars combination:  75
first few training idx:  [ 22  83 121 162 181 373 456 459 594 637]
first few testing idx:  [ 33 105 210 382 447 451 508 613 677 680]
--- tuning end ---
target size: 138
------ best hyper pars:  [10, 1, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.7018825130046735
AUROCperdrug: 0.9408509107053369
AUPR+AUROCperdrug: 1.6427334237100104
AUPR: 0.553181779363835
AUROC: 0.835560468272161
AUPR+AUROC: 1.3887422476359959
-----------
Fold: 3
number of hyperpars combination:  75
first few training idx:  [ 22  83 121 162 181 373 456 459 594 637]
first few testing idx:  [156 187 293 302 370 391 464 584 587 632]
--- tuning end ---
target size: 138
------ best hyper pars:  [10, 10, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6886446248105299
AUROCperdrug: 0.9425604148356597
AUPR+AUROCperdrug: 1.6312050396461895
AUPR: 0.5407683180919922
AUROC: 0.8417803095946501
AUPR+AUROC: 1.3825486276866423
-----------
Fold: 4
number of hyperpars combination:  75
first few training idx:  [ 22  83 121 162 181 373 456 459 594 637]
first few testing idx:  [169 199 240 294 346 387 463 528 573 666]
--- tuning end ---
target size: 138
------ best hyper pars:  [10, 10, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.7120709294835539
AUROCperdrug: 0.9401184689190464
AUPR+AUROCperdrug: 1.6521893984026002
AUPR: 0.5774628476806329
AUROC: 0.8613475433906189
AUPR+AUROC: 1.438810391071252
-----------
Mean AUPRperdrug: 0.7039473742448314, std: 0.010267656381500503
Mean AUROCperdrug: 0.9416284443657279, std: 0.0010529722392032925
Mean AUPR+AUROCperdrug: 1.6455758186105591, std: 0.010202014781100386
Mean AUPR: 0.5681827288436605, std: 0.01916142748392496
Mean AUROC: 0.8537681743270552, std: 0.013345442308679992
Mean AUPR+AUROC: 1.4219509031707156, std: 0.03200762433616871
-----------
using feature DGI
The VKR requires hyperparameter lambda, sigma_X, k
---------- nested cv start ----------
Fold: 0
number of hyperpars combination:  75
first few training idx:  [   9   83   98  138  335  508  517  678  787 1194]
first few testing idx:  [  78  327  487  663  771  801  809  960  970 1098]
--- tuning end ---
target size: 250
------ best hyper pars:  [10, 0.01, 15] ------
VKR st



VKR ends:
-----------
AUPRperdrug: 0.595709732457896
AUROCperdrug: 0.9475577364086083
AUPR+AUROCperdrug: 1.5432674688665045
AUPR: 0.5830969895329329
AUROC: 0.8878702661126624
AUPR+AUROC: 1.4709672556455953
-----------
Fold: 1
number of hyperpars combination:  75
first few training idx:  [  78  327  487  663  771  801  809  960  970 1098]
first few testing idx:  [   9   83   98  138  335  508  517  678  787 1194]
--- tuning end ---
target size: 250
------ best hyper pars:  [10, 0.1, 15] ------
VKR starts:
VKR ends:
-----------
AUPRperdrug: 0.6370769691473834
AUROCperdrug: 0.9473385951504784
AUPR+AUROCperdrug: 1.5844155642978617
AUPR: 0.6038329661755885
AUROC: 0.885650691169838
AUPR+AUROC: 1.4894836573454264
-----------
Fold: 2
number of hyperpars combination:  75
first few training idx:  [  78  327  487  663  771  801  809  960  970 1098]
first few testing idx:  [  46  471  614  647  712  835  981  989 1023 1119]
--- tuning end ---
target size: 250
------ best hyper pars:  [10, 0.1, 15]



VKR ends:
-----------
AUPRperdrug: 0.6352388525906808
AUROCperdrug: 0.947736994381181
AUPR+AUROCperdrug: 1.5829758469718618
AUPR: 0.5995431286114637
AUROC: 0.8825360340788724
AUPR+AUROC: 1.482079162690336
-----------
Fold: 3
number of hyperpars combination:  75
first few training idx:  [  78  327  487  663  771  801  809  960  970 1098]
first few testing idx:  [ 26 139 174 233 269 280 545 599 882 939]
--- tuning end ---
target size: 250
------ best hyper pars:  [10, 0.1, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.5806039491029196
AUROCperdrug: 0.9522470776777171
AUPR+AUROCperdrug: 1.5328510267806368
AUPR: 0.5292183815830531
AUROC: 0.8829578221793941
AUPR+AUROC: 1.4121762037624472
-----------
Fold: 4
number of hyperpars combination:  75
first few training idx:  [  78  327  487  663  771  801  809  960  970 1098]
first few testing idx:  [292 299 312 375 587 604 642 703 848 872]
--- tuning end ---
target size: 250
------ best hyper pars:  [10, 0.1, 15] ------
VKR starts:
VKR ends:
-----------
AUPRperdrug: 0.5939042262499181
AUROCperdrug: 0.9547946801080168
AUPR+AUROCperdrug: 1.5486989063579348
AUPR: 0.5838227677653601
AUROC: 0.8939461549935912
AUPR+AUROC: 1.4777689227589512
-----------
Mean AUPRperdrug: 0.6085067459097597, std: 0.023179414948591125
Mean AUROCperdrug: 0.9499350167452002, std: 0.0030392802754792524
Mean AUPR+AUROCperdrug: 1.5584417626549598, std: 0.021244424531843005
Mean AUPR: 0.5799028467336796, std: 0.026656611282998982
Mean AUROC: 0.886592193



VKR ends:
-----------
AUPRperdrug: 0.6993268883385947
AUROCperdrug: 0.9393031218060001
AUPR+AUROCperdrug: 1.6386300101445948
AUPR: 0.5906416789201953
AUROC: 0.85440846769367
AUPR+AUROC: 1.4450501466138652
-----------
Fold: 1
number of hyperpars combination:  75
first few training idx:  [ 90 153 246 259 271 295 296 309 327 427]
first few testing idx:  [ 68  72 132 137 138 162 192 361 430 437]
--- tuning end ---
target size: 99
------ best hyper pars:  [10, 0.1, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6941618617951186
AUROCperdrug: 0.939812593663725
AUPR+AUROCperdrug: 1.6339744554588438
AUPR: 0.5776722302005902
AUROC: 0.8405837049441099
AUPR+AUROC: 1.4182559351447002
-----------
Fold: 2
number of hyperpars combination:  75
first few training idx:  [ 90 153 246 259 271 295 296 309 327 427]
first few testing idx:  [  4  16  48  83 125 139 197 215 329 381]
--- tuning end ---
target size: 99
------ best hyper pars:  [10, 0.1, 10] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6764949204131945
AUROCperdrug: 0.9369363373924666
AUPR+AUROCperdrug: 1.613431257805661
AUPR: 0.5636375605314676
AUROC: 0.8506148275371562
AUPR+AUROC: 1.4142523880686237
-----------
Fold: 3
number of hyperpars combination:  75
first few training idx:  [ 90 153 246 259 271 295 296 309 327 427]
first few testing idx:  [ 67 102 273 314 318 391 393 423 449 466]
--- tuning end ---
target size: 99
------ best hyper pars:  [10, 0.01, 10] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6770909207316248
AUROCperdrug: 0.9417840915342919
AUPR+AUROCperdrug: 1.6188750122659168
AUPR: 0.5484182496183263
AUROC: 0.8487899783641125
AUPR+AUROC: 1.3972082279824387
-----------
Fold: 4
number of hyperpars combination:  75
first few training idx:  [ 90 153 246 259 271 295 296 309 327 427]
first few testing idx:  [ 54 100 105 198 220 277 324 346 369 448]
--- tuning end ---
target size: 99
------ best hyper pars:  [10, 0.01, 5] ------
VKR starts:
VKR ends:
-----------
AUPRperdrug: 0.7038168454359918
AUROCperdrug: 0.9378183240047041
AUPR+AUROCperdrug: 1.6416351694406959
AUPR: 0.607376019941302
AUROC: 0.8525124599509013
AUPR+AUROC: 1.4598884798922032
-----------
Mean AUPRperdrug: 0.6901782873429049, std: 0.011349799362141283
Mean AUROCperdrug: 0.9391308936802376, std: 0.0016776622246968487
Mean AUPR+AUROCperdrug: 1.6293091810231424, std: 0.011149465950402112
Mean AUPR: 0.5775491478423762, std: 0.020509574892450713
Mean AUROC: 0.84938188769799, std: 



VKR ends:
-----------
AUPRperdrug: 0.6534508715727535
AUROCperdrug: 0.9482851318619687
AUPR+AUROCperdrug: 1.601736003434722
AUPR: 0.5376303941474978
AUROC: 0.8462455882276699
AUPR+AUROC: 1.3838759823751676
-----------
Fold: 2
number of hyperpars combination:  75
first few training idx:  [ 10  36  92 145 470 503 590 640 711 730]
first few testing idx:  [ 26  84 219 261 337 365 430 466 489 639]
--- tuning end ---
target size: 150
------ best hyper pars:  [10, 0.01, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6632406507125609
AUROCperdrug: 0.9494361159290702
AUPR+AUROCperdrug: 1.612676766641631
AUPR: 0.5523373656996162
AUROC: 0.8680805125374482
AUPR+AUROC: 1.4204178782370644
-----------
Fold: 3
number of hyperpars combination:  75
first few training idx:  [ 10  36  92 145 470 503 590 640 711 730]
first few testing idx:  [ 32 103 111 172 206 280 312 434 511 666]
--- tuning end ---
target size: 150
------ best hyper pars:  [10, 0.01, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6656875155412778
AUROCperdrug: 0.9504145402179707
AUPR+AUROCperdrug: 1.6161020557592485
AUPR: 0.5363551939889755
AUROC: 0.8586460561373487
AUPR+AUROC: 1.3950012501263243
-----------
Fold: 4
number of hyperpars combination:  75
first few training idx:  [ 10  36  92 145 470 503 590 640 711 730]
first few testing idx:  [199 240 294 346 463 528 574 624 724 734]
--- tuning end ---
target size: 150
------ best hyper pars:  [10, 0.01, 15] ------
VKR starts:
VKR ends:
-----------
AUPRperdrug: 0.6616334023438774
AUROCperdrug: 0.9490551266115017
AUPR+AUROCperdrug: 1.6106885289553792
AUPR: 0.5744062415778971
AUROC: 0.8720208121988156
AUPR+AUROC: 1.4464270537767128
-----------
Mean AUPRperdrug: 0.6565189301198395, std: 0.009864493996279138
Mean AUROCperdrug: 0.9493789733173111, std: 0.0007046475749255039
Mean AUPR+AUROCperdrug: 1.6058979034371508, std: 0.010005225014836408
Mean AUPR: 0.5449472107840097, std: 0.017252859454681278
Mean AUROC: 0.8628757632507108, 



VKR ends:
-----------
AUPRperdrug: 0.7068452389388139
AUROCperdrug: 0.9454716385811446
AUPR+AUROCperdrug: 1.6523168775199584
AUPR: 0.6607717575778501
AUROC: 0.8887432312160461
AUPR+AUROC: 1.5495149887938964
-----------
Fold: 3
number of hyperpars combination:  75
first few training idx:  [ 10  75  89 121 141 168 196 404 505 698]
first few testing idx:  [103 172 304 379 384 395 434 539 568 733]
--- tuning end ---
target size: 149
------ best hyper pars:  [10, 0.01, 15] ------
VKR starts:
VKR ends:
-----------
AUPRperdrug: 0.6805400269752713
AUROCperdrug: 0.9431404044600717
AUPR+AUROCperdrug: 1.623680431435343
AUPR: 0.6535030535343376
AUROC: 0.8901283195087026
AUPR+AUROC: 1.5436313730430402
-----------
Fold: 4
number of hyperpars combination:  75
first few training idx:  [ 10  75  89 121 141 168 196 404 505 698]
first few testing idx:  [199 240 294 346 387 528 574 619 718 727]
--- tuning end ---
target size: 149
------ best hyper pars:  [10, 0.01, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.7098420309209695
AUROCperdrug: 0.9423878915113725
AUPR+AUROCperdrug: 1.652229922432342
AUPR: 0.6727703672554615
AUROC: 0.8878502953789691
AUPR+AUROC: 1.5606206626344306
-----------
Mean AUPRperdrug: 0.6985375225915414, std: 0.011465294683355273
Mean AUROCperdrug: 0.9448501797616912, std: 0.0020486015159114843
Mean AUPR+AUROCperdrug: 1.6433877023532326, std: 0.01127056945525063
Mean AUPR: 0.6656942253398488, std: 0.010285140919243294
Mean AUROC: 0.8941423673721178, std: 0.0077301443505875984
Mean AUPR+AUROC: 1.5598365927119666, std: 0.016441831704781116
-----------
using feature target
The VKR requires hyperparameter lambda, sigma_X, k
---------- cv start ----------
--- tuning end ---
target size: 374
------ best hyper pars:  [10, 100, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6233467335995113
AUROCperdrug: 0.9470456727883019
AUPR+AUROCperdrug: 1.5703924063878132
AUPR: 0.4646200426902966
AUROC: 0.8388753001525334
AUPR+AUROC: 1.30349534284283
-----------
using feature enzyme
The VKR requires hyperparameter lambda, sigma_X, k
---------- cv start ----------
--- tuning end ---
target size: 245
------ best hyper pars:  [10, 100, 15] ------
VKR starts:
VKR ends:
-----------
AUPRperdrug: 0.6615453580986718
AUROCperdrug: 0.9436422481921148
AUPR+AUROCperdrug: 1.6051876062907866
AUPR: 0.5246130988022827
AUROC: 0.8483311921666853
AUPR+AUROC: 1.372944290968968
-----------
using feature Chem
The VKR requires hyperparameter lambda, sigma_X, k
---------- cv start ----------
--- tuning end ---
target size: 235
------ best hyper pars:  [100, 0.1, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6860985265906183
AUROCperdrug: 0.9419124385254848
AUPR+AUROCperdrug: 1.6280109651161032
AUPR: 0.5409165105521107
AUROC: 0.8450093718341462
AUPR+AUROC: 1.3859258823862568
-----------
using feature DGI
The VKR requires hyperparameter lambda, sigma_X, k
---------- cv start ----------
--- tuning end ---
target size: 429
------ best hyper pars:  [100, 0.01, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6058438962669456
AUROCperdrug: 0.948397673942732
AUPR+AUROCperdrug: 1.5542415702096775
AUPR: 0.4996029571619218
AUROC: 0.85860230549908
AUPR+AUROC: 1.3582052626610017
-----------
using feature transporter
The VKR requires hyperparameter lambda, sigma_X, k
---------- cv start ----------
--- tuning end ---
target size: 158
------ best hyper pars:  [100, 0.01, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.711283979282173
AUROCperdrug: 0.9392975636955443
AUPR+AUROCperdrug: 1.6505815429777173
AUPR: 0.575755941547425
AUROC: 0.8451289532959992
AUPR+AUROC: 1.4208848948434243
-----------
using feature pathway
The VKR requires hyperparameter lambda, sigma_X, k
---------- cv start ----------
--- tuning end ---
target size: 261
------ best hyper pars:  [100, 100, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6389911382877245
AUROCperdrug: 0.9435475468182835
AUPR+AUROCperdrug: 1.582538685106008
AUPR: 0.484616654304196
AUROC: 0.8277962413396083
AUPR+AUROC: 1.3124128956438041
-----------
using feature indication
The VKR requires hyperparameter lambda, sigma_X, k
---------- cv start ----------
--- tuning end ---
target size: 251
------ best hyper pars:  [100, 100, 15] ------
VKR starts:




VKR ends:
-----------
AUPRperdrug: 0.6858423650106555
AUROCperdrug: 0.9425541648366716
AUPR+AUROCperdrug: 1.6283965298473273
AUPR: 0.5363853651698793
AUROC: 0.8431813061390275
AUPR+AUROC: 1.3795666713089068
-----------
using feature target
The Naive requires no hyperparameter
---------- nested cv start ----------
Fold: 0
number of hyperpars combination:  1
first few training idx:  [  55   75  219  255  516  563  569  804 1031 1046]
first few testing idx:  [ 260  324  368  553  601  692  712  998 1095 1115]
--- tuning end ---
target size: 226
------ best hyper pars:  [] ------
Naive starts:
Naive ends:
-----------
AUPRperdrug: 0.603300106776754
AUROCperdrug: 0.9450909678114552
AUPR+AUROCperdrug: 1.5483910745882092
AUPR: 0.4577214870963392
AUROC: 0.83876886843943
AUPR+AUROC: 1.2964903555357692
-----------
Fold: 1
number of hyperpars combination:  1
first few training idx:  [ 260  324  368  553  601  692  712  998 1095 1115]
first few testing idx:  [  55   75  219  255  516  563  569  804

## Save Output

Store the tuned hyperparameters for reproducing results.

In [None]:
# with open(f'results/hyperpars_{SEs_name}.xml', 'w') as xml_file:
#    json.dump(hyperparsOut, xml_file)

Store the results of Nested CV and CV.

In [37]:
# Write the nested-CV / CV results as JSON (note the file name ends in ".xml").
with open(f'results/results_{SEs_name}_{filter}.xml', 'w') as results_file:
    json.dump(results, results_file)

## Reorganize the Results and Calculate the P-value of Method Comparison

Load the results of Nested CV and CV.

In [38]:
# Read the stored results back; the ".xml" file holds JSON.
with open(f'results/results_{SEs_name}_{filter}.xml', 'r') as results_file:
    results = json.load(results_file)

Organize the results of Nested CV into a table.

In [39]:
# Long-format table with one row per (method, feature, metric, fold score).
# The per-combination frames are collected first and concatenated once, rather
# than growing a DataFrame with pd.concat inside the loop.
score_frames = [
    pd.DataFrame({
        'method': m,
        'feature': f,
        'metric': me,
        "score": scores,  # list of per-fold scores; the scalar labels are broadcast
    })
    for m, fs in results["nested_cv"].items()
    for f, mes in fs.items()
    for me, scores in mes.items()
]
df = pd.concat(score_frames, ignore_index=True)

# Ordered categoricals fix the row/column order of the pivot table.
# `custom_order` is also used by the CV table cell further down.
custom_order = ["pathway","Chem", "DGI",  "indication", "target", "transporter", "enzyme"]
df['feature'] = pd.Categorical(df['feature'], categories=custom_order, ordered=True)
df['method'] = pd.Categorical(df['method'], categories=methods, ordered=True)
df['metric'] = pd.Categorical(df['metric'], categories=metrice_names, ordered=True)

# Mean and standard deviation over folds for each (feature, method) x metric.
nested_cv_summary = pd.pivot_table(
    df,
    values=['score'],
    index=["feature", "method"],
    aggfunc={'score': ["mean", "std"]},
    columns=["metric"],
)
nested_cv_summary = (
    nested_cv_summary
    .sort_index(axis=1, level='metric')
    .sort_index(level='feature')
)
nested_cv_summary.to_excel(f'results/nested_cv_results_{SEs_name}_{filter}.xlsx')
nested_cv_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score,score,score,score,score,score,score,score,score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
Unnamed: 0_level_2,metric,AUPR+AUROC,AUPR+AUROC,AUPR+AUROCperdrug,AUPR+AUROCperdrug,AUROC,AUROC,AUPR,AUPR,AUROCperdrug,AUROCperdrug,AUPRperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3
pathway,SKR,1.401852,0.020337,1.597627,0.009471,0.859692,0.008044,0.54216,0.01547,0.947153,0.000755,0.650473,0.009534
pathway,KRR,1.387727,0.032016,1.588558,0.019083,0.853503,0.014285,0.534224,0.022146,0.944141,0.005519,0.644416,0.01446
pathway,VKR,1.407823,0.025471,1.605898,0.011186,0.862876,0.010578,0.544947,0.019289,0.949379,0.000788,0.656519,0.011029
pathway,Naive,1.34749,0.016012,1.592762,0.009759,0.842988,0.006034,0.504502,0.013226,0.944394,0.000574,0.648368,0.010001
pathway,LNSM_RLN,1.325388,0.05181,1.455536,0.014041,0.818835,0.026316,0.506553,0.030408,0.892876,0.007661,0.56266,0.011086
pathway,LNSM_jaccard,0.619868,0.03238,0.684587,0.064551,0.426971,0.026506,0.192897,0.011324,0.424026,0.052422,0.26056,0.026282
Chem,SKR,1.421332,0.037031,1.639458,0.013021,0.853216,0.01385,0.568117,0.023557,0.940113,0.001553,0.699345,0.012673
Chem,KRR,1.418213,0.034645,1.640046,0.012598,0.853147,0.013256,0.565065,0.021544,0.940294,0.001249,0.699752,0.012454
Chem,VKR,1.421951,0.035786,1.645576,0.011406,0.853768,0.014921,0.568183,0.021423,0.941628,0.001177,0.703947,0.01148
Chem,Naive,1.418321,0.037221,1.640358,0.012096,0.853565,0.013598,0.564757,0.023853,0.940066,0.001406,0.700291,0.011847


Organize the results of CV into a table.

In [40]:
# Long-format table with one row per (method, feature, metric); each CV entry
# is a single score. Frames are collected and concatenated once, rather than
# growing a DataFrame with pd.concat inside the loop.
score_frames = [
    pd.DataFrame({
        'method': m,
        'feature': f,
        'metric': me,
        "score": scores,
    }, index=["1"])
    for m, fs in results["cv"].items()
    for f, mes in fs.items()
    for me, scores in mes.items()
]
df = pd.concat(score_frames, ignore_index=True)

# `custom_order` is defined in the nested-CV table cell above.
df['feature'] = pd.Categorical(df['feature'], categories=custom_order, ordered=True)
df['method'] = pd.Categorical(df['method'], categories=methods, ordered=True)
df['metric'] = pd.Categorical(df['metric'], categories=metrice_names, ordered=True)

# One row per (feature, method), one column per metric.
cv_summary = pd.pivot_table(df, values=['score'], index=["feature", "method"], columns="metric")
cv_summary.to_excel(f'results/cv_results_{SEs_name}_{filter}.xlsx')
cv_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score,score,score
Unnamed: 0_level_1,metric,AUPR+AUROC,AUPR+AUROCperdrug,AUROC,AUPR,AUROCperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
pathway,SKR,1.310736,1.583131,0.8275,0.483237,0.94366,0.639471
pathway,KRR,1.312279,1.583075,0.827761,0.484518,0.943613,0.639462
pathway,VKR,1.312413,1.582539,0.827796,0.484617,0.943548,0.638991
pathway,Naive,1.312752,1.583017,0.82795,0.484802,0.94361,0.639407
pathway,LNSM_RLN,1.247281,1.471227,0.800868,0.446413,0.901787,0.569441
pathway,LNSM_jaccard,0.596689,0.552211,0.424001,0.172688,0.343132,0.20908
Chem,SKR,1.390528,1.630376,0.847032,0.543496,0.942571,0.687805
Chem,KRR,1.386339,1.628735,0.84525,0.54109,0.942066,0.686669
Chem,VKR,1.385926,1.628011,0.845009,0.540917,0.941912,0.686099
Chem,Naive,1.384795,1.627231,0.844561,0.540234,0.941571,0.68566


Calculate the P-value for method comparison based on the result of nested CV.

In [41]:
# Wide table: one row per (method, feature, metric) with the fold scores spread
# over integer-named columns (0, 1, ...). Rows are collected and concatenated
# once, rather than growing a DataFrame with pd.concat inside the loop.
wide_rows = []
for m, fs in results["nested_cv"].items():
    for f, mes in fs.items():
        for me, scores in mes.items():
            labels = pd.DataFrame({
                'method': m,
                'feature': f,
                'metric': me,
            }, index=["1"])
            fold_scores = pd.DataFrame(scores, columns=["1"]).T
            wide_rows.append(pd.concat([labels, fold_scores], axis=1))
df = pd.concat(wide_rows, ignore_index=True)

# One p-value table per (metric, feature), comparing the methods on their fold scores.
for metric_name in metrice_names:
    for feature_name in features_names:
        subset = df[(df["metric"] == metric_name) & (df["feature"] == feature_name)]
        # Keep "method" plus every fold column by name, so the selection does
        # not depend on there being exactly five folds at fixed positions.
        per_method = subset.drop(columns=["feature", "metric"]).set_index("method")
        # NOTE(review): `.ptests` is not a built-in pandas method; presumably it
        # is registered on DataFrame by pingouin, imported by one of the project
        # modules -- confirm, since this notebook does not import it directly.
        pvalues = per_method.T.ptests(paired=True, stars=False)
        pvalues.to_excel(f'results/pvalue_{SEs_name}_{filter}_{feature_name}_{metric_name}.xlsx')