In [39]:
from itertools import product
import numpy as np
import pandas as pd
from ADRprofilePrediction import Pairs2Mat, evaluation
from Models import loadHyperpar
import seaborn as sns 
import matplotlib.pylab as plt
from matplotlib.ticker import MultipleLocator
import matplotlib.patheffects as path_effects
import json
import pingouin as pg

In [40]:
# Print the installed scikit-learn version so the run environment is recorded
# alongside the results.
import sklearn
print(sklearn.__version__)

0.24.2


In [41]:
# Drug feature matrices, keyed by feature-set name.
# Most sources are pair lists converted by Pairs2Mat (presumably into a
# drug-by-feature matrix -- confirm against ADRprofilePrediction); the chemical
# fingerprints are read directly as a table indexed by their first column.
features_dict = {}
features_dict["target"] = Pairs2Mat(
    path="data/drug_target.tsv", colname1="0", colname2="1"
)
features_dict["enzyme"] = Pairs2Mat(
    path="data/drug_enzyme.tsv", colname1="0", colname2="1"
)
features_dict["Chem"] = pd.read_csv(
    "data/drug_chemsfp.tsv", sep="\t", header=0, index_col=0
)
features_dict["DGI"] = Pairs2Mat(
    path="data/interactions.tsv", colname1="drug_claim_name", colname2="gene_name"
)
features_dict["transporter"] = Pairs2Mat(
    path="data/drug_transporter.tsv", colname1="0", colname2="1"
)
features_dict["pathway"] = Pairs2Mat(
    path="data/drug_pathway.tsv", colname1="0", colname2="1"
)
features_dict["indication"] = Pairs2Mat(
    path="data/drug_indication.tsv", colname1="1_x", colname2="6"
)


In [42]:
# Side-effect column selection mode:
#   "all"  -> keep columns whose column sum is at least 5
#   "rare" -> keep columns whose column sum is below 50
# NOTE: this name shadows the builtin `filter`; it is kept because other cells
# interpolate `filter` into their result file names.
filter = "all"

# One predicate per supported mode, applied to the per-column sums.
_column_filters = {
    "all": lambda column_sums: column_sums >= 5,
    "rare": lambda column_sums: column_sums < 50,
}
if filter not in _column_filters:
    # Previously an unknown mode silently produced an empty `SEs`, which only
    # surfaced later as a KeyError; fail here with a clear message instead.
    raise ValueError(
        f"Unknown filter {filter!r}; expected one of {sorted(_column_filters)}"
    )
_keep_columns = _column_filters[filter]

# Load both side-effect sources once, regardless of the selected mode.
SIDER = Pairs2Mat(path="data/drug_se.tsv", colname1="1_x", colname2="5")
OFFSIDERS = Pairs2Mat(
    path="data/OFFSIDES.csv",
    colname1="drug_concept_name",
    colname2="condition_concept_name",
    sep=",",
)

# Filtered side-effect matrices keyed by dataset name.
SEs = {
    "SIDER": SIDER.loc[:, _keep_columns(np.sum(SIDER, axis=0))],
    "OFFSIDES": OFFSIDERS.loc[:, _keep_columns(np.sum(OFFSIDERS, axis=0))],
}


In [43]:
# Shape of the SIDER matrix stored in `SEs` (after column filtering).
SEs["SIDER"].shape

(1344, 2556)

In [44]:
# Shape of the OFFSIDES matrix stored in `SEs` (after column filtering).
SEs["OFFSIDES"].shape

(2730, 12750)

In [45]:
# Feature sets to iterate over.
features_names = [
    "target",
    "enzyme",
    "Chem",
    "DGI",
    "transporter",
    "pathway",
    "indication",
]

# Side-effect datasets available (kept for reference, currently unused):
# SEs_names = ["SIDER", "OFFSIDES"]

# Methods compared in this run. Further method names that appear only in the
# disabled alternatives: "KR", "SVM", "OCCA", "SCCA", "RF", "BRF".
methods = [
    "SKR",
    "KRR",
    "VKR",
    "Naive",
    "LNSM_RLN",
    "LNSM_jaccard",
]

# Metrics that can be selected for tuning.
tuning_metrices = [
    "AUROC",
    "AUPR",
    "AUROCperdrug",
    "AUPRperdrug",
]

# Metric names in the order used for the result tables.
metrice_names = [
    "AUPR+AUROC",
    "AUPR+AUROCperdrug",
    "AUROC",
    "AUPR",
    "AUROCperdrug",
    "AUPRperdrug",
]

In [46]:
# Candidate values for the individual hyperparameter axes.
A = 10 ** np.arange(-2, 3, dtype=float)     # powers of ten from 1e-2 to 1e2
B = np.arange(0.1, 1, 0.1, dtype=float)     # 0.1 .. 0.9 in steps of 0.1
C = np.arange(5, 20, 5, dtype=int)          # 5, 10, 15
A10 = 10 ** np.arange(1, 2, dtype=float)    # single value: 10
A100 = 10 ** np.arange(2, 3, dtype=float)   # single value: 100

# Per-method list of hyperparameter axes, one entry per hyperparameter.
# Methods without tunable hyperparameters map to an empty list.
# Entries for methods not evaluated in this run are kept as comments.
all_hyperparlist = {
    "SKR": [A, B, A10, A100],
    # "KR": [A, A],
    "KRR": [A, A],
    "VKR": [A, A, C],
    "Naive": [],
    "LNSM_RLN": [B, A],
    "LNSM_jaccard": [B],
    # "SVM": [A, A, A],
    # "OCCA": [],
    # "SCCA": [A],
    # "RF": [C],
    # "BRF": [C],
}

In [47]:
# Name of the side-effect dataset to evaluate; used as a key into `SEs` and
# interpolated into the hyperparameter/result file names.
SEs_name = "SIDER"
# Metric name handed to `evaluation` as its `tuning_metrice` argument.
metrice = "AUPR"

In [48]:
# Containers keyed first by validation scheme ("nested_cv" / "cv"); the inner
# dictionaries are filled per method and feature set later on.
hyperpars = {"nested_cv": {}, "cv": {}}      # fixed hyperparameters fed into evaluation
hyperparsOut = {"nested_cv": {}, "cv": {}}   # hyperparameters returned by evaluation
results = {"nested_cv": {}, "cv": {}}        # scores returned by evaluation

In [49]:
# hyperpars["nested_cv"]["SKR"] = {}
# hyperpars["nested_cv"]["SKR"]["target"] = [
#     (0.01, 0.4, 10, 100),
#     (0.01, 0.4, 10, 100),
#     (0.01, 0.4, 10, 100),
#     (0.01, 0.5, 10, 100),
#     (0.01, 0.4, 10, 100),
#     ]
# hyperpars["nested_cv"]["SKR"]["enzyme"] = [
#     (0.01, 0.6, 10, 100),
#     (1, 0.8, 10, 100),
#     (1, 0.2, 10, 100),
#     (1, 0.2, 10, 100),
#     (1, 0.2, 10, 100),
#     ]
# hyperpars["nested_cv"]["SKR"]["Chem"] = [
#     (1, 0.3, 10, 100),
#     (1, 0.3, 10, 100),
#     (1, 0.3, 10, 100),
#     (1, 0.3, 10, 100),
#     (1, 0.3, 10, 100),
#     ]
# hyperpars["nested_cv"]["SKR"]["DGI"] = [
#     (0.1, 0.2, 10, 100),
#     (0.1, 0.2, 10, 100),
#     (0.1, 0.2, 10, 100),
#     (0.1, 0.1, 10, 100),
#     (0.1, 0.2, 10, 100),
#     ]
# # hyperpars["nested_cv"]["SKR"]["transporter"] = [
# #     (1, 0.3, 10, 1),
# #     (1, 0.1, 10, 100),
# #     (0.1, 0.5, 10, 100),
# #     (0.1, 0.5, 10, 100),
# #     (0.1, 0.5, 10, 100),
# #     ]
# hyperpars["nested_cv"]["SKR"]["transporter"] = [
#     (1, 0.1, 10, 100),
#     (1, 0.1, 10, 100),
#     (0.1, 0.5, 10, 100),
#     (0.1, 0.5, 10, 100),
#     (0.1, 0.5, 10, 100),
#     ]
# hyperpars["nested_cv"]["SKR"]["pathway"] = [
#     (0.01, 0.3, 10, 100),
#     (0.01, 0.3, 10, 100),
#     (0.01, 0.3, 10, 100),
#     (0.01, 0.3, 10, 100),
#     (0.01, 0.3, 10, 100),
#     ]
# hyperpars["nested_cv"]["SKR"]["indication"] = [
#     (0.1, 0.1, 10, 100),
#     (0.1, 0.1, 10, 100),
#     (0.1, 0.1, 10, 100),
#     (0.1, 0.1, 10, 100),
#     (0.1, 0.1, 10, 100),
#     ]
# # hyperpars["cv"]["SKR"] = {}
# # hyperpars["cv"]["SKR"]["target"] = (10, 0.9, 10, 100)
# # hyperpars["cv"]["SKR"]["enzyme"] = (1, 0.9, 10, 100)
# # hyperpars["cv"]["SKR"]["Chem"] = (0.01, 0.7, 100, 100)
# # hyperpars["cv"]["SKR"]["DGI"] =(0.1, 0.4, 100, 100)
# # hyperpars["cv"]["SKR"]["transporter"] = (10, 0.9, 10, 10)
# # hyperpars["cv"]["SKR"]["pathway"] = (0.1, 0.4, 100, 10)
# # hyperpars["cv"]["SKR"]["indication"] = (1, 0.4, 100, 10)
# hyperpars["cv"]["SKR"] = {}
# hyperpars["cv"]["SKR"]["target"] = (10, 0.9, 10, 100)
# hyperpars["cv"]["SKR"]["enzyme"] = (1, 0.9, 10, 100)
# hyperpars["cv"]["SKR"]["Chem"] = (1, 0.9, 10, 100)
# hyperpars["cv"]["SKR"]["DGI"] =(1, 0.9, 10, 100)
# hyperpars["cv"]["SKR"]["transporter"] = (100, 0.9, 10, 100)
# hyperpars["cv"]["SKR"]["pathway"] = (100, 0.5, 10, 100)
# hyperpars["cv"]["SKR"]["indication"] = (1, 0.9, 10, 100)

In [50]:
# hyperpars["nested_cv"]["KRR"] = {}
# hyperpars["nested_cv"]["KRR"]["target"] = [
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["enzyme"] = [
#     (1, 10),
#     (10, 10),
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["Chem"] = [
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["DGI"] = [
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["transporter"] = [
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["pathway"] = [
#     (0.01, 10),
#     (0.01, 10),
#     (0.01, 10),
#     (0.01, 10),
#     (0.01, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["indication"] = [
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     ]
# hyperpars["cv"]["KRR"] = {}
# hyperpars["cv"]["KRR"]["target"] = (100, 10)
# hyperpars["cv"]["KRR"]["enzyme"] = (100, 10)
# hyperpars["cv"]["KRR"]["Chem"] = (0.1, 100)
# hyperpars["cv"]["KRR"]["DGI"] = (0.1, 100)
# hyperpars["cv"]["KRR"]["transporter"] = (100, 100)
# hyperpars["cv"]["KRR"]["pathway"] = (0.1, 100)
# hyperpars["cv"]["KRR"]["indication"] = (100, 100)

In [51]:
# hyperpars["nested_cv"]["Naive"] = {}
# hyperpars["nested_cv"]["Naive"]["target"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["enzyme"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["Chem"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["DGI"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["transporter"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["pathway"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["indication"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["cv"]["Naive"] = {}
# hyperpars["cv"]["Naive"]["target"] = ()
# hyperpars["cv"]["Naive"]["enzyme"] = ()
# hyperpars["cv"]["Naive"]["Chem"] =()
# hyperpars["cv"]["Naive"]["DGI"] = ()
# hyperpars["cv"]["Naive"]["transporter"] = ()
# hyperpars["cv"]["Naive"]["pathway"] = ()
# hyperpars["cv"]["Naive"]["indication"] = ()

In [52]:
# hyperpars["nested_cv"]["VKR"] = {}
# hyperpars["nested_cv"]["VKR"]["target"] = [
#     (10, 0.1, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["enzyme"] = [
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["Chem"] = [
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["DGI"] = [
#     (10, 0.1, 15),
#     (10, 0.1, 10),
#     (10, 0.1, 15),
#     (10, 0.01, 15),
#     (10, 0.1, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["transporter"] = [
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 0.1, 15),
#     (10, 0.1, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["pathway"] = [
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["indication"] = [
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     ]
# hyperpars["cv"]["VKR"] = {}
# hyperpars["cv"]["VKR"]["target"] = (10, 100, 10)
# hyperpars["cv"]["VKR"]["enzyme"] = (10, 100, 10)
# hyperpars["cv"]["VKR"]["Chem"] =(100, 0.01, 10)
# hyperpars["cv"]["VKR"]["DGI"] = (100, 0.1, 10)
# hyperpars["cv"]["VKR"]["transporter"] = (100, 0.1, 15)
# hyperpars["cv"]["VKR"]["pathway"] = (100, 10, 15)
# hyperpars["cv"]["VKR"]["indication"] = (100, 1, 15)

In [53]:
# hyperpars["nested_cv"]["LNSM_RLN"] = {}
# hyperpars["nested_cv"]["LNSM_RLN"]["target"] = [
#     (0.2, 10), 
#     (0.2, 10), 
#     (0.2, 10), 
#     (0.2, 10), 
#     (0.2, 10)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["enzyme"] = [
#     (0.1, 100),
#     (0.1, 100),
#     (0.1, 100),
#     (0.1, 100),
#     (0.1, 10)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["Chem"] = [
#     (0.6, 100),
#     (0.1, 100),
#     (0.6, 100),
#     (0.1, 100),
#     (0.6, 100)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["DGI"] = [
#     (0.2, 10),
#     (0.2, 10),
#     (0.3, 100),
#     (0.2, 10),
#     (0.2, 10)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["transporter"] = [
#     (0.3, 100),
#     (0.3, 10),
#     (0.3, 10),
#     (0.2, 10),
#     (0.3, 10)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["pathway"] = [
#     (0.1, 10), 
#     (0.1, 10), 
#     (0.1, 10), 
#     (0.1, 1), 
#     (0.1, 10)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["indication"] = [
#     (0.5, 100),
#     (0.4, 100),
#     (0.4, 100),
#     (0.3, 100),
#     (0.4, 100)
#     ]
# hyperpars["cv"]["LNSM_RLN"] = {}
# hyperpars["cv"]["LNSM_RLN"]["target"] = (0.1, 1)
# hyperpars["cv"]["LNSM_RLN"]["enzyme"] = (0.1, 100)
# hyperpars["cv"]["LNSM_RLN"]["Chem"] = (0.9, 100)
# hyperpars["cv"]["LNSM_RLN"]["DGI"] = (0.2, 10)
# hyperpars["cv"]["LNSM_RLN"]["transporter"] = (0.1, 10)
# hyperpars["cv"]["LNSM_RLN"]["pathway"] = (0.1, 10)
# hyperpars["cv"]["LNSM_RLN"]["indication"] = (0.3, 100)

In [54]:
# hyperpars["nested_cv"]["LNSM_jaccard"] = {}
# hyperpars["nested_cv"]["LNSM_jaccard"]["target"] = [
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["enzyme"] = [
#     (0.2,), 
#     (0.2,), 
#     (0.2,), 
#     (0.2,), 
#     (0.2,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["Chem"] = [
#     (0.2,), 
#     (0.2,), 
#     (0.2,), 
#     (0.8,), 
#     (0.5,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["DGI"] = [
#     (0.2,), 
#     (0.2,), 
#     (0.2,), 
#     (0.2,), 
#     (0.2,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["transporter"] = [
#     (0.6,),
#     (0.7,),
#     (0.1,),
#     (0.7,),
#     (0.1,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["pathway"] = [
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["indication"] = [
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,)
#     ]
# hyperpars["cv"]["LNSM_jaccard"] = {}
# hyperpars["cv"]["LNSM_jaccard"]["target"] = (0.1,)
# hyperpars["cv"]["LNSM_jaccard"]["enzyme"] = (0.4,)
# hyperpars["cv"]["LNSM_jaccard"]["Chem"] = (0.4,)
# hyperpars["cv"]["LNSM_jaccard"]["DGI"] = (0.1,)
# hyperpars["cv"]["LNSM_jaccard"]["transporter"] = (0.5,)
# hyperpars["cv"]["LNSM_jaccard"]["pathway"] = (0.1,)
# hyperpars["cv"]["LNSM_jaccard"]["indication"] = (0.1,)

In [55]:

# Load previously stored hyperparameters for the selected dataset.
# The file has an .xml suffix but its content is parsed as JSON.
with open(f'results/hyperpars_{SEs_name}.xml') as hyperpar_file:
    hyperpars = json.load(hyperpar_file)

In [39]:
# Evaluate every method on every feature set: for each method, first run the
# nested cross-validation over all features, then the plain cross-validation.
# Scores go into `results`, the hyperparameters reported back by `evaluation`
# go into `hyperparsOut`; both are keyed [validation][method][feature].
for method in methods:
    for validation in ("nested_cv", "cv"):
        hyperparsOut[validation][method] = {}
        results[validation][method] = {}
        # The loop variable used to be called `str`, which rebound the builtin
        # for the rest of the kernel session.
        for feature_name in features_names:
            print(f"using feature {feature_name}")
            # Rebuilt on every iteration on purpose: loadHyperpar is kept at
            # the same point in the run so any output it produces stays in the
            # same place in the log.
            hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
            scores, tuned_hyperpars = evaluation(
                Y=SEs[SEs_name],
                X=features_dict[feature_name],
                method_option=method,
                tuning_metrice=metrice,
                hyperparList=hyperparList,
                hyperparfixed=hyperpars[validation][method][feature_name],
                Validation=validation,
                n_jobs=1,
            )
            results[validation][method][feature_name] = scores
            hyperparsOut[validation][method][feature_name] = tuned_hyperpars

using feature target
The SKR requires hyperparameter lambda, c, sigma_X, sigma_Y
---------- nested cv start ----------
Fold: 0
number of hyperpars combination:  45
first few training idx:  [ 57  75 258 281 294 362 474 479 680 698]
first few testing idx:  [ 70 151 209 236 411 438 439 625 657 704]
--- tuning end ---
target size: 142
------ best hyper pars:  [0.01, 0.4, 10, 100] ------
SKR starts:
SKR ends:
-----------
AUPRperdrug: 0.46165763532831294
AUROCperdrug: 0.8978972570529985
AUPR+AUROCperdrug: 1.3595548923813114
AUPR: 0.41537133982867747
AUROC: 0.8687649857050729
AUPR+AUROC: 1.2841363255337503
-----------
Fold: 1
number of hyperpars combination:  45
first few training idx:  [ 70 151 209 236 411 438 439 625 657 704]
first few testing idx:  [ 57  75 258 281 294 362 474 479 680 698]
--- tuning end ---
target size: 142
------ best hyper pars:  [0.01, 0.4, 10, 100] ------
SKR starts:
SKR ends:
-----------
AUPRperdrug: 0.46629645108222045
AUROCperdrug: 0.9021824148263183
AUPR+AUROCperd



VKR ends:
-----------
AUPRperdrug: 0.4414199494233323
AUROCperdrug: 0.8888652896768148
AUPR+AUROCperdrug: 1.3302852391001472
AUPR: 0.3951850082752013
AUROC: 0.8752621672235312
AUPR+AUROC: 1.2704471754987325
-----------
Fold: 3
number of hyperpars combination:  75
first few training idx:  [ 70 151 209 236 411 438 439 625 657 704]
first few testing idx:  [ 14  21  36  93 153 340 352 403 418 538]
--- tuning end ---
target size: 142
------ best hyper pars:  [10, 0.01, 15] ------
VKR starts:
VKR ends:
-----------
AUPRperdrug: 0.47833690246522237
AUROCperdrug: 0.8972055841573693
AUPR+AUROCperdrug: 1.3755424866225916
AUPR: 0.4275471452853586
AUROC: 0.8737962850631014
AUPR+AUROC: 1.30134343034846
-----------
Fold: 4
number of hyperpars combination:  75
first few training idx:  [ 70 151 209 236 411 438 439 625 657 704]
first few testing idx:  [ 15  50 158 198 208 278 541 564 579 686]
--- tuning end ---
target size: 142
------ best hyper pars:  [10, 0.01, 15] ------
VKR starts:
VKR ends:
-------



VKR ends:
-----------
AUPRperdrug: 0.4503620748771144
AUROCperdrug: 0.8859509594708835
AUPR+AUROCperdrug: 1.3363130343479979
AUPR: 0.41586098158259405
AUROC: 0.8756739195173937
AUPR+AUROC: 1.2915349010999877
-----------
Fold: 4
number of hyperpars combination:  75
first few training idx:  [123 155 179 212 231 458 481 485 510 523]
first few testing idx:  [ 49  83 163 209 214 253 278 387 466 479]
--- tuning end ---
target size: 108
------ best hyper pars:  [10, 1, 15] ------
VKR starts:
VKR ends:
-----------
AUPRperdrug: 0.43902551850450117
AUROCperdrug: 0.8828594219542886
AUPR+AUROCperdrug: 1.3218849404587898
AUPR: 0.38343629758930986
AUROC: 0.8539107291954282
AUPR+AUROC: 1.2373470267847382
-----------
Mean AUPRperdrug: 0.4438169187289612, std: 0.012550276496160731
Mean AUROCperdrug: 0.8851815177814834, std: 0.0020786082582573556
Mean AUPR+AUROCperdrug: 1.3289984365104446, std: 0.014206997525150606
Mean AUPR: 0.39821476956867646, std: 0.024372974363863285
Mean AUROC: 0.8678675386677372,



VKR ends:
-----------
AUPRperdrug: 0.4516524710091313
AUROCperdrug: 0.8967192125001915
AUPR+AUROCperdrug: 1.3483716835093227
AUPR: 0.4032362624419107
AUROC: 0.874010357419414
AUPR+AUROC: 1.2772466198613248
-----------
Fold: 2
number of hyperpars combination:  75
first few training idx:  [ 94 124 142 193 241 339 379 438 651 689]
first few testing idx:  [  5 106 111 220 227 306 395 463 520 538]
--- tuning end ---
target size: 150
------ best hyper pars:  [10, 0.1, 15] ------
VKR starts:
VKR ends:
-----------
AUPRperdrug: 0.45234098226399716
AUROCperdrug: 0.8972715794191863
AUPR+AUROCperdrug: 1.3496125616831836
AUPR: 0.4361310366318718
AUROC: 0.8880710401596048
AUPR+AUROC: 1.3242020767914766
-----------
Fold: 3
number of hyperpars combination:  75
first few training idx:  [ 94 124 142 193 241 339 379 438 651 689]
first few testing idx:  [ 42 120 130 249 307 417 426 437 654 706]
--- tuning end ---
target size: 150
------ best hyper pars:  [10, 0.01, 15] ------
VKR starts:
VKR ends:
-------

In [40]:
# Optional: persist the hyperparameters returned by the evaluation run.
# with open(f'results/hyperpars_{SEs_name}.xml', 'w') as xml_file:
#    json.dump(hyperparsOut, xml_file)

# Persist the evaluation scores as JSON (the file name carries an .xml suffix).
with open(f'results/results_{SEs_name}_{filter}.xml', 'w') as results_file:
    json.dump(results, results_file)

In [56]:
# Reload the stored evaluation scores; the .xml-suffixed file is parsed as JSON.
with open(f'results/results_{SEs_name}_{filter}.xml') as results_file:
    results = json.load(results_file)

In [57]:
# Long-format table of the nested-CV scores: one row per
# (method, feature, metric, individual score).
# The per-combination frames are collected in a list and concatenated once;
# growing a DataFrame with pd.concat inside the loop is quadratic and starts
# from an empty frame, which pandas has deprecated for dtype inference.
frames = []
for m, fs in results["nested_cv"].items():
    for f, mes in fs.items():
        for me, scores in mes.items():
            frames.append(pd.DataFrame({
                'method': m,
                'feature': f,
                'metric': me,
                "score": scores
            }))
df = pd.concat(frames, ignore_index=True)

# Fixed display order for features; also reused by the CV summary cell.
custom_order = ["pathway","Chem", "DGI",  "indication", "target", "transporter", "enzyme"]
# Ordered categoricals make the pivot follow the intended order rather than
# alphabetical order.
df['feature'] = pd.Categorical(df['feature'], categories=custom_order, ordered=True)
df['method'] = pd.Categorical(df['method'], categories=methods, ordered=True)
df['metric'] = pd.Categorical(df['metric'], categories=metrice_names, ordered=True)

# Mean and standard deviation of the scores per (feature, method) and metric.
df2 = pd.pivot_table(df, values=['score'], index=["feature", "method"], aggfunc={'score': ["mean","std"]}, columns=["metric"])
df3 = df2.sort_index(axis=1, level='metric').sort_index(level='feature')
df3.to_excel(f'results/nested_cv_results_{SEs_name}_{filter}.xlsx')
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score,score,score,score,score,score,score,score,score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
Unnamed: 0_level_2,metric,AUPR+AUROC,AUPR+AUROC,AUPR+AUROCperdrug,AUPR+AUROCperdrug,AUROC,AUROC,AUPR,AUPR,AUROCperdrug,AUROCperdrug,AUPRperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3
pathway,SKR,1.334705,0.025306,1.404288,0.013062,0.87733,0.007133,0.457375,0.018828,0.90262,0.003697,0.501668,0.009512
pathway,KRR,1.314502,0.026516,1.392919,0.015386,0.863253,0.007199,0.451248,0.019692,0.892696,0.00452,0.500223,0.010999
pathway,VKR,1.319026,0.02429,1.375027,0.013385,0.879535,0.005165,0.439491,0.019223,0.89821,0.0032,0.476818,0.010276
pathway,Naive,1.242813,0.016862,1.31398,0.009992,0.859248,0.006301,0.383564,0.011161,0.880223,0.003061,0.433758,0.00778
pathway,LNSM_RLN,1.02643,0.447858,1.131054,0.510656,0.719242,0.296256,0.307188,0.152758,0.73862,0.319257,0.392433,0.191538
pathway,LNSM_jaccard,0.826078,0.109223,0.982406,0.079647,0.632615,0.062759,0.193462,0.048206,0.643816,0.051263,0.33859,0.030322
Chem,SKR,1.275559,0.030553,1.356358,0.023997,0.867037,0.009402,0.408522,0.022789,0.893836,0.004003,0.462522,0.020765
Chem,KRR,1.259418,0.030815,1.344612,0.021969,0.854482,0.01033,0.404936,0.022589,0.884176,0.003746,0.460436,0.019664
Chem,VKR,1.273031,0.0304,1.34857,0.025293,0.871534,0.00866,0.401497,0.022613,0.895063,0.00436,0.453507,0.021192
Chem,Naive,1.240348,0.025632,1.314588,0.025718,0.864022,0.008082,0.376326,0.019921,0.884678,0.00566,0.42991,0.020848


In [58]:
# Long-format table of the plain-CV scores: one row per
# (method, feature, metric) with its single score.
# Frames are collected and concatenated once instead of growing `df` with
# pd.concat inside the loop (quadratic, and seeded from an empty frame).
frames = []
for m, fs in results["cv"].items():
    for f, mes in fs.items():
        for me, scores in mes.items():
            # index=["1"] is required because every value here is a scalar.
            frames.append(pd.DataFrame({
                'method': m,
                'feature': f,
                'metric': me,
                "score": scores
            },index=["1"]))
df = pd.concat(frames, ignore_index=True)

# `custom_order` is defined in the nested-CV summary cell.
df['feature'] = pd.Categorical(df['feature'], categories=custom_order, ordered=True)
df['method'] = pd.Categorical(df['method'], categories=methods, ordered=True)
df['metric'] = pd.Categorical(df['metric'], categories=metrice_names, ordered=True)

# One score column per metric, rows indexed by (feature, method).
df2 = pd.pivot_table(df, values=['score'], index=["feature", "method"], columns="metric")
df2.to_excel(f'results/cv_results_{SEs_name}_{filter}.xlsx')
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score,score,score
Unnamed: 0_level_1,metric,AUPR+AUROC,AUPR+AUROCperdrug,AUROC,AUPR,AUROCperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
pathway,SKR,1.258435,1.345329,0.859145,0.39929,0.886235,0.459094
pathway,KRR,1.26042,1.345745,0.859982,0.400438,0.886422,0.459323
pathway,VKR,1.257367,1.343312,0.858144,0.399223,0.884432,0.458879
pathway,Naive,1.258257,1.343673,0.858629,0.399628,0.885073,0.4586
pathway,LNSM_RLN,0.235112,0.238845,0.193257,0.041855,0.180401,0.058444
pathway,LNSM_jaccard,0.40613,0.516569,0.332292,0.073838,0.367572,0.148997
Chem,SKR,1.26503,1.353039,0.870026,0.395004,0.896757,0.456282
Chem,KRR,1.253668,1.339958,0.864452,0.389216,0.890766,0.449191
Chem,VKR,1.263963,1.344892,0.865968,0.397995,0.890499,0.454393
Chem,Naive,1.244534,1.331759,0.85997,0.384565,0.886647,0.445113


In [59]:
# Wide table of nested-CV scores: one row per (method, feature, metric) with
# one column per score position (0, 1, 2, ...).
# Rows are gathered as plain dicts and turned into a DataFrame once, instead
# of concatenating one-row frames inside the loop.
rows = []
for m, fs in results["nested_cv"].items():
    for f, mes in fs.items():
        for me, scores in mes.items():
            row = {'method': m, 'feature': f, 'metric': me}
            row.update(enumerate(scores))
            rows.append(row)
df = pd.DataFrame(rows)

# For every metric/feature pair, compare the methods against each other with
# pingouin's paired `ptests` and write the resulting matrix to its own file.
for metric_name in metrice_names:
    for feature_name in features_names:
        subset = df[(df["metric"] == metric_name) & (df["feature"] == feature_name)]
        # Keep the method label plus every score column. Dropping the two label
        # columns replaces the former positional selection [0, 3, 4, 5, 6, 7],
        # which silently assumed exactly five scores per row.
        scores_by_method = subset.drop(columns=["feature", "metric"]).set_index("method")
        # Transposed so that each method becomes a column, as ptests compares
        # columns pairwise.
        pvalues = scores_by_method.T.ptests(paired=True, stars=False)
        pvalues.to_excel(f'results/pvalue_{SEs_name}_{filter}_{feature_name}_{metric_name}.xlsx')