In [70]:
from itertools import product
import numpy as np
import pandas as pd
from ADRprofilePrediction import Pairs2Mat, evaluation
from Models import loadHyperpar
import seaborn as sns 
import matplotlib.pylab as plt
from matplotlib.ticker import MultipleLocator
import matplotlib.patheffects as path_effects
import json
import pingouin as pg

In [71]:
# Print the installed scikit-learn version so the notebook output records
# the environment this run was produced with.
import sklearn
print(sklearn.__version__)

0.24.2


In [72]:
def _pairs01(path):
    """Call Pairs2Mat on a pair file whose two columns are named "0" and "1"."""
    return Pairs2Mat(path=path, colname1="0", colname2="1")


# Feature matrices keyed by feature name. Every entry comes from Pairs2Mat
# except "Chem", which is read directly as a tab-separated table.
# NOTE(review): presumably each matrix is indexed by drug -- confirm in Pairs2Mat.
features_dict = {}
features_dict["target"] = _pairs01("data/drug_target.tsv")
features_dict["enzyme"] = _pairs01("data/drug_enzyme.tsv")
features_dict["Chem"] = pd.read_csv("data/drug_chemsfp.tsv", sep="\t", header=0, index_col=0)
features_dict["DGI"] = Pairs2Mat(
    path="data/interactions.tsv",
    colname1="drug_claim_name",
    colname2="gene_name",
)
features_dict["transporter"] = _pairs01("data/drug_transporter.tsv")
features_dict["pathway"] = _pairs01("data/drug_pathway.tsv")
features_dict["indication"] = Pairs2Mat(
    path="data/drug_indication.tsv",
    colname1="1_x",
    colname2="6",
)


In [73]:
# Which side-effect columns to keep:
#   "all"  -> columns whose column sum is at least 5
#   "rare" -> columns whose column sum is below 50 (no lower bound is applied)
# NOTE: this name shadows the built-in `filter`; it is kept because later
# cells read it when building output file names.
filter = "rare"

# One boolean-mask builder per supported mode, applied to the column sums.
_column_masks = {
    "all": lambda sums: sums >= 5,
    "rare": lambda sums: sums < 50,
}
if filter not in _column_masks:
    # Fail here instead of leaving SEs empty and hitting a KeyError later on.
    raise ValueError(
        f"Unknown filter {filter!r}; expected one of {sorted(_column_masks)}"
    )

SIDER = Pairs2Mat(path="data/drug_se.tsv",colname1="1_x",colname2="5")
OFFSIDERS = Pairs2Mat(path="data/OFFSIDES.csv",colname1="drug_concept_name",colname2="condition_concept_name",sep = ",")

# Filtered side-effect matrices keyed by data set name.
SEs = {}
for _se_name, _se_matrix in (("SIDER", SIDER), ("OFFSIDES", OFFSIDERS)):
    column_sums = np.sum(_se_matrix, axis=0)
    SEs[_se_name] = _se_matrix.loc[:, _column_masks[filter](column_sums)]


In [74]:
# Shape of the filtered SIDER matrix (presumably drugs x side effects -- confirm).
SEs["SIDER"].shape

(1344, 5481)

In [75]:
# Shape of the filtered OFFSIDES matrix (presumably drugs x side effects -- confirm).
SEs["OFFSIDES"].shape

(2730, 6172)

In [76]:
# Feature sets to evaluate; each name is a key of features_dict.
features_names = [
    "target",
    "enzyme",
    "Chem",
    "DGI",
    "transporter",
    "pathway",
    "indication",
]

# Methods evaluated in this run. Other options that were listed here but are
# currently disabled: "KR", "SVM", "OCCA", "SCCA", "RF", "BRF".
# (A disabled SEs_names = ["SIDER", "OFFSIDES"] list also lived in this cell.)
methods = [
    "SKR",
    "KRR",
    "VKR",
    "Naive",
    "LNSM_RLN",
    "LNSM_jaccard",
]

# Individual metrics, followed by the full reporting order, which puts the
# two combined metrics first.
tuning_metrices = ["AUROC", "AUPR", "AUROCperdrug", "AUPRperdrug"]
metrice_names = ["AUPR+AUROC", "AUPR+AUROCperdrug", *tuning_metrices]

In [77]:
# Candidate value grids shared by the per-method search spaces below.
A = np.power(10, np.arange(-2, 3, 1, dtype=float))     # powers of ten from 1e-2 to 1e2
B = np.arange(0.1, 1, 0.1, dtype=float)                # 0.1 to 0.9 in steps of 0.1
C = np.arange(5, 20, 5, dtype=int)                     # 5, 10, 15
A10 = np.power(10, np.arange(1, 2, 1, dtype=float))    # single value: 10
A100 = np.power(10, np.arange(2, 3, 1, dtype=float))   # single value: 100

# Per-method list of grids, unpacked positionally into loadHyperpar.
# Entries for KR, SVM, OCCA, SCCA, RF and BRF existed here but are disabled.
all_hyperparlist = dict(
    SKR=[A, B, A10, A100],
    KRR=[A, A],
    VKR=[A, A, C],
    Naive=[],
    LNSM_RLN=[B, A],
    LNSM_jaccard=[B],
)

In [78]:
# Key of SEs to evaluate; also embedded in the output file names.
SEs_name = "OFFSIDES"
# Metric passed to evaluation() as tuning_metrice.
metrice = "AUPR"

In [79]:
# Three independent containers, each split by validation scheme:
#   hyperpars    -> fixed hyperparameters handed to evaluation()
#   hyperparsOut -> hyperparameters returned by evaluation()
#   results      -> scores returned by evaluation()
hyperpars = {"nested_cv": {}, "cv": {}}
hyperparsOut = {"nested_cv": {}, "cv": {}}
results = {"nested_cv": {}, "cv": {}}

In [80]:
# Fixed SKR hyperparameters. The run log names the four values as
# lambda, c, sigma_X, sigma_Y.
# nested_cv: a list of five tuples per feature (presumably one per outer fold -- confirm).
hyperpars["nested_cv"]["SKR"] = {
    "target": [
        (0.01, 0.6, 10, 100), (0.01, 0.6, 10, 100), (0.01, 0.6, 10, 100),
        (0.01, 0.6, 10, 100), (0.01, 0.5, 10, 100),
    ],
    "enzyme": [
        (1, 0.9, 10, 100), (1, 0.1, 10, 100), (1, 0.1, 10, 100),
        (1, 0.1, 10, 100), (1, 0.1, 10, 100),
    ],
    "Chem": [
        (1, 0.6, 10, 100), (1, 0.5, 10, 100), (1, 0.5, 10, 100),
        (10, 0.2, 10, 100), (10, 0.1, 10, 100),
    ],
    "DGI": [
        (0.1, 0.1, 10, 100), (0.1, 0.1, 10, 100), (0.1, 0.1, 10, 100),
        (0.1, 0.1, 10, 100), (0.1, 0.1, 10, 100),
    ],
    "transporter": [
        (0.01, 0.6, 10, 100), (0.01, 0.6, 10, 100), (0.01, 0.6, 10, 100),
        (0.01, 0.6, 10, 100), (0.01, 0.6, 10, 100),
    ],
    "pathway": [
        (0.01, 0.4, 10, 100), (0.01, 0.4, 10, 100), (0.01, 0.3, 10, 100),
        (0.01, 0.4, 10, 100), (0.01, 0.4, 10, 100),
    ],
    "indication": [
        (0.1, 0.1, 10, 100), (0.1, 0.1, 10, 100), (0.1, 0.1, 10, 100),
        (0.1, 0.1, 10, 100), (0.1, 0.1, 10, 100),
    ],
}
# cv: a single tuple per feature.
hyperpars["cv"]["SKR"] = {
    "target": (100, 0.1, 10, 100),
    "enzyme": (1, 0.9, 10, 100),
    "Chem": (1, 0.9, 10, 100),
    "DGI": (0.1, 0.9, 10, 100),
    "transporter": (0.01, 0.9, 10, 100),
    "pathway": (100, 0.9, 10, 100),
    "indication": (0.1, 0.9, 10, 100),
}


In [81]:
# Fixed KRR hyperparameters (two values per tuple).
# nested_cv: a list of five tuples per feature (presumably one per outer fold -- confirm).
hyperpars["nested_cv"]["KRR"] = {
    "target": [(1, 10), (1, 10), (1, 10), (1, 10), (0.1, 10)],
    "enzyme": [(1, 10), (1, 10), (1, 10), (1, 10), (1, 10)],
    "Chem": [(10, 10), (10, 10), (10, 10), (10, 10), (10, 10)],
    "DGI": [(0.1, 10), (0.1, 10), (0.1, 10), (0.1, 10), (0.1, 10)],
    "transporter": [(0.1, 10), (1, 10), (1, 10), (0.01, 10), (0.1, 10)],
    "pathway": [(0.1, 10), (0.01, 10), (0.1, 10), (0.01, 10), (0.1, 10)],
    "indication": [(0.1, 10), (0.1, 10), (0.1, 10), (0.1, 10), (0.1, 10)],
}
# cv: a single tuple per feature.
hyperpars["cv"]["KRR"] = {
    "target": (100, 10),
    "enzyme": (100, 10),
    "Chem": (0.1, 100),
    "DGI": (0.1, 100),
    "transporter": (0.01, 100),
    "pathway": (100, 100),
    "indication": (1, 100),
}

In [82]:
# The Naive method has no hyperparameters, so every entry is an empty tuple:
# five per feature for nested_cv and a single one per feature for cv.
_naive_features = ("target", "enzyme", "Chem", "DGI", "transporter", "pathway", "indication")
hyperpars["nested_cv"]["Naive"] = {
    name: [() for _ in range(5)] for name in _naive_features
}
hyperpars["cv"]["Naive"] = {name: () for name in _naive_features}

In [83]:
# Fixed VKR hyperparameters (three values per tuple).
# nested_cv: a list of five tuples per feature (presumably one per outer fold -- confirm).
hyperpars["nested_cv"]["VKR"] = {
    "target": [(10, 0.1, 15), (10, 0.01, 15), (10, 0.1, 15), (10, 0.1, 15), (10, 0.01, 15)],
    "enzyme": [(10, 1, 15), (10, 0.01, 5), (10, 0.01, 10), (10, 1, 10), (10, 1, 10)],
    "Chem": [(10, 10, 15), (10, 1, 15), (10, 1, 15), (10, 10, 15), (10, 10, 15)],
    "DGI": [(10, 0.01, 15), (10, 0.1, 15), (10, 0.1, 15), (10, 0.1, 15), (10, 0.1, 15)],
    "transporter": [(10, 0.1, 10), (10, 0.1, 15), (10, 0.1, 10), (10, 0.01, 10), (10, 0.01, 5)],
    "pathway": [(10, 0.01, 15), (10, 0.01, 15), (10, 0.01, 15), (10, 0.01, 15), (10, 0.01, 15)],
    "indication": [(10, 0.01, 15), (10, 0.1, 15), (10, 0.1, 15), (10, 0.01, 15), (10, 0.01, 15)],
}
# cv: a single tuple per feature.
hyperpars["cv"]["VKR"] = {
    "target": (10, 100, 15),
    "enzyme": (10, 100, 15),
    "Chem": (100, 0.1, 15),
    "DGI": (100, 0.01, 15),
    "transporter": (100, 0.01, 15),
    "pathway": (100, 100, 15),
    "indication": (100, 100, 15),
}

In [84]:
# Fixed LNSM_RLN hyperparameters (two values per tuple).
# nested_cv: a list of five tuples per feature (presumably one per outer fold -- confirm).
hyperpars["nested_cv"]["LNSM_RLN"] = {
    "target": [(0.1, 1), (0.1, 1), (0.1, 1), (0.1, 1), (0.1, 1)],
    "enzyme": [(0.1, 10), (0.1, 10), (0.1, 10), (0.1, 100), (0.1, 100)],
    "Chem": [(0.9, 100), (0.9, 100), (0.4, 100), (0.4, 100), (0.4, 100)],
    "DGI": [(0.3, 100), (0.3, 100), (0.3, 100), (0.3, 100), (0.3, 100)],
    "transporter": [(0.1, 10), (0.2, 10), (0.1, 10), (0.1, 10), (0.1, 10)],
    "pathway": [(0.1, 10), (0.1, 10), (0.1, 10), (0.1, 10), (0.1, 10)],
    "indication": [(0.7, 100), (0.7, 100), (0.7, 100), (0.8, 100), (0.8, 100)],
}
# cv: a single tuple per feature.
hyperpars["cv"]["LNSM_RLN"] = {
    "target": (0.1, 10),
    "enzyme": (0.1, 100),
    "Chem": (0.8, 100),
    "DGI": (0.2, 100),
    "transporter": (0.1, 10),
    "pathway": (0.1, 100),
    "indication": (0.5, 100),
}

In [85]:
# Fixed LNSM_jaccard hyperparameters (one value, stored as a 1-tuple).
# nested_cv: a list of five 1-tuples per feature (presumably one per outer fold -- confirm).
hyperpars["nested_cv"]["LNSM_jaccard"] = {
    "target": [(0.1,), (0.2,), (0.1,), (0.1,), (0.3,)],
    "enzyme": [(0.8,), (0.4,), (0.7,), (0.1,), (0.9,)],
    "Chem": [(0.1,), (0.8,), (0.3,), (0.4,), (0.8,)],
    "DGI": [(0.2,), (0.1,), (0.1,), (0.1,), (0.1,)],
    "transporter": [(0.7,), (0.3,), (0.3,), (0.6,), (0.4,)],
    "pathway": [(0.1,), (0.1,), (0.1,), (0.1,), (0.1,)],
    "indication": [(0.1,), (0.1,), (0.1,), (0.1,), (0.1,)],
}
# cv: a single 1-tuple per feature.
hyperpars["cv"]["LNSM_jaccard"] = {
    "target": (0.1,),
    "enzyme": (0.4,),
    "Chem": (0.8,),
    "DGI": (0.1,),
    "transporter": (0.4,),
    "pathway": (0.1,),
    "indication": (0.4,),
}

In [86]:

# # Open and read the JSON file
# with open(f'results/hyperpars_{SEs_name}.xml', 'r') as xml_file:
#     hyperpars = json.load(xml_file)

In [87]:
# Evaluate every method on every feature set, first with nested CV and then
# with plain CV, storing the scores in `results` and the hyperparameters
# returned by evaluation() in `hyperparsOut`.
#
# The loop variable is called `feature` rather than `str`, so the built-in
# `str` is not clobbered for the rest of the kernel session.
for method in methods:
    for validation in ("nested_cv", "cv"):
        hyperparsOut[validation][method] = {}
        results[validation][method] = {}
        for feature in features_names:
            print(f"using feature {feature}")
            # Rebuilt for every feature on purpose: loadHyperpar prints a
            # description of the method, and keeping the call here leaves the
            # log output unchanged.
            hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
            (
                results[validation][method][feature],
                hyperparsOut[validation][method][feature],
            ) = evaluation(
                Y=SEs[SEs_name],
                X=features_dict[feature],
                method_option=method,
                tuning_metrice=metrice,
                hyperparList=hyperparList,
                hyperparfixed=hyperpars[validation][method][feature],
                Validation=validation,
                n_jobs=1,
            )

using feature target
The SKR requires hyperparameter lambda, c, sigma_X, sigma_Y
---------- nested cv start ----------
Fold: 0
number of hyperpars combination:  45
first few training idx:  [ 35 107 184 194 251 328 410 461 578 653]
first few testing idx:  [224 236 273 301 306 322 406 434 435 585]
--- tuning end ---
target size: 133
------ best hyper pars:  (0.01, 0.6, 10, 100) ------
SKR starts:
SKR ends:
-----------
AUPRperdrug: 0.06660713799659151
AUROCperdrug: 0.763499214292935
AUPR+AUROCperdrug: 0.8301063522895265
AUPR: 0.05529583154927727
AUROC: 0.7158406525058227
AUPR+AUROC: 0.7711364840551
-----------
Fold: 1
number of hyperpars combination:  45
first few training idx:  [224 236 273 301 306 322 406 434 435 585]
first few testing idx:  [ 35 107 184 194 251 328 410 461 578 653]
--- tuning end ---
target size: 133
------ best hyper pars:  (0.01, 0.6, 10, 100) ------
SKR starts:
SKR ends:
-----------
AUPRperdrug: 0.045205174465070705
AUROCperdrug: 0.7311265260518489
AUPR+AUROCperdrug

In [88]:
# Persist the returned hyperparameters and the scores as JSON.
# NOTE: the files carry an .xml extension but their content is JSON.
for _out_path, _payload in (
    (f'results/hyperpars_{SEs_name}.xml', hyperparsOut),
    (f'results/results_{SEs_name}_{filter}.xml', results),
):
    with open(_out_path, 'w') as xml_file:
        json.dump(_payload, xml_file)

In [89]:
# Reload the scores from disk (JSON content despite the .xml extension), so
# the cells below work from the saved file.
_results_path = f'results/results_{SEs_name}_{filter}.xml'
with open(_results_path, 'r') as json_file:
    results = json.load(json_file)

In [90]:
# Flatten the nested-CV scores into long format: one row per
# (method, feature, metric, score). The per-combination frames are collected
# in a list and concatenated once, instead of growing a DataFrame inside the
# loop (which re-copies all previous rows on every iteration).
_nested_frames = []
for method_name, per_feature in results["nested_cv"].items():
    for feature_name, per_metric in per_feature.items():
        for metric_name, scores in per_metric.items():
            _nested_frames.append(pd.DataFrame({
                'method': method_name,
                'feature': feature_name,
                'metric': metric_name,
                "score": scores
            }))
df = pd.concat(_nested_frames, ignore_index=True)

# Ordered categoricals fix the row/column order of the pivot table below.
# custom_order is also used by the plain-CV table cell.
custom_order = ["pathway","Chem", "DGI",  "indication", "target", "transporter", "enzyme"]
df['feature'] = pd.Categorical(df['feature'], categories=custom_order, ordered=True)
df['method'] = pd.Categorical(df['method'], categories=methods, ordered=True)
df['metric'] = pd.Categorical(df['metric'], categories=metrice_names, ordered=True)

# Mean and standard deviation of the scores for each (feature, method) row
# and each metric column.
df2 = pd.pivot_table(df, values=['score'], index=["feature", "method"], aggfunc={'score': ["mean","std"]}, columns=["metric"])
df3 = df2.sort_index(axis=1, level='metric').sort_index(level='feature')
df3.to_excel(f'results/nested_cv_results_{SEs_name}_{filter}.xlsx')
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score,score,score,score,score,score,score,score,score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
Unnamed: 0_level_2,metric,AUPR+AUROC,AUPR+AUROC,AUPR+AUROCperdrug,AUPR+AUROCperdrug,AUROC,AUROC,AUPR,AUPR,AUROCperdrug,AUROCperdrug,AUPRperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3
pathway,SKR,0.814269,0.018117,0.871404,0.013377,0.74111,0.020481,0.073159,0.005693,0.786743,0.01138,0.084662,0.00863
pathway,KRR,0.805827,0.042154,0.862769,0.029783,0.731458,0.04129,0.074368,0.01148,0.778234,0.032396,0.084535,0.009446
pathway,VKR,0.820921,0.027405,0.805101,0.018085,0.75538,0.018511,0.065542,0.011021,0.747522,0.012183,0.057579,0.009267
pathway,Naive,0.767696,0.010109,0.804008,0.00804,0.728631,0.003547,0.039066,0.00703,0.761394,0.006766,0.042614,0.007225
pathway,LNSM_RLN,0.808951,0.024936,0.825698,0.013919,0.734855,0.017131,0.074096,0.013623,0.746576,0.007665,0.079122,0.007145
pathway,LNSM_jaccard,0.568672,0.059902,0.613726,0.047646,0.5381,0.058566,0.030572,0.007232,0.561908,0.039101,0.051819,0.010791
Chem,SKR,0.754919,0.004893,0.799146,0.009303,0.7148,0.012575,0.04012,0.011374,0.75208,0.014257,0.047067,0.00738
Chem,KRR,0.760468,0.01133,0.799096,0.007808,0.721034,0.016006,0.039434,0.008374,0.754171,0.00852,0.044925,0.007499
Chem,VKR,0.748777,0.023003,0.763515,0.021771,0.710901,0.024292,0.037876,0.013467,0.724511,0.02578,0.039004,0.006293
Chem,Naive,0.768181,0.008198,0.791657,0.010408,0.732247,0.01257,0.035934,0.007251,0.754566,0.007614,0.037091,0.007712


In [91]:
# Flatten the plain-CV scores into long format: one row per
# (method, feature, metric). The single-row frames are collected in a list
# and concatenated once, instead of growing a DataFrame inside the loop.
_cv_frames = []
for method_name, per_feature in results["cv"].items():
    for feature_name, per_metric in per_feature.items():
        for metric_name, scores in per_metric.items():
            # An explicit one-element index is required because every column
            # value here is a scalar.
            _cv_frames.append(pd.DataFrame({
                'method': method_name,
                'feature': feature_name,
                'metric': metric_name,
                "score": scores
            },index=["1"]))
df = pd.concat(_cv_frames, ignore_index=True)

# custom_order is defined in the nested-CV table cell.
df['feature'] = pd.Categorical(df['feature'], categories=custom_order, ordered=True)
df['method'] = pd.Categorical(df['method'], categories=methods, ordered=True)
df['metric'] = pd.Categorical(df['metric'], categories=metrice_names, ordered=True)

# One row per (feature, method) and one column per metric.
df2 = pd.pivot_table(df, values=['score'], index=["feature", "method"], columns="metric")
df2.to_excel(f'results/cv_results_{SEs_name}_{filter}.xlsx')
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score,score,score
Unnamed: 0_level_1,metric,AUPR+AUROC,AUPR+AUROCperdrug,AUROC,AUPR,AUROCperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
pathway,SKR,0.767661,0.813291,0.717659,0.050001,0.760587,0.052704
pathway,KRR,0.767345,0.811571,0.717364,0.049981,0.759049,0.052522
pathway,VKR,0.763138,0.778752,0.713824,0.049315,0.727959,0.050793
pathway,Naive,0.770548,0.810164,0.719736,0.050811,0.757912,0.052252
pathway,LNSM_RLN,0.771845,0.77071,0.685184,0.08666,0.67627,0.09444
pathway,LNSM_jaccard,0.634257,0.625775,0.584585,0.049671,0.555283,0.070492
Chem,SKR,0.798219,0.822046,0.74191,0.056309,0.766944,0.055103
Chem,KRR,0.780916,0.805757,0.735871,0.045044,0.758818,0.046939
Chem,VKR,0.77015,0.770245,0.727403,0.042748,0.725953,0.044292
Chem,Naive,0.773802,0.797056,0.730914,0.042888,0.752628,0.044427


In [92]:
# Wide table of nested-CV scores: one row per (method, feature, metric), with
# the individual scores in integer-named columns 0, 1, 2, ...
# Built in one pass from a list of records instead of concatenating onto a
# DataFrame inside the loop.
_score_records = [
    {
        "method": method_name,
        "feature": feature_name,
        "metric": metric_name,
        **dict(enumerate(scores)),
    }
    for method_name, per_feature in results["nested_cv"].items()
    for feature_name, per_metric in per_feature.items()
    for metric_name, scores in per_metric.items()
]
df = pd.DataFrame(_score_records)

# For every metric/feature pair, compare the methods with pairwise paired
# tests (pingouin's DataFrame.ptests) and write the resulting table to Excel.
for metric_name in metrice_names:
    for feature_name in features_names:
        subset = df[(df["metric"] == metric_name) & (df["feature"] == feature_name)]
        # Keep the method label plus every score column. The columns are
        # selected by name, so this does not assume a fixed number of folds.
        scores_by_method = subset.drop(columns=["feature", "metric"]).set_index("method")
        # Transpose so that each method becomes a column, as ptests compares columns.
        pvalues = scores_by_method.T.ptests(paired=True, stars=False)
        pvalues.to_excel(f'results/pvalue_{SEs_name}_{filter}_{feature_name}_{metric_name}.xlsx')