In [None]:
from itertools import product
import numpy as np
import pandas as pd
from ADRprofilePrediction import Pairs2Mat, evaluation
from Models import loadHyperpar
import seaborn as sns 
import matplotlib.pylab as plt
from matplotlib.ticker import MultipleLocator
import matplotlib.patheffects as path_effects
import json
import pingouin as pg

In [None]:
# Print the installed scikit-learn version so it is recorded in the notebook output.
import sklearn
print(sklearn.__version__)

In [None]:
# Feature matrices keyed by feature-set name. The keys are the names listed in
# `features_names` and are used to select X for each evaluation run.
# Six of the seven entries are built by Pairs2Mat from a pair-list file
# (presumably one row per drug/feature pair -- confirm in ADRprofilePrediction);
# "Chem" is read directly as a tab-separated table with a header row and the
# first column as index.
features_dict = dict(
    target=Pairs2Mat(
        path="data/drug_target.tsv",
        colname1="0",
        colname2="1",
    ),
    enzyme=Pairs2Mat(
        path="data/drug_enzyme.tsv",
        colname1="0",
        colname2="1",
    ),
    Chem=pd.read_csv(
        "data/drug_chemsfp.tsv",
        sep="\t",
        header=0,
        index_col=0,
    ),
    DGI=Pairs2Mat(
        path="data/interactions.tsv",
        colname1="drug_claim_name",
        colname2="gene_name",
    ),
    transporter=Pairs2Mat(
        path="data/drug_transporter.tsv",
        colname1="0",
        colname2="1",
    ),
    pathway=Pairs2Mat(
        path="data/drug_pathway.tsv",
        colname1="0",
        colname2="1",
    ),
    indication=Pairs2Mat(
        path="data/drug_indication.tsv",
        colname1="1_x",
        colname2="6",
    ),
)


In [None]:
# Side-effect (SE) label matrices, restricted by how often each side effect occurs.
# `filter` selects which columns are kept, based on each column's sum
# (presumably the number of drugs with that side effect, if the matrix is binary):
#   "all"  -> keep columns whose sum is >= 5
#   "rare" -> keep columns whose sum is < 50
# NOTE: the name `filter` shadows the Python builtin. It is kept because later
# cells interpolate it into the names of the result files.
filter = "rare"

# Fail early on an unknown mode instead of silently leaving `SEs` empty, which
# would only surface later as a KeyError on SEs["SIDER"].
if filter not in ("all", "rare"):
    raise ValueError(f"Unknown filter {filter!r}; expected 'all' or 'rare'")


def _se_frequency_mask(column_sums, mode):
    """Return the boolean column mask for the given filter mode."""
    if mode == "all":
        return column_sums >= 5
    return column_sums < 50


SIDER = Pairs2Mat(path="data/drug_se.tsv",colname1="1_x",colname2="5")
OFFSIDERS = Pairs2Mat(path="data/OFFSIDES.csv",colname1="drug_concept_name",colname2="condition_concept_name",sep = ",")

SEs = {}
for _se_label, _se_matrix in (("SIDER", SIDER), ("OFFSIDES", OFFSIDERS)):
    column_sums = np.sum(_se_matrix, axis=0)
    SEs[_se_label] = _se_matrix.loc[:, _se_frequency_mask(column_sums, filter)]



In [None]:
# Display the shape of the filtered SIDER label matrix.
SEs["SIDER"].shape

In [None]:
# Display the shape of the filtered OFFSIDES label matrix.
SEs["OFFSIDES"].shape

In [None]:
# Names of the feature sets to evaluate; each one is a key of `features_dict`.
features_names = [
    "target",
    "enzyme",
    "Chem",
    "DGI",
    "transporter",
    "pathway",
    "indication",
]

# SEs_names = ["SIDER", "OFFSIDES"]

# Methods evaluated in this run; each one is a key of `all_hyperparlist`.
# Earlier configurations of this list also used the identifiers
# "KR", "SVM", "OCCA", "SCCA", "RF" and "BRF".
methods = [
    "SKR",
    "KRR",
    "VKR",
    "Naive",
    "LNSM_RLN",
    "LNSM_jaccard",
]

# Single-metric names.
tuning_metrices = [
    "AUROC",
    "AUPR",
    "AUROCperdrug",
    "AUPRperdrug",
]

# Category order for the "metric" column of the result tables:
# the two combined scores first, then the single metrics above.
metrice_names = ["AUPR+AUROC", "AUPR+AUROCperdrug", *tuning_metrices]

In [None]:
# Candidate value grids used to build the hyperparameter search space.
A = 10.0 ** np.arange(-2, 3, dtype=float)      # powers of ten from 1e-2 to 1e2
B = np.arange(0.1, 1, 0.1, dtype=float)        # 0.1 to 0.9 in steps of 0.1
C = np.arange(5, 20, 5, dtype=int)             # 5, 10, 15
A10 = 10.0 ** np.arange(1, 2, dtype=float)     # single value: 10
A100 = 10.0 ** np.arange(2, 3, dtype=float)    # single value: 100

# Per-method list of grids; the list is unpacked positionally into
# loadHyperpar(*grids, method_option=method). An empty list means the method
# has nothing to tune.
# Grids used by methods that are currently disabled:
#   "KR": [A, A], "SVM": [A, A, A], "OCCA": [], "SCCA": [A], "RF": [C], "BRF": [C]
all_hyperparlist = dict(
    SKR=[A, B, A10, A100],
    KRR=[A, A],
    VKR=[A, A, C],
    Naive=[],
    LNSM_RLN=[B, A],
    LNSM_jaccard=[B],
)

In [None]:
# Name of the side-effect label set; interpolated into the hyperparameter and
# result file names used by later cells.
SEs_name = "SIDER"
# Metric name passed to evaluation() as `tuning_metrice`.
metrice = "AUPR"

In [None]:
# Containers keyed by validation scheme ("nested_cv" / "cv"); later cells fill
# them as [validation][method][feature].
#   hyperpars    -- hyperparameters handed to evaluation() as `hyperparfixed`
#   hyperparsOut -- hyperparameters returned by evaluation()
#   results      -- scores returned by evaluation()
hyperpars = {"nested_cv": {}, "cv": {}}
hyperparsOut = {"nested_cv": {}, "cv": {}}
results = {"nested_cv": {}, "cv": {}}

In [None]:
# hyperpars["nested_cv"]["SKR"] = {}
# hyperpars["nested_cv"]["SKR"]["target"] = [
#     (0.01, 0.4, 10, 100),
#     (0.01, 0.4, 10, 100),
#     (0.01, 0.4, 10, 100),
#     (0.01, 0.5, 10, 100),
#     (0.01, 0.4, 10, 100),
#     ]
# hyperpars["nested_cv"]["SKR"]["enzyme"] = [
#     (0.01, 0.6, 10, 100),
#     (1, 0.8, 10, 100),
#     (1, 0.2, 10, 100),
#     (1, 0.2, 10, 100),
#     (1, 0.2, 10, 100),
#     ]
# hyperpars["nested_cv"]["SKR"]["Chem"] = [
#     (1, 0.3, 10, 100),
#     (1, 0.3, 10, 100),
#     (1, 0.3, 10, 100),
#     (1, 0.3, 10, 100),
#     (1, 0.3, 10, 100),
#     ]
# hyperpars["nested_cv"]["SKR"]["DGI"] = [
#     (0.1, 0.2, 10, 100),
#     (0.1, 0.2, 10, 100),
#     (0.1, 0.2, 10, 100),
#     (0.1, 0.1, 10, 100),
#     (0.1, 0.2, 10, 100),
#     ]
# # hyperpars["nested_cv"]["SKR"]["transporter"] = [
# #     (1, 0.3, 10, 1),
# #     (1, 0.1, 10, 100),
# #     (0.1, 0.5, 10, 100),
# #     (0.1, 0.5, 10, 100),
# #     (0.1, 0.5, 10, 100),
# #     ]
# hyperpars["nested_cv"]["SKR"]["transporter"] = [
#     (1, 0.1, 10, 100),
#     (1, 0.1, 10, 100),
#     (0.1, 0.5, 10, 100),
#     (0.1, 0.5, 10, 100),
#     (0.1, 0.5, 10, 100),
#     ]
# hyperpars["nested_cv"]["SKR"]["pathway"] = [
#     (0.01, 0.3, 10, 100),
#     (0.01, 0.3, 10, 100),
#     (0.01, 0.3, 10, 100),
#     (0.01, 0.3, 10, 100),
#     (0.01, 0.3, 10, 100),
#     ]
# hyperpars["nested_cv"]["SKR"]["indication"] = [
#     (0.1, 0.1, 10, 100),
#     (0.1, 0.1, 10, 100),
#     (0.1, 0.1, 10, 100),
#     (0.1, 0.1, 10, 100),
#     (0.1, 0.1, 10, 100),
#     ]
# # hyperpars["cv"]["SKR"] = {}
# # hyperpars["cv"]["SKR"]["target"] = (10, 0.9, 10, 100)
# # hyperpars["cv"]["SKR"]["enzyme"] = (1, 0.9, 10, 100)
# # hyperpars["cv"]["SKR"]["Chem"] = (0.01, 0.7, 100, 100)
# # hyperpars["cv"]["SKR"]["DGI"] =(0.1, 0.4, 100, 100)
# # hyperpars["cv"]["SKR"]["transporter"] = (10, 0.9, 10, 10)
# # hyperpars["cv"]["SKR"]["pathway"] = (0.1, 0.4, 100, 10)
# # hyperpars["cv"]["SKR"]["indication"] = (1, 0.4, 100, 10)
# hyperpars["cv"]["SKR"] = {}
# hyperpars["cv"]["SKR"]["target"] = (10, 0.9, 10, 100)
# hyperpars["cv"]["SKR"]["enzyme"] = (1, 0.9, 10, 100)
# hyperpars["cv"]["SKR"]["Chem"] = (1, 0.9, 10, 100)
# hyperpars["cv"]["SKR"]["DGI"] =(1, 0.9, 10, 100)
# hyperpars["cv"]["SKR"]["transporter"] = (100, 0.9, 10, 100)
# hyperpars["cv"]["SKR"]["pathway"] = (100, 0.5, 10, 100)
# hyperpars["cv"]["SKR"]["indication"] = (1, 0.9, 10, 100)

In [None]:
# hyperpars["nested_cv"]["KRR"] = {}
# hyperpars["nested_cv"]["KRR"]["target"] = [
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["enzyme"] = [
#     (1, 10),
#     (10, 10),
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["Chem"] = [
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["DGI"] = [
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["transporter"] = [
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     (1, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["pathway"] = [
#     (0.01, 10),
#     (0.01, 10),
#     (0.01, 10),
#     (0.01, 10),
#     (0.01, 10),
#     ]
# hyperpars["nested_cv"]["KRR"]["indication"] = [
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     (0.1, 10),
#     ]
# hyperpars["cv"]["KRR"] = {}
# hyperpars["cv"]["KRR"]["target"] = (100, 10)
# hyperpars["cv"]["KRR"]["enzyme"] = (100, 10)
# hyperpars["cv"]["KRR"]["Chem"] = (0.1, 100)
# hyperpars["cv"]["KRR"]["DGI"] = (0.1, 100)
# hyperpars["cv"]["KRR"]["transporter"] = (100, 100)
# hyperpars["cv"]["KRR"]["pathway"] = (0.1, 100)
# hyperpars["cv"]["KRR"]["indication"] = (100, 100)

In [None]:
# hyperpars["nested_cv"]["Naive"] = {}
# hyperpars["nested_cv"]["Naive"]["target"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["enzyme"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["Chem"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["DGI"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["transporter"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["pathway"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["nested_cv"]["Naive"]["indication"] = [
#     (),
#     (),
#     (),
#     (),
#     (),
#     ]
# hyperpars["cv"]["Naive"] = {}
# hyperpars["cv"]["Naive"]["target"] = ()
# hyperpars["cv"]["Naive"]["enzyme"] = ()
# hyperpars["cv"]["Naive"]["Chem"] =()
# hyperpars["cv"]["Naive"]["DGI"] = ()
# hyperpars["cv"]["Naive"]["transporter"] = ()
# hyperpars["cv"]["Naive"]["pathway"] = ()
# hyperpars["cv"]["Naive"]["indication"] = ()

In [None]:
# hyperpars["nested_cv"]["VKR"] = {}
# hyperpars["nested_cv"]["VKR"]["target"] = [
#     (10, 0.1, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["enzyme"] = [
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["Chem"] = [
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["DGI"] = [
#     (10, 0.1, 15),
#     (10, 0.1, 10),
#     (10, 0.1, 15),
#     (10, 0.01, 15),
#     (10, 0.1, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["transporter"] = [
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 1, 15),
#     (10, 0.1, 15),
#     (10, 0.1, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["pathway"] = [
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     ]
# hyperpars["nested_cv"]["VKR"]["indication"] = [
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     (10, 0.01, 15),
#     ]
# hyperpars["cv"]["VKR"] = {}
# hyperpars["cv"]["VKR"]["target"] = (10, 100, 10)
# hyperpars["cv"]["VKR"]["enzyme"] = (10, 100, 10)
# hyperpars["cv"]["VKR"]["Chem"] =(100, 0.01, 10)
# hyperpars["cv"]["VKR"]["DGI"] = (100, 0.1, 10)
# hyperpars["cv"]["VKR"]["transporter"] = (100, 0.1, 15)
# hyperpars["cv"]["VKR"]["pathway"] = (100, 10, 15)
# hyperpars["cv"]["VKR"]["indication"] = (100, 1, 15)

In [None]:
# hyperpars["nested_cv"]["LNSM_RLN"] = {}
# hyperpars["nested_cv"]["LNSM_RLN"]["target"] = [
#     (0.2, 10), 
#     (0.2, 10), 
#     (0.2, 10), 
#     (0.2, 10), 
#     (0.2, 10)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["enzyme"] = [
#     (0.1, 100),
#     (0.1, 100),
#     (0.1, 100),
#     (0.1, 100),
#     (0.1, 10)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["Chem"] = [
#     (0.6, 100),
#     (0.1, 100),
#     (0.6, 100),
#     (0.1, 100),
#     (0.6, 100)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["DGI"] = [
#     (0.2, 10),
#     (0.2, 10),
#     (0.3, 100),
#     (0.2, 10),
#     (0.2, 10)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["transporter"] = [
#     (0.3, 100),
#     (0.3, 10),
#     (0.3, 10),
#     (0.2, 10),
#     (0.3, 10)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["pathway"] = [
#     (0.1, 10), 
#     (0.1, 10), 
#     (0.1, 10), 
#     (0.1, 1), 
#     (0.1, 10)
#     ]
# hyperpars["nested_cv"]["LNSM_RLN"]["indication"] = [
#     (0.5, 100),
#     (0.4, 100),
#     (0.4, 100),
#     (0.3, 100),
#     (0.4, 100)
#     ]
# hyperpars["cv"]["LNSM_RLN"] = {}
# hyperpars["cv"]["LNSM_RLN"]["target"] = (0.1, 1)
# hyperpars["cv"]["LNSM_RLN"]["enzyme"] = (0.1, 100)
# hyperpars["cv"]["LNSM_RLN"]["Chem"] = (0.9, 100)
# hyperpars["cv"]["LNSM_RLN"]["DGI"] = (0.2, 10)
# hyperpars["cv"]["LNSM_RLN"]["transporter"] = (0.1, 10)
# hyperpars["cv"]["LNSM_RLN"]["pathway"] = (0.1, 10)
# hyperpars["cv"]["LNSM_RLN"]["indication"] = (0.3, 100)

In [None]:
# hyperpars["nested_cv"]["LNSM_jaccard"] = {}
# hyperpars["nested_cv"]["LNSM_jaccard"]["target"] = [
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["enzyme"] = [
#     (0.2,), 
#     (0.2,), 
#     (0.2,), 
#     (0.2,), 
#     (0.2,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["Chem"] = [
#     (0.2,), 
#     (0.2,), 
#     (0.2,), 
#     (0.8,), 
#     (0.5,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["DGI"] = [
#     (0.2,), 
#     (0.2,), 
#     (0.2,), 
#     (0.2,), 
#     (0.2,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["transporter"] = [
#     (0.6,),
#     (0.7,),
#     (0.1,),
#     (0.7,),
#     (0.1,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["pathway"] = [
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,)
#     ]
# hyperpars["nested_cv"]["LNSM_jaccard"]["indication"] = [
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,), 
#     (0.1,)
#     ]
# hyperpars["cv"]["LNSM_jaccard"] = {}
# hyperpars["cv"]["LNSM_jaccard"]["target"] = (0.1,)
# hyperpars["cv"]["LNSM_jaccard"]["enzyme"] = (0.4,)
# hyperpars["cv"]["LNSM_jaccard"]["Chem"] = (0.4,)
# hyperpars["cv"]["LNSM_jaccard"]["DGI"] = (0.1,)
# hyperpars["cv"]["LNSM_jaccard"]["transporter"] = (0.5,)
# hyperpars["cv"]["LNSM_jaccard"]["pathway"] = (0.1,)
# hyperpars["cv"]["LNSM_jaccard"]["indication"] = (0.1,)

In [None]:

# Replace `hyperpars` with the hyperparameters stored on disk for the selected
# label set. NOTE: the file has an .xml extension but its content is parsed as JSON.
hyperpars_path = f'results/hyperpars_{SEs_name}.xml'
with open(hyperpars_path, 'r') as hyperpars_file:
    hyperpars = json.load(hyperpars_file)

In [None]:
# Evaluate every method on every feature set: for each method, all features are
# run with nested cross-validation first and then with plain cross-validation.
# Scores go to `results` and the hyperparameters returned by evaluation() go to
# `hyperparsOut`, both indexed as [validation][method][feature].
for method in methods:
    for validation in ("nested_cv", "cv"):
        hyperparsOut[validation][method] = {}
        results[validation][method] = {}
        # The loop variable is deliberately not called `str`, which would shadow
        # the builtin for every cell executed afterwards.
        for feature_name in features_names:
            print(f"using feature {feature_name}")
            # Rebuilt for every call: the return type of loadHyperpar is not
            # visible here, so it is not reused across calls (it may be a
            # one-shot iterator).
            hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
            scores, chosen_hyperpars = evaluation(
                # Labels follow SEs_name so they match the result file names.
                Y=SEs[SEs_name],
                X=features_dict[feature_name],
                method_option=method,
                tuning_metrice=metrice,
                hyperparList=hyperparList,
                hyperparfixed=hyperpars[validation][method][feature_name],
                Validation=validation,
                n_jobs=1,
            )
            results[validation][method][feature_name] = scores
            hyperparsOut[validation][method][feature_name] = chosen_hyperpars

In [None]:
# Disabled step: persist the hyperparameters returned by evaluation().
# with open(f'results/hyperpars_{SEs_name}.xml', 'w') as xml_file:
#    json.dump(hyperparsOut, xml_file)

# Persist the scores. NOTE: the file has an .xml extension but is written as JSON.
results_path = f'results/results_{SEs_name}_{filter}.xml'
with open(results_path, 'w') as results_file:
    json.dump(results, results_file)

In [45]:
# Reload the saved scores from disk (JSON content despite the .xml extension),
# so the cells below can run without repeating the evaluation.
results_path = f'results/results_{SEs_name}_{filter}.xml'
with open(results_path, 'r') as results_file:
    results = json.load(results_file)

In [46]:
# Summarise the nested-CV scores: mean and standard deviation of every metric
# for each (feature, method) pair, written to Excel and displayed.

# One small frame per (method, feature, metric) with one row per score.
# The frames are concatenated once at the end rather than growing a DataFrame
# inside the loop, which copies all accumulated rows on every iteration.
score_frames = [
    pd.DataFrame({
        'method': method_name,
        'feature': feature_name,
        'metric': metric_name,
        "score": scores
    })
    for method_name, per_feature in results["nested_cv"].items()
    for feature_name, per_metric in per_feature.items()
    for metric_name, scores in per_metric.items()
]
df = pd.concat(score_frames, ignore_index=True) if score_frames else pd.DataFrame()

# Ordered categoricals fix the row and column order of the pivot table.
# `custom_order` is reused by the plain-CV summary cell.
custom_order = ["pathway","Chem", "DGI",  "indication", "target", "transporter", "enzyme"]
df['feature'] = pd.Categorical(df['feature'], categories=custom_order, ordered=True)
df['method'] = pd.Categorical(df['method'], categories=methods, ordered=True)
df['metric'] = pd.Categorical(df['metric'], categories=metrice_names, ordered=True)

df2 = pd.pivot_table(df, values=['score'], index=["feature", "method"], aggfunc={'score': ["mean","std"]}, columns=["metric"])
df3 = df2.sort_index(axis=1, level='metric').sort_index(level='feature')
df3.to_excel(f'results/nested_cv_results_{SEs_name}_{filter}.xlsx')
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score,score,score,score,score,score,score,score,score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
Unnamed: 0_level_2,metric,AUPR+AUROC,AUPR+AUROC,AUPR+AUROCperdrug,AUPR+AUROCperdrug,AUROC,AUROC,AUPR,AUPR,AUROCperdrug,AUROCperdrug,AUPRperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3
pathway,SKR,0.835042,0.015121,0.916771,0.024795,0.753108,0.00869,0.081934,0.007327,0.784764,0.009174,0.132007,0.016873
pathway,KRR,0.792077,0.021675,0.875733,0.028389,0.711942,0.015692,0.080135,0.007405,0.746195,0.012473,0.129538,0.017402
pathway,VKR,0.799172,0.016793,0.804518,0.008558,0.760231,0.008005,0.038941,0.009946,0.758814,0.006057,0.045704,0.00393
pathway,Naive,0.791896,0.011867,0.813665,0.005101,0.767493,0.01237,0.024404,0.002594,0.784361,0.005165,0.029304,0.002392
pathway,LNSM_RLN,0.75168,0.151963,0.815806,0.147834,0.684964,0.138341,0.066716,0.015302,0.697824,0.135225,0.117982,0.015063
pathway,LNSM_jaccard,0.606089,0.076855,0.666055,0.056602,0.581359,0.074172,0.02473,0.008278,0.580674,0.05302,0.085381,0.007064
Chem,SKR,0.799193,0.034896,0.857009,0.025752,0.728132,0.024448,0.071061,0.013855,0.752113,0.016128,0.104897,0.010783
Chem,KRR,0.767846,0.035998,0.82824,0.023691,0.697571,0.025012,0.070275,0.014396,0.72214,0.014004,0.106099,0.011412
Chem,VKR,0.770189,0.011737,0.785314,0.011519,0.735898,0.011943,0.034292,0.006718,0.744736,0.013146,0.040578,0.008811
Chem,Naive,0.795526,0.006659,0.81677,0.01155,0.773083,0.007569,0.022443,0.003305,0.789683,0.010823,0.027087,0.0031


In [47]:
# Summarise the plain-CV scores: one value per (feature, method, metric),
# written to Excel and displayed.
# Relies on `custom_order` defined in the nested-CV summary cell.

# One single-row frame per (method, feature, metric), concatenated once at the
# end rather than growing a DataFrame inside the loop.
score_frames = [
    pd.DataFrame({
        'method': method_name,
        'feature': feature_name,
        'metric': metric_name,
        "score": scores
    },index=["1"])
    for method_name, per_feature in results["cv"].items()
    for feature_name, per_metric in per_feature.items()
    for metric_name, scores in per_metric.items()
]
df = pd.concat(score_frames, ignore_index=True) if score_frames else pd.DataFrame()

# Ordered categoricals fix the row and column order of the pivot table.
df['feature'] = pd.Categorical(df['feature'], categories=custom_order, ordered=True)
df['method'] = pd.Categorical(df['method'], categories=methods, ordered=True)
df['metric'] = pd.Categorical(df['metric'], categories=metrice_names, ordered=True)

df2 = pd.pivot_table(df, values=['score'], index=["feature", "method"], columns="metric")
# The file name follows `filter`, like the nested-CV export, so a run with a
# different filter does not overwrite the "rare" results.
df2.to_excel(f'results/cv_results_{SEs_name}_{filter}.xlsx')
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score,score,score
Unnamed: 0_level_1,metric,AUPR+AUROC,AUPR+AUROCperdrug,AUROC,AUPR,AUROCperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
pathway,SKR,0.800483,0.832402,0.771716,0.028767,0.795311,0.037091
pathway,KRR,0.8025,0.832841,0.77298,0.02952,0.795415,0.037426
pathway,VKR,0.78339,0.792441,0.75602,0.027371,0.759717,0.032724
pathway,Naive,0.79694,0.824556,0.76812,0.028821,0.789031,0.035525
pathway,LNSM_RLN,0.359796,0.405702,0.341474,0.018323,0.360783,0.044919
pathway,LNSM_jaccard,0.417071,0.484296,0.406426,0.010645,0.431501,0.052795
Chem,SKR,0.834835,0.878129,0.782746,0.052089,0.805465,0.072664
Chem,KRR,0.808519,0.838999,0.775254,0.033265,0.795312,0.043687
Chem,VKR,0.799166,0.795581,0.756581,0.042585,0.755255,0.040326
Chem,Naive,0.791128,0.816394,0.765827,0.025301,0.786149,0.030245


In [48]:
# Pairwise paired tests between methods on the nested-CV scores, exported as
# one Excel file per (metric, feature) combination.

# Wide table: one row per (method, feature, metric); the score columns are
# labelled 0, 1, 2, ... by position in the score list. Built in a single
# DataFrame call instead of concatenating inside the loop.
score_rows = [
    {
        'method': method_name,
        'feature': feature_name,
        'metric': metric_name,
        **dict(enumerate(scores)),
    }
    for method_name, per_feature in results["nested_cv"].items()
    for feature_name, per_metric in per_feature.items()
    for metric_name, scores in per_metric.items()
]
df = pd.DataFrame(score_rows)

for metric_name in metrice_names:
    for feature_name in features_names:
        subset = df[(df["metric"] == metric_name) & (df["feature"] == feature_name)]
        # Keep the method label plus every score column. Selecting by name
        # instead of fixed positions means the number of folds is not assumed
        # to be exactly five.
        scores_by_method = subset.drop(columns=["feature", "metric"]).set_index("method")
        # Transposed so that each method is a column and each score position a
        # row; ptests is the DataFrame method registered by pingouin.
        pvalues = scores_by_method.T.ptests(paired=True, stars=False)
        pvalues.to_excel(f'results/pvalue_{SEs_name}_{filter}_{feature_name}_{metric_name}.xlsx')