In [3]:
from itertools import product
import numpy as np
import pandas as pd
from ADRprofilePrediction import Pairs2Mat, evaluation, evaluation2
from APPFC import completion
from Models import loadHyperpar
import seaborn as sns 
import matplotlib.pylab as plt
from matplotlib.ticker import MultipleLocator
import matplotlib.patheffects as path_effects
import json
import pingouin as pg
from functools import reduce

In [4]:

class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types.

    Pass it as ``cls=NpEncoder`` to ``json.dump`` / ``json.dumps`` when the
    object being serialised may contain NumPy values.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        NumPy booleans, integers and floats become ``bool``, ``int`` and
        ``float``; arrays become (nested) lists. Anything else is delegated to
        the base class, which raises ``TypeError``.
        """
        # np.bool_ is neither np.integer nor np.floating, so it needs its own
        # branch; without it a NumPy boolean raises TypeError.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

In [5]:
# Print the scikit-learn version installed in the kernel so it is recorded
# next to the results.
import sklearn
print(sklearn.__version__)

0.24.2


In [6]:
# Key into `SEs` selecting which side-effect matrix the runs below use.
SEs_name = "SIDER"
# Passed as `tuning_metrice` to completion(); the KRR evaluation cells later
# reassign it to "AUPR".
metrice = "mse"

In [7]:
# Drug feature matrices keyed by feature name. Every entry except "Chem" is
# built by Pairs2Mat from a pair-list file; "Chem" is read directly from a
# tab-separated table whose first row is the header and first column the index.
features_dict = {}
features_dict["target"] = Pairs2Mat(path="data/drug_target.tsv", colname1="0", colname2="1")
features_dict["enzyme"] = Pairs2Mat(path="data/drug_enzyme.tsv", colname1="0", colname2="1")
features_dict["Chem"] = pd.read_csv("data/drug_chemsfp.tsv", sep="\t", header=0, index_col=0)
features_dict["DGI"] = Pairs2Mat(
    path="data/interactions.tsv",
    colname1="drug_claim_name",
    colname2="gene_name",
)
features_dict["transporter"] = Pairs2Mat(path="data/drug_transporter.tsv", colname1="0", colname2="1")
features_dict["pathway"] = Pairs2Mat(path="data/drug_pathway.tsv", colname1="0", colname2="1")
features_dict["indication"] = Pairs2Mat(path="data/drug_indication.tsv", colname1="1_x", colname2="6")


In [8]:
# Column-filtering mode for the side-effect matrices:
#   "all"  keeps the columns whose column sum is >= 5
#   "rare" keeps the columns whose column sum is < 50
# Named `se_filter` (not `filter`) so the built-in `filter` is not shadowed.
se_filter = "all"
if se_filter not in ("all", "rare"):
    # Fail here instead of leaving `SEs` empty and hitting a KeyError later.
    raise ValueError(f"se_filter must be 'all' or 'rare', got {se_filter!r}")

# Both modes read the same two files; only the column mask differs.
SIDER = Pairs2Mat(path="data/drug_se.tsv", colname1="1_x", colname2="5")
OFFSIDES = Pairs2Mat(
    path="data/OFFSIDES.csv",
    colname1="drug_concept_name",
    colname2="condition_concept_name",
    sep=",",
)

SEs = {}
for se_name, se_matrix in (("SIDER", SIDER), ("OFFSIDES", OFFSIDES)):
    column_sums = np.sum(se_matrix, axis=0)
    if se_filter == "all":
        keep_columns = column_sums >= 5
    else:
        keep_columns = column_sums < 50
    SEs[se_name] = se_matrix.loc[:, keep_columns]


In [9]:
# (rows, columns) of the SIDER matrix after column filtering.
SEs["SIDER"].shape

(1344, 2556)

In [10]:
# (rows, columns) of the OFFSIDES matrix after column filtering.
SEs["OFFSIDES"].shape

(2730, 12750)

In [11]:

df_dict = features_dict
# Fold every feature matrix's index into a running intersection, starting
# from the "target" index, to get the labels shared by all feature sets.
intersection_indices = reduce(
    lambda shared, frame: shared.intersection(frame.index),
    df_dict.values(),
    df_dict['target'].index,
).tolist()
print("Intersection indices:", intersection_indices)
print("Size of intersection set", len(intersection_indices))

Intersection indices: ['ACETAMINOPHEN', 'ACETAZOLAMIDE', 'ALFENTANIL', 'ALLOPURINOL', 'AMANTADINE', 'AMIODARONE', 'AMITRIPTYLINE', 'AMLODIPINE', 'AMOXICILLIN', 'AMPRENAVIR', 'ANASTROZOLE', 'APIXABAN', 'APOMORPHINE', 'ARIPIPRAZOLE', 'ATAZANAVIR', 'ATENOLOL', 'AZATHIOPRINE', 'AZELASTINE', 'AZITHROMYCIN', 'BENAZEPRIL', 'BENZOCAINE', 'BEPRIDIL', 'BETAMETHASONE', 'BEZAFIBRATE', 'BICALUTAMIDE', 'BISOPROLOL', 'BOSENTAN', 'BOSUTINIB', 'BROMOCRIPTINE', 'BUMETANIDE', 'BUPRENORPHINE', 'BUSPIRONE', 'CABERGOLINE', 'CANAGLIFLOZIN', 'CANDESARTAN', 'CARBAMAZEPINE', 'CARBOPLATIN', 'CARVEDILOL', 'CEFACLOR', 'CEFAZOLIN', 'CEFTRIAXONE', 'CELECOXIB', 'CERIVASTATIN', 'CHLORAMBUCIL', 'CHLORPROMAZINE', 'CHLORPROPAMIDE', 'CIDOFOVIR', 'CIMETIDINE', 'CINOXACIN', 'CIPROFLOXACIN', 'CISPLATIN', 'CITALOPRAM', 'CLADRIBINE', 'CLARITHROMYCIN', 'CLOBAZAM', 'CLOMIPRAMINE', 'CLONIDINE', 'CLOPIDOGREL', 'CLOTRIMAZOLE', 'CODEINE', 'COLCHICINE', 'CRIZOTINIB', 'CYPROHEPTADINE', 'DAPAGLIFLOZIN', 'DARUNAVIR', 'DASATINIB', 'DESIP

In [12]:
# Feature sets iterated by the evaluation cells; each name is a key of `features_dict`.
features_names = ["target", "enzyme", "Chem", "DGI", "transporter", "pathway", "indication"]
# SEs_names = ["SIDER", "OFFSIDES"]
# Alternative method selections (commented out) surround the active one:
# methods = ["SKR", "KR", "KRR", "Naive", "LNSM_RLN", "LNSM_jaccard", "VKR"]
methods = ["SKR", "KRR", "VKR", "Naive", "LNSM_RLN", "LNSM_jaccard"]
# methods = ["SKR", "KR", "KRR", "Naive", "LNSM_RLN", "LNSM_jaccard", "VKR", "SVM", "OCCA", "SCCA", "RF", "BRF"]
# NOTE(review): `methods` and `tuning_metrices` are not read by any cell visible
# in this notebook; confirm they are still needed.
tuning_metrices=["AUROC", "AUPR", "AUROCperdrug", "AUPRperdrug"]
# The nested-cv results-table cells rebind `metrice_names` to a four-item list
# without the combined "AUPR+..." entries.
metrice_names = ["AUPR+AUROC", "AUPR+AUROCperdrug", "AUROC", "AUPR", "AUROCperdrug", "AUPRperdrug"]

In [13]:
# Candidate value grids shared by the per-method hyperparameter lists below.
A = np.power(10.0, np.arange(-2, 3, 1, dtype=float))     # powers of ten, 1e-2 .. 1e2
B = np.arange(0.1, 1, 0.1, dtype=float)                  # 0.1 .. 0.9 in steps of 0.1
C = np.arange(5, 205, 50, dtype=int)                     # 5, 55, 105, 155
C2 = np.arange(5, 20, 5, dtype=int)                      # 5, 10, 15
A10 = np.power(10.0, np.arange(1, 2, 1, dtype=float))    # single value 10
A100 = np.power(10.0, np.arange(2, 3, 1, dtype=float))   # single value 100

# One list of candidate grids per method; the list is unpacked positionally
# into loadHyperpar(). An empty list means the method has nothing to tune.
# Disabled entries (kept for reference): KR [A, A], Naive [], LNSM_RLN [B, A],
# LNSM_jaccard [B], SVM [A, A, A], OCCA [], SCCA [A], RF [C], BRF [C].
all_hyperparlist = dict(
    SKR=[A, B, A10, A100],
    KRR=[A, A],
    VKR=[A, A, C2],
    TNMF=[C],
    SRI=[],
    MICE=[],
    TRF=[],
    TKNN=[C2],
    TWNMF=[C],
)

In [14]:
# Containers for the feature-completion runs, keyed first by validation
# scheme. Each gets its own independent inner dicts.
fhyperpars = {"nested_cv": {}, "cv": {}}
fhyperparsOut = {"nested_cv": {}, "cv": {}}
fresults = {"nested_cv": {}, "cv": {}}

In [15]:

# Open and read the JSON file
# with open(f'results/hyperpars_{SEs_name}.xml', 'r') as xml_file:

#     hyperpars = json.load(xml_file)

In [16]:
# method = "TNMF"
# validation = "nested_cv"
# fhyperparsOut[validation][method] = {}
# fresults[validation][method] = {}
# hyperparList = loadHyperpar(*all_hyperparlist[method],method_option=method)
# fresults[validation][method], fhyperparsOut[validation][method], features_new = completion(Y=SEs[SEs_name], X=features_dict, method_option=method,tuning_metrice=metrice,hyperparList=hyperparList,Validation=validation,n_jobs=3)

# validation = "cv"
# fhyperparsOut[validation][method] = {}
# fresults[validation][method] = {}
# hyperparList = loadHyperpar(*all_hyperparlist[method],method_option=method)
# fresults[validation], fhyperparsOut[validation][method], features_new = completion(Y=SEs[SEs_name], X=features_dict, method_option=method,tuning_metrice=metrice,hyperparList=hyperparList,Validation=validation,n_jobs=1)

In [17]:
# fhyperparsOut

In [18]:
# with open(f'results/fhyperpars_{SEs_name}.xml', 'w') as xml_file:
#    json.dump(fhyperparsOut, xml_file, cls=NpEncoder)
# with open(f'results/fresults_{SEs_name}_{method}.xml', 'w') as xml_file:
#    json.dump(fresults, xml_file)

In [None]:
# method = "KRR"
# validation = "nested_cv"
# hyperparsOut[validation][method] = {}
# results[validation][method] = {}
# str = "DGI"
# hyperparList = loadHyperpar(*all_hyperparlist[method],method_option=method)
# results[validation][method]["noAPPFC"][str], hyperparsOut[validation][method][str]["noAPPFC"] = evaluation(Y=SEs["SIDER"], X=features_dict[str], method_option=method,tuning_metrice=metrice, hyperparList=hyperparList,Validation=validation,n_jobs=1)

In [20]:
# Complete the feature matrices with SRI. Its grid in all_hyperparlist is
# empty and no fixed hyperparameters are supplied (hyperparfixed=()).
method = "SRI"
validation = "completion"
hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
features_new = completion(
    Y=SEs[SEs_name],
    X=features_dict,
    method_option=method,
    tuning_metrice=metrice,
    hyperparList=hyperparList,
    hyperparfixed=(),
    Validation=validation,
    n_jobs=1,
)

The SRI requires no hyperparameter
common drugs chosen to be the test set:  set()
SRI starts:
SRI ends:


In [21]:
# Containers for the prediction runs, keyed first by validation scheme.
# Each gets its own independent inner dicts.
hyperpars = {"nested_cv": {}, "cv": {}}
hyperparsOut = {"nested_cv": {}, "cv": {}}
results = {"nested_cv": {}, "cv": {}}

In [23]:
# KRR under nested cross-validation, tuned on AUPR, once per feature set.
method = "KRR"
metrice = "AUPR"
validation = "nested_cv"
# Fresh slots for this method. The baseline run without APPFC (evaluation() on
# the original feature only) is currently disabled, so "noAPPFC" stays empty.
hyperparsOut[validation][method] = {"noAPPFC": {}, "APPFC": {}}
results[validation][method] = {"noAPPFC": {}, "APPFC": {}}
# The loop variable is `feature_name`, not `str`: a module-level `str` would
# shadow the built-in for every cell executed afterwards.
for feature_name in features_names:
    print(f"using feature {feature_name}")
    print("with APPFC")
    hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
    # evaluation2 receives both the original feature matrix (X) and its
    # completed counterpart from `features_new` (X2).
    (
        results[validation][method]["APPFC"][feature_name],
        hyperparsOut[validation][method]["APPFC"][feature_name],
    ) = evaluation2(
        Y=SEs[SEs_name],
        X=features_dict[feature_name],
        X2=features_new[feature_name],
        method_option=method,
        tuning_metrice=metrice,
        hyperparList=hyperparList,
        Validation=validation,
        n_jobs=10,
    )

using feature target
with APPFC
The KRR requires hyperparameter lambda, sigma_X
---------- nested cv start ----------
Fold: 0
number of hyperpars combination:  25
first few training idx:  [ 0  1  2  3  4  7  9 10 11 12]
first few testing idx:  [ 153  283  397  440  800  857  858 1177 1230 1327]
Inner Fold: 0
Inner Fold: 1
Inner Fold: 2
Inner Fold: 3
best hyperpar: (0.1, 10.0)
AUPR: 0.40456389441441315
AUPR for each fold: [0.42848554 0.40130395 0.3897826  0.39868349]
--- tuning end ---
target size: 142
------ best hyper pars:  (0.1, 10.0) ------
KRR starts:
KRR ends:
-----------
AUPRperdrug: 0.45821384568125034
AUROCperdrug: 0.8953835004601001
AUPR+AUROCperdrug: 1.3535973461413504
AUPR: 0.40402631828469626
AUROC: 0.8635411281677559
AUPR+AUROC: 1.267567446452452
-----------
Fold: 1
number of hyperpars combination:  25
first few training idx:  [ 0  1  2  3  4  7  9 10 11 12]
first few testing idx:  [ 125  164  480  518  552  696  913  919 1277 1310]
Inner Fold: 0
Inner Fold: 1
Inner Fold:

In [24]:
# KRR under plain cross-validation, once per feature set. `metrice` keeps
# whatever value an earlier cell assigned.
method = "KRR"
validation = "cv"
# Fresh slots for this method. The baseline run without APPFC (evaluation() on
# the original feature only) is currently disabled, so "noAPPFC" stays empty.
hyperparsOut[validation][method] = {"noAPPFC": {}, "APPFC": {}}
results[validation][method] = {"noAPPFC": {}, "APPFC": {}}
# The loop variable is `feature_name`, not `str`: a module-level `str` would
# shadow the built-in for every cell executed afterwards.
for feature_name in features_names:
    print(f"using feature {feature_name}")
    print("with APPFC")
    hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
    # evaluation2 receives both the original feature matrix (X) and its
    # completed counterpart from `features_new` (X2).
    (
        results[validation][method]["APPFC"][feature_name],
        hyperparsOut[validation][method]["APPFC"][feature_name],
    ) = evaluation2(
        Y=SEs[SEs_name],
        X=features_dict[feature_name],
        X2=features_new[feature_name],
        method_option=method,
        tuning_metrice=metrice,
        hyperparList=hyperparList,
        Validation=validation,
        n_jobs=10,
    )

using feature target
with APPFC
The KRR requires hyperparameter lambda, sigma_X
---------- cv start ----------
Fold: 0
Fold: 1
Fold: 2
Fold: 3
Fold: 4
best hyperpar: (0.1, 10.0)
AUPR: 0.4084737626646199
AUPR for each fold: [0.40402632 0.4019429  0.39709078 0.44205624 0.39725257]
--- tuning end ---
target size: 227
------ best hyper pars:  (0.1, 10.0) ------
KRR starts:
KRR ends:
-----------
AUPRperdrug: 0.4721830315540797
AUROCperdrug: 0.8976792883282373
AUPR+AUROCperdrug: 1.369862319882317
AUPR: 0.4182427218163188
AUROC: 0.8679787471602908
AUPR+AUROC: 1.2862214689766096
-----------
using feature enzyme
with APPFC
The KRR requires hyperparameter lambda, sigma_X
---------- cv start ----------
Fold: 0
Fold: 1
Fold: 2
Fold: 3
Fold: 4
best hyperpar: (10.0, 10.0)
AUPR: 0.39463205898624165
AUPR for each fold: [0.40688552 0.41235834 0.35872952 0.41248259 0.38270433]
--- tuning end ---
target size: 175
------ best hyper pars:  (10.0, 10.0) ------
KRR starts:
KRR ends:
-----------
AUPRperdrug: 

In [25]:
# Build one small frame per (method, feature, metric) and concatenate once.
# Growing a DataFrame with pd.concat inside the loop re-copies all rows on
# every iteration. Each `scores` is a sequence, so the scalar columns are
# broadcast to its length.
score_frames = [
    pd.DataFrame({
        'method': m,
        'feature': f,
        'metric': me,
        "score": scores
    })
    for m, fs in results["nested_cv"]["KRR"].items()
    for f, mes in fs.items()
    for me, scores in mes.items()
]

# Row / column order of the summary table. Metrics not listed here become NaN
# in the categorical column and are therefore left out of the pivot.
custom_order = ["pathway","Chem", "DGI", "indication", "target", "transporter", "enzyme"]
metrice_names = ["AUROC", "AUPR", "AUROCperdrug", "AUPRperdrug"]

scores_long = pd.concat(score_frames, ignore_index=True).assign(
    feature=lambda d: pd.Categorical(d['feature'], categories=custom_order, ordered=True),
    metric=lambda d: pd.Categorical(d['metric'], categories=metrice_names, ordered=True),
)

# Mean and standard deviation of the scores per feature/method and metric.
scores_summary = (
    pd.pivot_table(
        scores_long,
        values=['score'],
        index=["feature", "method"],
        aggfunc={'score': ["mean","std"]},
        columns=["metric"],
    )
    .sort_index(axis=1, level='metric')
    .sort_index(level='feature')
)
scores_summary.to_excel(f'results/nested_cv_results_{SEs_name}_{method}_SRI.xlsx')
scores_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score,score,score,score,score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std
Unnamed: 0_level_2,metric,AUROC,AUROC,AUPR,AUPR,AUROCperdrug,AUROCperdrug,AUPRperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3
pathway,APPFC,0.874972,0.006609,0.442168,0.020526,0.903641,0.003777,0.488041,0.010157
Chem,APPFC,0.866238,0.010028,0.399104,0.021605,0.893435,0.003586,0.457232,0.020794
DGI,APPFC,0.853253,0.018067,0.417603,0.013741,0.882565,0.008735,0.453041,0.005362
indication,APPFC,0.871912,0.004725,0.458861,0.008066,0.892299,0.002632,0.465431,0.005873
target,APPFC,0.868389,0.003604,0.408474,0.019011,0.897358,0.002994,0.461952,0.018787
transporter,APPFC,0.859486,0.012156,0.422737,0.036487,0.885091,0.00625,0.475089,0.034613
enzyme,APPFC,0.866845,0.007402,0.394632,0.023528,0.88602,0.001225,0.440822,0.013136


In [26]:
# Build one single-row frame per (method, feature, metric) and concatenate
# once instead of growing a DataFrame inside the loop.
score_frames = [
    pd.DataFrame({
        'method': m,
        'feature': f,
        'metric': me,
        "score": scores
    },index=["1"])
    for m, fs in results["cv"]["KRR"].items()
    for f, mes in fs.items()
    for me, scores in mes.items()
]

# Defined here as well so the cell does not depend on the nested-cv table cell
# having run first; `metrice_names` also has a different, longer definition
# earlier in the notebook.
custom_order = ["pathway","Chem", "DGI", "indication", "target", "transporter", "enzyme"]
metrice_names = ["AUROC", "AUPR", "AUROCperdrug", "AUPRperdrug"]

scores_long = pd.concat(score_frames, ignore_index=True).assign(
    feature=lambda d: pd.Categorical(d['feature'], categories=custom_order, ordered=True),
    metric=lambda d: pd.Categorical(d['metric'], categories=metrice_names, ordered=True),
)

# One score per feature/method and metric (pivot_table's default aggregation).
scores_table = pd.pivot_table(scores_long, values=['score'], index=["feature", "method"], columns="metric")
scores_table.to_excel(f'results/cv_results_{SEs_name}_{method}_SRI.xlsx')
scores_table

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score
Unnamed: 0_level_1,metric,AUROC,AUPR,AUROCperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
pathway,APPFC,0.874428,0.464177,0.904188,0.511292
Chem,APPFC,0.862766,0.408611,0.891679,0.470297
DGI,APPFC,0.852058,0.443429,0.881137,0.469204
indication,APPFC,0.872925,0.480709,0.894596,0.482254
target,APPFC,0.867979,0.418243,0.897679,0.472183
transporter,APPFC,0.863037,0.456326,0.882186,0.50724
enzyme,APPFC,0.862233,0.427341,0.88296,0.477442


In [21]:
# Tune and run TKNN feature completion under nested cv, then under plain cv.
# After the loop, `validation` is "cv" and `features_new` comes from the cv run.
method = "TKNN"
for validation in ("nested_cv", "cv"):
    fhyperparsOut[validation][method] = {}
    fresults[validation][method] = {}
    hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
    # Store the scores under [validation][method] for both schemes. The "cv"
    # run previously assigned to fresults[validation], which replaced the
    # whole "cv" dict and discarded the per-method slot created above.
    (
        fresults[validation][method],
        fhyperparsOut[validation][method],
        features_new,
    ) = completion(
        Y=SEs[SEs_name],
        X=features_dict,
        method_option=method,
        tuning_metrice=metrice,
        hyperparList=hyperparList,
        Validation=validation,
        n_jobs=1,
    )

The TKNN requires hyperparameter k
common drugs chosen to be the test set:  set()
---------- nested cv start ----------
Fold: 0
number of hyperpars combination:  3
Inner Fold: 0
Imputing row 1/1344 with 8021 missing, elapsed time: 220.607
Imputing row 1/1344 with 8021 missing, elapsed time: 221.668
Imputing row 1/1344 with 8021 missing, elapsed time: 221.611
Imputing row 1/1344 with 8021 missing, elapsed time: 222.117
Imputing row 101/1344 with 2283 missing, elapsed time: 256.244
Imputing row 101/1344 with 2283 missing, elapsed time: 260.844
Imputing row 101/1344 with 2283 missing, elapsed time: 260.937
Imputing row 101/1344 with 2283 missing, elapsed time: 262.962
Imputing row 201/1344 with 8021 missing, elapsed time: 291.002
Imputing row 201/1344 with 8021 missing, elapsed time: 295.927
Imputing row 201/1344 with 8021 missing, elapsed time: 296.099
Imputing row 201/1344 with 8021 missing, elapsed time: 296.687
Imputing row 301/1344 with 10982 missing, elapsed time: 328.405
Imputing r

In [19]:
# method = "TNMF"
# validation = "nested_cv"
# fhyperparsOut[validation][method] = {}
# fresults[validation][method] = {}
# hyperparList = loadHyperpar(*all_hyperparlist[method],method_option=method)
# fresults[validation][method], fhyperparsOut[validation][method], features_new = completion(Y=SEs[SEs_name], X=features_dict, method_option=method,tuning_metrice=metrice,hyperparList=hyperparList,Validation=validation,n_jobs=3)

# validation = "cv"
# fhyperparsOut[validation][method] = {}
# fresults[validation][method] = {}
# hyperparList = loadHyperpar(*all_hyperparlist[method],method_option=method)
# fresults[validation], fhyperparsOut[validation][method], features_new = completion(Y=SEs[SEs_name], X=features_dict, method_option=method,tuning_metrice=metrice,hyperparList=hyperparList,Validation=validation,n_jobs=1)
# Step 1: complete the feature matrices with TKNN, with k fixed to 15.
method = "TKNN"
validation = "completion"
hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
features_new = completion(
    Y=SEs[SEs_name],
    X=features_dict,
    method_option=method,
    tuning_metrice=metrice,
    hyperparList=hyperparList,
    hyperparfixed=(15,),
    Validation=validation,
    n_jobs=1,
)

# Step 2: start from empty result containers so nothing from an earlier
# completion method is mixed into these results.
hyperpars = {"nested_cv": {}, "cv": {}}
hyperparsOut = {"nested_cv": {}, "cv": {}}
results = {"nested_cv": {}, "cv": {}}

# Step 3: evaluate KRR (tuned on AUPR) on every feature set, first with nested
# cv and then with plain cv. After the loop `validation` is "cv".
method = "KRR"
metrice = "AUPR"
for validation in ("nested_cv", "cv"):
    # The baseline run without APPFC (evaluation() on the original feature
    # only) is currently disabled, so "noAPPFC" stays empty.
    hyperparsOut[validation][method] = {"noAPPFC": {}, "APPFC": {}}
    results[validation][method] = {"noAPPFC": {}, "APPFC": {}}
    # The loop variable is `feature_name`, not `str`: a module-level `str`
    # would shadow the built-in for every cell executed afterwards.
    for feature_name in features_names:
        print(f"using feature {feature_name}")
        print("with APPFC")
        hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
        # evaluation2 receives both the original feature matrix (X) and its
        # completed counterpart from `features_new` (X2).
        (
            results[validation][method]["APPFC"][feature_name],
            hyperparsOut[validation][method]["APPFC"][feature_name],
        ) = evaluation2(
            Y=SEs[SEs_name],
            X=features_dict[feature_name],
            X2=features_new[feature_name],
            method_option=method,
            tuning_metrice=metrice,
            hyperparList=hyperparList,
            Validation=validation,
            n_jobs=10,
        )


The TKNN requires hyperparameter k
common drugs chosen to be the test set:  set()
TKNN starts:
Imputing row 1/1344 with 8021 missing, elapsed time: 78.504
Imputing row 101/1344 with 0 missing, elapsed time: 82.919
Imputing row 201/1344 with 8021 missing, elapsed time: 86.217
Imputing row 301/1344 with 7931 missing, elapsed time: 88.980
Imputing row 401/1344 with 830 missing, elapsed time: 92.562
Imputing row 501/1344 with 7931 missing, elapsed time: 95.185
Imputing row 601/1344 with 8021 missing, elapsed time: 99.952
Imputing row 701/1344 with 555 missing, elapsed time: 104.070
Imputing row 801/1344 with 830 missing, elapsed time: 107.867
Imputing row 901/1344 with 465 missing, elapsed time: 111.523
Imputing row 1001/1344 with 90 missing, elapsed time: 114.946
Imputing row 1101/1344 with 275 missing, elapsed time: 117.295
Imputing row 1201/1344 with 4815 missing, elapsed time: 120.555
Imputing row 1301/1344 with 90 missing, elapsed time: 124.687
TKNN ends:
using feature target
with APP

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score
Unnamed: 0_level_1,metric,AUROC,AUPR,AUROCperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
pathway,APPFC,0.859195,0.448855,0.88666,0.508068
Chem,APPFC,0.868554,0.41076,0.895409,0.47537
DGI,APPFC,0.861662,0.452502,0.888759,0.47736
indication,APPFC,0.874497,0.481191,0.895865,0.482793
target,APPFC,0.872534,0.42294,0.900592,0.476608
transporter,APPFC,0.863768,0.454442,0.884519,0.508011
enzyme,APPFC,0.862538,0.427835,0.883756,0.477628


In [21]:
# Build one small frame per (method, feature, metric) and concatenate once.
# Growing a DataFrame with pd.concat inside the loop re-copies all rows on
# every iteration. Each `scores` is a sequence, so the scalar columns are
# broadcast to its length.
score_frames = [
    pd.DataFrame({
        'method': m,
        'feature': f,
        'metric': me,
        "score": scores
    })
    for m, fs in results["nested_cv"]["KRR"].items()
    for f, mes in fs.items()
    for me, scores in mes.items()
]

# Row / column order of the summary table. Metrics not listed here become NaN
# in the categorical column and are therefore left out of the pivot.
custom_order = ["pathway","Chem", "DGI", "indication", "target", "transporter", "enzyme"]
metrice_names = ["AUROC", "AUPR", "AUROCperdrug", "AUPRperdrug"]

scores_long = pd.concat(score_frames, ignore_index=True).assign(
    feature=lambda d: pd.Categorical(d['feature'], categories=custom_order, ordered=True),
    metric=lambda d: pd.Categorical(d['metric'], categories=metrice_names, ordered=True),
)

# Mean and standard deviation of the scores per feature/method and metric.
scores_summary = (
    pd.pivot_table(
        scores_long,
        values=['score'],
        index=["feature", "method"],
        aggfunc={'score': ["mean","std"]},
        columns=["metric"],
    )
    .sort_index(axis=1, level='metric')
    .sort_index(level='feature')
)
scores_summary.to_excel(f'results/nested_cv_results_{SEs_name}_{method}_KNN.xlsx')
scores_summary


Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score,score,score,score,score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std
Unnamed: 0_level_2,metric,AUROC,AUROC,AUPR,AUPR,AUROCperdrug,AUROCperdrug,AUPRperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3
pathway,APPFC,0.872388,0.006426,0.445948,0.021346,0.89858,0.009729,0.488724,0.010494
Chem,APPFC,0.870415,0.009664,0.403318,0.020662,0.895801,0.003893,0.460617,0.018756
DGI,APPFC,0.858797,0.009585,0.422823,0.014822,0.886634,0.004702,0.459339,0.00679
indication,APPFC,0.873967,0.004872,0.460117,0.008269,0.893865,0.002685,0.466084,0.005362
target,APPFC,0.872169,0.002437,0.411636,0.016103,0.899469,0.003183,0.465182,0.018213
transporter,APPFC,0.86116,0.011325,0.421151,0.039047,0.887394,0.006004,0.474826,0.035675
enzyme,APPFC,0.867538,0.007447,0.395751,0.024972,0.886769,0.001036,0.441539,0.01322


In [22]:
# Build one single-row frame per (method, feature, metric) and concatenate
# once instead of growing a DataFrame inside the loop.
score_frames = [
    pd.DataFrame({
        'method': m,
        'feature': f,
        'metric': me,
        "score": scores
    },index=["1"])
    for m, fs in results["cv"]["KRR"].items()
    for f, mes in fs.items()
    for me, scores in mes.items()
]

# Defined here as well so the cell does not depend on the nested-cv table cell
# having run first; `metrice_names` also has a different, longer definition
# earlier in the notebook.
custom_order = ["pathway","Chem", "DGI", "indication", "target", "transporter", "enzyme"]
metrice_names = ["AUROC", "AUPR", "AUROCperdrug", "AUPRperdrug"]

scores_long = pd.concat(score_frames, ignore_index=True).assign(
    feature=lambda d: pd.Categorical(d['feature'], categories=custom_order, ordered=True),
    metric=lambda d: pd.Categorical(d['metric'], categories=metrice_names, ordered=True),
)

# One score per feature/method and metric (pivot_table's default aggregation).
scores_table = pd.pivot_table(scores_long, values=['score'], index=["feature", "method"], columns="metric")
scores_table.to_excel(f'results/cv_results_{SEs_name}_{method}_KNN.xlsx')
scores_table

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score
Unnamed: 0_level_1,metric,AUROC,AUPR,AUROCperdrug,AUPRperdrug
feature,method,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
pathway,APPFC,0.859195,0.448855,0.88666,0.508068
Chem,APPFC,0.868554,0.41076,0.895409,0.47537
DGI,APPFC,0.861662,0.452502,0.888759,0.47736
indication,APPFC,0.874497,0.481191,0.895865,0.482793
target,APPFC,0.872534,0.42294,0.900592,0.476608
transporter,APPFC,0.863768,0.454442,0.884519,0.508011
enzyme,APPFC,0.862538,0.427835,0.883756,0.477628


In [42]:
# with open(f'results/hyperpars_{SEs_name}.xml', 'w') as xml_file:
#    json.dump(hyperparsOut, xml_file, cls=NpEncoder)
# with open(f'results/results_{SEs_name}.xml', 'w') as xml_file:
#    json.dump(results, xml_file)

In [1]:
# Display the nested `results` dict. The recorded NameError shows that
# `results` was not defined in the kernel when this cell was last executed;
# run the cells that create and fill `results` first.
results

NameError: name 'results' is not defined