In [35]:
from itertools import product
import numpy as np
import pandas as pd
from ADRprofilePrediction import Pairs2Mat, evaluation
from Models import loadHyperpar
import seaborn as sns 
import matplotlib.pylab as plt
from matplotlib.ticker import MultipleLocator
import matplotlib.patheffects as path_effects
import json
import pingouin as pg

In [36]:
# Print the installed scikit-learn version so it is recorded in the
# notebook output next to the results produced below.
import sklearn
print(sklearn.__version__)

0.24.2


In [37]:
# Drug feature matrices keyed by feature name. Every entry except "Chem" is
# built by Pairs2Mat from a two-column pair file; "Chem" is read directly as
# a tab-separated table whose first column is used as the index.
features_dict = dict(
    target=Pairs2Mat(path="data/drug_target.tsv", colname1="0", colname2="1"),
    enzyme=Pairs2Mat(path="data/drug_enzyme.tsv", colname1="0", colname2="1"),
    Chem=pd.read_csv("data/drug_chemsfp.tsv", sep="\t", header=0, index_col=0),
    DGI=Pairs2Mat(
        path="data/interactions.tsv",
        colname1="drug_claim_name",
        colname2="gene_name",
    ),
    transporter=Pairs2Mat(path="data/drug_transporter.tsv", colname1="0", colname2="1"),
    pathway=Pairs2Mat(path="data/drug_pathway.tsv", colname1="0", colname2="1"),
    indication=Pairs2Mat(path="data/drug_indication.tsv", colname1="1_x", colname2="6"),
)


In [38]:
# Which label columns to keep: "all" keeps columns whose column sum is >= 5,
# "rare" keeps columns whose column sum is < 50.
# NOTE(review): this name shadows the builtin filter(); it is kept as-is
# because other cells may read it -- consider renaming notebook-wide.
filter = "all"

# Column-selection rule per mode, applied to the per-column sums of a matrix.
_column_selectors = {
    "all": lambda sums: sums >= 5,
    "rare": lambda sums: sums < 50,
}
try:
    _select_columns = _column_selectors[filter]
except KeyError:
    # Fail here instead of leaving SEs empty and hitting a KeyError later.
    raise ValueError(
        f"unknown filter {filter!r}; expected one of {sorted(_column_selectors)}"
    ) from None

# Label matrices keyed by data set name ("SIDER", "OFFSIDES").
# presumably rows are drugs and columns are side effects -- confirm against Pairs2Mat
SEs = {}

SIDER = Pairs2Mat(path="data/drug_se.tsv", colname1="1_x", colname2="5")
column_sums = np.sum(SIDER, axis=0)
SEs["SIDER"] = SIDER.loc[:, _select_columns(column_sums)]

OFFSIDERS = Pairs2Mat(
    path="data/OFFSIDES.csv",
    colname1="drug_concept_name",
    colname2="condition_concept_name",
    sep=",",
)
column_sums = np.sum(OFFSIDERS, axis=0)
SEs["OFFSIDES"] = OFFSIDERS.loc[:, _select_columns(column_sums)]


In [39]:
# Feature sets evaluated in this run (keys of features_dict).
features_names = [
    "pathway",
]

# Methods evaluated in this run. Alternatives kept from earlier runs:
#   SEs_names = ["SIDER", "OFFSIDES"]
#   methods = ["SKR", "KR", "KRR", "Naive", "LNSM_RLN", "LNSM_jaccard", "VKR"]
#   methods = ["SKR", "KR", "KRR", "Naive", "LNSM_RLN", "LNSM_jaccard", "VKR",
#              "SVM", "OCCA", "SCCA", "RF", "BRF"]
methods = [
    "SVM",
    "RF",
    "BRF",
]

# Metric names available for tuning, and the metric names that appear in the
# evaluation output.
tuning_metrices = [
    "AUROC",
    "AUPR",
    "AUROCperdrug",
    "AUPRperdrug",
]
metrice_names = [
    "AUPR+AUROC",
    "AUPR+AUROCperdrug",
    "AUROC",
    "AUPR",
    "AUROCperdrug",
    "AUPRperdrug",
]

In [40]:
# Candidate value arrays shared by the per-method grids below.
A = np.power(10, np.arange(-2, 3, 1, dtype=float))     # 10**-2 ... 10**2
B = np.arange(0.1, 1, 0.1, dtype=float)                # 0.1 ... 0.9
C = np.arange(5, 20, 5, dtype=int)                     # 5, 10, 15
A10 = np.power(10, np.arange(1, 2, 1, dtype=float))    # single value: 10
A100 = np.power(10, np.arange(2, 3, 1, dtype=float))   # single value: 100

# Per-method list of candidate arrays; each list is unpacked positionally
# into loadHyperpar(..., method_option=method) by the evaluation cells.
# Entries for "KR" ([A, A]), "OCCA" ([]) and "SCCA" ([A]) are disabled.
all_hyperparlist = dict(
    SKR=[A, B, A10, A100],
    KRR=[A, A],
    VKR=[A, A, C],
    Naive=[],
    LNSM_RLN=[B, A],
    LNSM_jaccard=[B],
    SVM=[A, A],
    RF=[C],
    BRF=[C],
)

In [41]:
# Containers filled by the cells below, each keyed first by validation scheme
# ("nested_cv" or "cv"), then by method name, then by feature name:
#   hyperpars    - fixed hyperparameters handed to evaluation()
#   hyperparsOut - hyperparameters returned by evaluation()
#   results      - results returned by evaluation()
hyperpars = {"nested_cv": {}, "cv": {}}
hyperparsOut = {"nested_cv": {}, "cv": {}}
results = {"nested_cv": {}, "cv": {}}

In [42]:
# Label set (key into SEs) and tuning metric used by the next evaluation run.
SEs_name, metrice = "SIDER", "AUPR"

In [43]:
# Fixed SVM hyperparameters for the "pathway" feature on SIDER.
# Nested CV gets a list of five tuples; the first two match the
# "best hyper pars" printed for Fold 0 and Fold 1 in the run output below.
# Plain CV gets a single tuple.
hyperpars["nested_cv"]["SVM"] = {
    "pathway": [
        (100, 100),
        (0.1, 10),
        (0.1, 10),
        (10, 10),
        (10, 10),
    ],
}
hyperpars["cv"]["SVM"] = {"pathway": (10, 10)}


In [44]:
# Fixed RF hyperparameters for the "pathway" feature on SIDER: the same
# one-element tuple for each of the five nested-CV entries and for plain CV.
hyperpars["nested_cv"]["RF"] = {"pathway": [(15,) for _ in range(5)]}
hyperpars["cv"]["RF"] = {"pathway": (15,)}

In [45]:
# Fixed BRF hyperparameters for the "pathway" feature on SIDER: the same
# one-element tuple for each of the five nested-CV entries and for plain CV.
hyperpars["nested_cv"]["BRF"] = {"pathway": [(15,) for _ in range(5)]}
hyperpars["cv"]["BRF"] = {"pathway": (15,)}

In [46]:
import time

In [47]:
# Evaluate every method in `methods` on SEs[SEs_name] for each feature in
# `features_names`, first with nested CV and then with plain CV, storing the
# two values returned by evaluation() in `results` and `hyperparsOut`.
for method in methods:
    # Per method the order is unchanged: all features under "nested_cv",
    # then all features under "cv".
    for validation in ("nested_cv", "cv"):
        hyperparsOut[validation][method] = {}
        results[validation][method] = {}
        # The loop variable used to be called `str`, which shadowed the
        # builtin and stayed bound after the cell finished.
        for feature_name in features_names:
            print(f"using feature {feature_name}")
            # Unpack this method's candidate arrays into loadHyperpar.
            hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
            t1 = time.time()
            (
                results[validation][method][feature_name],
                hyperparsOut[validation][method][feature_name],
            ) = evaluation(
                Y=SEs[SEs_name],
                X=features_dict[feature_name],
                method_option=method,
                tuning_metrice=metrice,
                hyperparList=hyperparList,
                hyperparfixed=hyperpars[validation][method][feature_name],
                Validation=validation,
                n_jobs=1,
            )
            t2 = time.time()
            # Wall-clock seconds spent inside evaluation() only.
            print("run time: ", t2 - t1)

using feature pathway
The SVM requires hyperparameter c, gamma
---------- nested cv start ----------
Fold: 0
number of hyperpars combination:  25
first few training idx:  [ 96 127 200 219 286 391 395 415 454 484]
first few testing idx:  [ 48  78  94 164 192 373 396 402 481 487]
--- tuning end ---
target size: 106
------ best hyper pars:  (100, 100) ------
SVM starts:
SVM ends:
-----------
AUPRperdrug: 0.41924003232058354
AUROCperdrug: 0.8720250072102603
AUPR+AUROCperdrug: 1.2912650395308438
AUPR: 0.36227197049316584
AUROC: 0.8425448573029706
AUPR+AUROC: 1.2048168277961364
-----------
Fold: 1
number of hyperpars combination:  25
first few training idx:  [ 48  78  94 164 192 373 396 402 481 487]
first few testing idx:  [ 96 127 200 219 286 391 395 415 454 484]
--- tuning end ---
target size: 106
------ best hyper pars:  (0.1, 10) ------
SVM starts:
SVM ends:
-----------
AUPRperdrug: 0.4403688965928966
AUROCperdrug: 0.8808364554703981
AUPR+AUROCperdrug: 1.3212053520632947
AUPR: 0.39310122

In [48]:
# Label set (key into SEs) and tuning metric used by the next evaluation run.
SEs_name, metrice = "OFFSIDES", "AUPR"

In [49]:
# Fixed SVM hyperparameters for the "pathway" feature on OFFSIDES.
# The nested-CV reset used to be written to hyperpars["nested_cv"]["pathway"],
# which added a stray method-level key and relied on an earlier cell having
# created hyperpars["nested_cv"]["SVM"]. Reset the "SVM" entry itself so this
# cell is self-contained and mirrors the "cv" reset on the next line.
hyperpars["nested_cv"]["SVM"] = {}
hyperpars["cv"]["SVM"] = {}

# Nested CV gets a list of five tuples; the first two match the
# "best hyper pars" printed for Fold 0 and Fold 1 in the run output below.
hyperpars["nested_cv"]["SVM"]["pathway"] = [
    (10, 10),
    (100, 100),
    (100, 100),
    (100, 100),
    (10, 10),
    ]

# Plain CV gets a single tuple.
hyperpars["cv"]["SVM"]["pathway"] = (0.1, 10)


In [50]:
# Fixed RF hyperparameters for the "pathway" feature on OFFSIDES: the same
# one-element tuple for each of the five nested-CV entries and for plain CV.
hyperpars["nested_cv"]["RF"] = {"pathway": [(15,) for _ in range(5)]}
hyperpars["cv"]["RF"] = {"pathway": (15,)}

In [51]:
# Fixed BRF hyperparameters for the "pathway" feature on OFFSIDES: the same
# one-element tuple for each of the five nested-CV entries and for plain CV.
hyperpars["nested_cv"]["BRF"] = {"pathway": [(15,) for _ in range(5)]}
hyperpars["cv"]["BRF"] = {"pathway": (15,)}

In [52]:
# Evaluate every method in `methods` on SEs[SEs_name] for each feature in
# `features_names`, first with nested CV and then with plain CV, storing the
# two values returned by evaluation() in `results` and `hyperparsOut`.
for method in methods:
    # Per method the order is unchanged: all features under "nested_cv",
    # then all features under "cv".
    for validation in ("nested_cv", "cv"):
        hyperparsOut[validation][method] = {}
        results[validation][method] = {}
        # The loop variable used to be called `str`, which shadowed the
        # builtin and stayed bound after the cell finished.
        for feature_name in features_names:
            print(f"using feature {feature_name}")
            # Unpack this method's candidate arrays into loadHyperpar.
            hyperparList = loadHyperpar(*all_hyperparlist[method], method_option=method)
            # Start the timer after the grid is built so "run time" covers
            # evaluation() only, the same span the SIDER run above measures.
            t1 = time.time()
            (
                results[validation][method][feature_name],
                hyperparsOut[validation][method][feature_name],
            ) = evaluation(
                Y=SEs[SEs_name],
                X=features_dict[feature_name],
                method_option=method,
                tuning_metrice=metrice,
                hyperparList=hyperparList,
                hyperparfixed=hyperpars[validation][method][feature_name],
                Validation=validation,
                n_jobs=1,
            )
            t2 = time.time()
            print("run time: ", t2 - t1)

using feature pathway
The SVM requires hyperparameter c, gamma
---------- nested cv start ----------
Fold: 0
number of hyperpars combination:  25
first few training idx:  [  7 158 245 251 274 323 440 462 642 743]
first few testing idx:  [ 10  36  92 145 470 503 590 640 711 730]
--- tuning end ---
target size: 150
------ best hyper pars:  (10, 10) ------
SVM starts:
SVM ends:
-----------
AUPRperdrug: 0.6303730471183844
AUROCperdrug: 0.9439565758547991
AUPR+AUROCperdrug: 1.5743296229731834
AUPR: 0.48201782850598107
AUROC: 0.8419971284550303
AUPR+AUROC: 1.3240149569610113
-----------
Fold: 1
number of hyperpars combination:  25
first few training idx:  [ 10  36  92 145 470 503 590 640 711 730]
first few testing idx:  [  7 158 245 251 274 323 440 462 642 743]
--- tuning end ---
target size: 150
------ best hyper pars:  (100, 100) ------
SVM starts:
SVM ends:
-----------
AUPRperdrug: 0.6414820509514807
AUROCperdrug: 0.9429120497578329
AUPR+AUROCperdrug: 1.5843941007093136
AUPR: 0.5008889906