In [1]:
import numpy as np
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
import math
from math import sqrt
import sklearn.preprocessing as sk
import seaborn as sns
from sklearn import metrics
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import train_test_split
import random
from random import randint
from sklearn.model_selection import StratifiedKFold
from scipy.stats import pearsonr, spearmanr, kendalltau
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

  from numpy.core.umath_tests import inner1d


In [2]:
def RG(X_train, Y_train, X_test, alpha, cv, seed):
    """Tune a standardised Ridge regressor by grid search and predict two test sets.

    A ``StandardScaler`` -> ``Ridge`` pipeline is tuned over the candidate
    ``alpha`` values with shuffled K-fold cross-validation, scored by negative
    mean squared error. Predictions are then made with the fitted search
    object (GridSearchCV refits the best configuration on the full training
    data by default).

    Parameters
    ----------
    X_train : training feature matrix handed to ``fit``.
    Y_train : training targets handed to ``fit``.
    X_test : sequence whose first two items are the feature matrices to predict.
    alpha : candidate values for the Ridge ``alpha`` hyper-parameter.
    cv : number of K-fold splits.
    seed : ``random_state`` of the shuffled K-fold splitter.

    Returns
    -------
    tuple
        ``(predictions for X_test[0], predictions for X_test[1])``.
    """
    # Pull both test matrices out first so a malformed X_test fails before the
    # (potentially slow) grid search rather than after it.
    first_test, second_test = X_test[0], X_test[1]

    ridge_pipeline = Pipeline([('scaler', StandardScaler()), ('RdG', Ridge())])
    splitter = KFold(n_splits=cv, shuffle=True, random_state=seed)
    model = GridSearchCV(
        ridge_pipeline,
        param_grid={"RdG__alpha": alpha},
        scoring='neg_mean_squared_error',
        cv=splitter,
    )
    model.fit(X_train, Y_train)

    return model.predict(first_test), model.predict(second_test)

In [78]:
# --- Experiment settings -------------------------------------------------
# Candidate Ridge alphas, number of CV folds and the CV random seed; these
# are the values passed to RG() by the cross-domain cells.
alph = [
    0.1, 0.2, 0.3, 0.4, 0.5,
    0.6, 0.7, 0.8, 0.9, 1,
]
folds = 10
seeds = 42
drugs = [
    "Bortezomib", "Entinostat", "Sirolimus", "Docetaxel",
    "Gemcitabine", "Crizotinib", "Lapatinib", "Vorinostat",
    "Erlotinib", "Paclitaxel", "Pictilisib",
]


def _read_tsv(path):
    """Load a tab-separated table, using its first column as the index."""
    return pd.read_csv(path, sep="\t", index_col=0)


# --- Expression tables (their columns are matched against response indexes) ---
CTRP_exprs = _read_tsv("Data_All/CTRP.exprsALL.tsv")
GDSC_exprs = _read_tsv("Data_All/GDSCv2.exprsALL.tsv")
gCSI_exprs = _read_tsv("Data_All/gCSI.exprsALL.tsv")

# --- AAC response tables (indexed by drug name via .loc[drug]) ---
CTRP_aac = _read_tsv("Data_All/CTRP.aacALL.tsv")
GDSC_aac = _read_tsv("Data_All/GDSCv2.aacALL.tsv")
gCSI_aac = _read_tsv("Data_All/gCSI.aacALL.tsv")

# --- log IC50 response tables ---
# NOTE(review): the GDSC file below has no "v2" in its name, unlike the other
# GDSC inputs — confirm this is the intended file.
CTRP_ic50 = _read_tsv("Data_All/CTRP.logIC50.tsv")
GDSC_ic50 = _read_tsv("Data_All/GDSC.logIC50.tsv")
gCSI_ic50 = _read_tsv("Data_All/gCSI.logIC50.tsv")

# --- Sample annotation tables ---
CTRP_info = _read_tsv("Data_All/CTRP.infoALL.tsv")
GDSC_info = _read_tsv("Data_All/GDSCv2.infoALL.tsv")
gCSI_info = _read_tsv("Data_All/gCSI.infoALL.tsv")

# Samples whose "Tumor" flag equals 1; the analysis cells filter these out.
idx_other_ctrp = CTRP_info.index[CTRP_info["Tumor"].eq(1)]
idx_other_gdsc = GDSC_info.index[GDSC_info["Tumor"].eq(1)]
idx_other_gcsi = gCSI_info.index[gCSI_info["Tumor"].eq(1)]

In [10]:
# Cross-domain AAC: fit on CTRP, evaluate the predictions on GDSC and gCSI.
for drug in drugs:
    # AAC values for this drug in each dataset; samples without a value are dropped.
    CTRP_aac_drug = CTRP_aac.loc[drug].dropna()
    GDSC_aac_drug = GDSC_aac.loc[drug].dropna()
    gCSI_aac_drug = gCSI_aac.loc[drug].dropna()

    # Samples that have both expression and AAC, minus those flagged
    # Tumor == 1 in the dataset's info table (the idx_other_* indexes).
    idx_ctrp = [x for x in CTRP_exprs.columns.intersection(CTRP_aac_drug.index)
                if x not in idx_other_ctrp]
    idx_gdsc = [x for x in GDSC_exprs.columns.intersection(GDSC_aac_drug.index)
                if x not in idx_other_gdsc]
    idx_gcsi = [x for x in gCSI_exprs.columns.intersection(gCSI_aac_drug.index)
                if x not in idx_other_gcsi]

    # Expression tables hold samples as columns; transpose so each row is a sample,
    # and put the responses in the same sample order.
    CTRP_exprs_drug = CTRP_exprs.loc[:, idx_ctrp].T
    CTRP_aac_drug = CTRP_aac_drug.loc[idx_ctrp]
    GDSC_exprs_drug = GDSC_exprs.loc[:, idx_gdsc].T
    GDSC_aac_drug = GDSC_aac_drug.loc[idx_gdsc]
    gCSI_exprs_drug = gCSI_exprs.loc[:, idx_gcsi].T
    gCSI_aac_drug = gCSI_aac_drug.loc[idx_gcsi]

    # NOTE(review): the model is fitted on the CTRP matrix and applied to the other
    # two as-is, which assumes all three expression tables share the same rows in
    # the same order — TODO confirm.
    X_train_N = CTRP_exprs_drug.values
    y_train = CTRP_aac_drug.values
    gdsc_true = GDSC_aac_drug.values
    gcsi_true = gCSI_aac_drug.values

    pred_gdsc, pred_gcsi = RG(X_train_N, y_train,
                              [GDSC_exprs_drug.values, gCSI_exprs_drug.values],
                              alph, folds, seeds)

    print(drug)
    print("GDSC Pearson correlation and p-value:", pearsonr(pred_gdsc, gdsc_true))
    # mean_squared_error takes (y_true, y_pred); the value is the same either way,
    # but the documented order keeps the call unambiguous.
    print("GDSC RMSE:", sqrt(mean_squared_error(gdsc_true, pred_gdsc)))
    print("gCSI Pearson correlation and p-value:", pearsonr(pred_gcsi, gcsi_true))
    print("gCSI RMSE:", sqrt(mean_squared_error(gcsi_true, pred_gcsi)))
    print("*****************************************************************")
    print("GDSC Spearman correlation and p-value:", spearmanr(pred_gdsc, gdsc_true))
    print("gCSI Spearman correlation and p-value:", spearmanr(pred_gcsi, gcsi_true))
    print("GDSC Kendall correlation and p-value:", kendalltau(pred_gdsc, gdsc_true))
    print("gCSI Kendall correlation and p-value:", kendalltau(pred_gcsi, gcsi_true))
    print("-----------------------------------------------------------------")

Bortezomib
GDSC Pearson correlation and p-value: (0.19309957808954667, 0.003211613543916227)
GDSC RMSE: 0.08646918822447294
gCSI Pearson correlation and p-value: (0.2839003996246455, 1.3719509252324141e-06)
gCSI RMSE: 0.1515645226377327
*****************************************************************
GDSC Spearman correlation and p-value: SpearmanrResult(correlation=0.17056893631106526, pvalue=0.009392643779872678)
gCSI Spearman correlation and p-value: SpearmanrResult(correlation=0.2582913776232569, pvalue=1.2020894266865008e-05)
GDSC Kendall correlation and p-value: KendalltauResult(correlation=0.11665725578769058, pvalue=0.008301679046794344)
gCSI Kendall correlation and p-value: KendalltauResult(correlation=0.17823860727086535, pvalue=8.750281687995552e-06)
-----------------------------------------------------------------
Entinostat
GDSC Pearson correlation and p-value: (0.502010072660359, 7.873196082899344e-16)
GDSC RMSE: 0.16723090316859132
gCSI Pearson correlation and p-value: 

Pictilisib
GDSC Pearson correlation and p-value: (0.30433546483802876, 2.1004895122712866e-06)
GDSC RMSE: 0.09935063967634512
gCSI Pearson correlation and p-value: (0.40963468466747477, 1.1288769139815588e-12)
gCSI RMSE: 0.1343755899581506
*****************************************************************
GDSC Spearman correlation and p-value: SpearmanrResult(correlation=0.3143780336182857, pvalue=9.185900584247756e-07)
gCSI Spearman correlation and p-value: SpearmanrResult(correlation=0.3582317677689936, pvalue=7.682166608794054e-10)
GDSC Kendall correlation and p-value: KendalltauResult(correlation=0.22504677011114776, pvalue=2.967840764704267e-07)
gCSI Kendall correlation and p-value: KendalltauResult(correlation=0.24449350723577046, pvalue=1.2317855166215045e-09)
-----------------------------------------------------------------


In [14]:
# Cross-domain IC50: fit on CTRP, evaluate the predictions on GDSC and gCSI.
for drug in drugs:
    # log IC50 values for this drug in each dataset; samples without a value are dropped.
    CTRP_ic50_drug = CTRP_ic50.loc[drug].dropna()
    GDSC_ic50_drug = GDSC_ic50.loc[drug].dropna()
    gCSI_ic50_drug = gCSI_ic50.loc[drug].dropna()

    # Samples that have both expression and IC50, minus those flagged
    # Tumor == 1 in the dataset's info table (the idx_other_* indexes).
    idx_ctrp = [x for x in CTRP_exprs.columns.intersection(CTRP_ic50_drug.index)
                if x not in idx_other_ctrp]
    idx_gdsc = [x for x in GDSC_exprs.columns.intersection(GDSC_ic50_drug.index)
                if x not in idx_other_gdsc]
    idx_gcsi = [x for x in gCSI_exprs.columns.intersection(gCSI_ic50_drug.index)
                if x not in idx_other_gcsi]

    # Expression tables hold samples as columns; transpose so each row is a sample,
    # and put the responses in the same sample order.
    CTRP_exprs_drug = CTRP_exprs.loc[:, idx_ctrp].T
    CTRP_ic50_drug = CTRP_ic50_drug.loc[idx_ctrp]
    GDSC_exprs_drug = GDSC_exprs.loc[:, idx_gdsc].T
    GDSC_ic50_drug = GDSC_ic50_drug.loc[idx_gdsc]
    gCSI_exprs_drug = gCSI_exprs.loc[:, idx_gcsi].T
    gCSI_ic50_drug = gCSI_ic50_drug.loc[idx_gcsi]

    # NOTE(review): the model is fitted on the CTRP matrix and applied to the other
    # two as-is, which assumes all three expression tables share the same rows in
    # the same order — TODO confirm.
    X_train_N = CTRP_exprs_drug.values
    y_train = CTRP_ic50_drug.values
    gdsc_true = GDSC_ic50_drug.values
    gcsi_true = gCSI_ic50_drug.values

    pred_gdsc, pred_gcsi = RG(X_train_N, y_train,
                              [GDSC_exprs_drug.values, gCSI_exprs_drug.values],
                              alph, folds, seeds)

    print(drug)
    print("GDSC Pearson correlation and p-value:", pearsonr(pred_gdsc, gdsc_true))
    # mean_squared_error takes (y_true, y_pred); the value is the same either way,
    # but the documented order keeps the call unambiguous.
    print("GDSC RMSE:", sqrt(mean_squared_error(gdsc_true, pred_gdsc)))
    print("gCSI Pearson correlation and p-value:", pearsonr(pred_gcsi, gcsi_true))
    print("gCSI RMSE:", sqrt(mean_squared_error(gcsi_true, pred_gcsi)))
    print("*****************************************************************")
    print("GDSC Spearman correlation and p-value:", spearmanr(pred_gdsc, gdsc_true))
    print("gCSI Spearman correlation and p-value:", spearmanr(pred_gcsi, gcsi_true))
    print("GDSC Kendall correlation and p-value:", kendalltau(pred_gdsc, gdsc_true))
    print("gCSI Kendall correlation and p-value:", kendalltau(pred_gcsi, gcsi_true))
    print("-----------------------------------------------------------------")

Bortezomib
GDSC Pearson correlation and p-value: (0.27688424693040553, 2.5223731302517667e-05)
GDSC RMSE: 1.2226627626608644
gCSI Pearson correlation and p-value: (0.2312884723523337, 9.661826138100106e-05)
gCSI RMSE: 2.2704259133410285
*****************************************************************
GDSC Spearman correlation and p-value: SpearmanrResult(correlation=0.23212020567390346, pvalue=0.00044729957453735147)
gCSI Spearman correlation and p-value: SpearmanrResult(correlation=0.19110554874073094, pvalue=0.0013395347149886111)
GDSC Kendall correlation and p-value: KendalltauResult(correlation=0.15743149925513442, pvalue=0.000451340845874519)
gCSI Kendall correlation and p-value: KendalltauResult(correlation=0.13081147984837938, pvalue=0.001125825729964234)
-----------------------------------------------------------------
Entinostat
GDSC Pearson correlation and p-value: (0.5406671073696168, 6.691383434586079e-15)
GDSC RMSE: 1.1405597872819906
gCSI Pearson correlation and p-value:

  c /= stddev[:, None]
  c /= stddev[None, :]
  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


Sirolimus
GDSC Pearson correlation and p-value: (0.140874676110059, 0.4745901852713202)
GDSC RMSE: 9.17771110229334
gCSI Pearson correlation and p-value: (0.0908024872694426, 0.5930102603158804)
gCSI RMSE: 9.610915990712746
*****************************************************************
GDSC Spearman correlation and p-value: SpearmanrResult(correlation=0.12331354672241977, pvalue=0.5318675781747372)
gCSI Spearman correlation and p-value: SpearmanrResult(correlation=0.09769004823103436, pvalue=0.5651501315094778)
GDSC Kendall correlation and p-value: KendalltauResult(correlation=0.10482848367219182, pvalue=0.5048876687972792)
gCSI Kendall correlation and p-value: KendalltauResult(correlation=0.05522745708059766, pvalue=0.635861513886157)
-----------------------------------------------------------------
Docetaxel
GDSC Pearson correlation and p-value: (0.34855117632356536, 5.931252529603894e-07)
GDSC RMSE: 4.0260263416549
gCSI Pearson correlation and p-value: (0.3432826744532476, 1.0524

In [4]:
print("AAC Baseline results")
for drug in drugs:
    # AAC values for this drug per dataset, keeping only samples that have a value.
    CTRP_aac_drug = CTRP_aac.loc[drug].dropna()
    GDSC_aac_drug = GDSC_aac.loc[drug].dropna()
    gCSI_aac_drug = gCSI_aac.loc[drug].dropna()

    # Samples with AAC and expression in both CTRP and GDSC,
    # excluding anything flagged in either dataset's idx_other_* index.
    idx_ctrpgdsc = GDSC_exprs.columns.intersection(
        CTRP_exprs.columns.intersection(
            GDSC_aac_drug.index.intersection(CTRP_aac_drug.index)
        )
    )
    idx_ctrpgdsc = [
        x for x in idx_ctrpgdsc
        if x not in idx_other_ctrp and x not in idx_other_gdsc
    ]

    # Same selection for the CTRP / gCSI pair.
    idx_ctrpgcsi = gCSI_exprs.columns.intersection(
        CTRP_exprs.columns.intersection(
            CTRP_aac_drug.index.intersection(gCSI_aac_drug.index)
        )
    )
    idx_ctrpgcsi = [
        x for x in idx_ctrpgcsi
        if x not in idx_other_ctrp and x not in idx_other_gcsi
    ]

    # NOTE: a GDSCv2-vs-gCSI baseline (shared index, Pearson/Spearman/Kendall and
    # sample size) was sketched here as commented-out code and is not computed.

    # Paired AAC values on the shared CTRP / GDSC samples.
    CTRP_aac_drug_1 = CTRP_aac_drug.loc[idx_ctrpgdsc]
    GDSC_aac_drug_1 = GDSC_aac_drug.loc[idx_ctrpgdsc]
    ctrp_vs_gdsc, gdsc_vs_ctrp = CTRP_aac_drug_1.values, GDSC_aac_drug_1.values
    print(drug)
    print("Baseline Pearson correlation between CTRPv2 and GDSCv2:", pearsonr(ctrp_vs_gdsc, gdsc_vs_ctrp))
    print("Baseline Spearman correlation between CTRPv2 and GDSCv2:", spearmanr(ctrp_vs_gdsc, gdsc_vs_ctrp))
    print("Baseline Kendall correlation between CTRPv2 and GDSCv2:", kendalltau(ctrp_vs_gdsc, gdsc_vs_ctrp))
    print("sample size of CTRPv2_GDSCv2:", len(idx_ctrpgdsc))
    print("*************************************************")

    # Paired AAC values on the shared CTRP / gCSI samples.
    CTRP_aac_drug_2 = CTRP_aac_drug.loc[idx_ctrpgcsi]
    gCSI_aac_drug_2 = gCSI_aac_drug.loc[idx_ctrpgcsi]
    ctrp_vs_gcsi, gcsi_vs_ctrp = CTRP_aac_drug_2.values, gCSI_aac_drug_2.values
    print("Baseline Pearson correlation between CTRPv2 and gCSI:", pearsonr(ctrp_vs_gcsi, gcsi_vs_ctrp))
    print("Baseline Spearman correlation between CTRPv2 and gCSI:", spearmanr(ctrp_vs_gcsi, gcsi_vs_ctrp))
    print("Baseline Kendall correlation between CTRPv2 and gCSI:", kendalltau(ctrp_vs_gcsi, gcsi_vs_ctrp))
    print("sample size of CTRPv2_gCSI:", len(idx_ctrpgcsi))
    print("*************************************************")

    print("------------------------------------------------")


Bortezomib
Baseline Pearson correlation between CTRPv2 and GDSCv2: (0.5819405726803103, 0.00030599346703397097)
Baseline Spearman correlation between CTRPv2 and GDSCv2: SpearmanrResult(correlation=0.5819709702062642, pvalue=0.0003057179112928473)
Baseline Kendall correlation between CTRPv2 and GDSCv2: KendalltauResult(correlation=0.4188948306595366, pvalue=0.000494451852877984)
sample size of CTRPv2_GDSCv2: 34
*************************************************
Baseline Pearson correlation between CTRPv2 and gCSI: (0.3861733756578955, 2.267746121156382e-08)
Baseline Spearman correlation between CTRPv2 and gCSI: SpearmanrResult(correlation=0.36005865055901315, pvalue=2.1777438081296335e-07)
Baseline Kendall correlation between CTRPv2 and gCSI: KendalltauResult(correlation=0.24217687074829938, pvalue=4.6388394565878343e-07)
sample size of CTRPv2_gCSI: 196
*************************************************
------------------------------------------------
Entinostat
Baseline Pearson correlati

In [5]:
print("IC50 Baseline results")
for drug in drugs:
    # log IC50 values for this drug per dataset, keeping only samples that have a value.
    CTRP_ic50_drug = CTRP_ic50.loc[drug].dropna()
    GDSC_ic50_drug = GDSC_ic50.loc[drug].dropna()
    gCSI_ic50_drug = gCSI_ic50.loc[drug].dropna()

    # Samples with IC50 and expression in both CTRP and GDSC,
    # excluding anything flagged in either dataset's idx_other_* index.
    idx_ctrpgdsc = GDSC_exprs.columns.intersection(
        CTRP_exprs.columns.intersection(
            GDSC_ic50_drug.index.intersection(CTRP_ic50_drug.index)
        )
    )
    idx_ctrpgdsc = [
        x for x in idx_ctrpgdsc
        if x not in idx_other_ctrp and x not in idx_other_gdsc
    ]

    # Same selection for the CTRP / gCSI pair.
    idx_ctrpgcsi = gCSI_exprs.columns.intersection(
        CTRP_exprs.columns.intersection(
            CTRP_ic50_drug.index.intersection(gCSI_ic50_drug.index)
        )
    )
    idx_ctrpgcsi = [
        x for x in idx_ctrpgcsi
        if x not in idx_other_ctrp and x not in idx_other_gcsi
    ]

    # NOTE: a GDSCv2-vs-gCSI baseline (shared index, Pearson/Spearman/Kendall and
    # sample size) was sketched here as commented-out code and is not computed.

    # Paired IC50 values on the shared CTRP / GDSC samples.
    CTRP_ic50_drug_1 = CTRP_ic50_drug.loc[idx_ctrpgdsc]
    GDSC_ic50_drug_1 = GDSC_ic50_drug.loc[idx_ctrpgdsc]
    ctrp_vs_gdsc, gdsc_vs_ctrp = CTRP_ic50_drug_1.values, GDSC_ic50_drug_1.values
    print(drug)
    print("Baseline Pearson correlation between CTRPv2 and GDSCv2:", pearsonr(ctrp_vs_gdsc, gdsc_vs_ctrp))
    print("Baseline Spearman correlation between CTRPv2 and GDSCv2:", spearmanr(ctrp_vs_gdsc, gdsc_vs_ctrp))
    print("Baseline Kendall correlation between CTRPv2 and GDSCv2:", kendalltau(ctrp_vs_gdsc, gdsc_vs_ctrp))
    print("sample size of CTRPv2_GDSCv2:", len(idx_ctrpgdsc))
    print("*************************************************")

    # Paired IC50 values on the shared CTRP / gCSI samples.
    CTRP_ic50_drug_2 = CTRP_ic50_drug.loc[idx_ctrpgcsi]
    gCSI_ic50_drug_2 = gCSI_ic50_drug.loc[idx_ctrpgcsi]
    ctrp_vs_gcsi, gcsi_vs_ctrp = CTRP_ic50_drug_2.values, gCSI_ic50_drug_2.values
    print("Baseline Pearson correlation between CTRPv2 and gCSI:", pearsonr(ctrp_vs_gcsi, gcsi_vs_ctrp))
    print("Baseline Spearman correlation between CTRPv2 and gCSI:", spearmanr(ctrp_vs_gcsi, gcsi_vs_ctrp))
    print("Baseline Kendall correlation between CTRPv2 and gCSI:", kendalltau(ctrp_vs_gcsi, gcsi_vs_ctrp))
    print("sample size of CTRPv2_gCSI:", len(idx_ctrpgcsi))
    print("*************************************************")

    print("------------------------------------------------")


IC50 Baseline results
Bortezomib
Baseline Pearson correlation between CTRPv2 and GDSCv2: (0.7334186053630858, 1.202496300975255e-06)
Baseline Spearman correlation between CTRPv2 and GDSCv2: SpearmanrResult(correlation=0.723823851445015, pvalue=1.9314296089088033e-06)
Baseline Kendall correlation between CTRPv2 and GDSCv2: KendalltauResult(correlation=0.5327016611045488, pvalue=1.3339978013772812e-05)
sample size of CTRPv2_GDSCv2: 33
*************************************************
Baseline Pearson correlation between CTRPv2 and gCSI: (0.3239490854144997, 5.12949851990103e-06)
Baseline Spearman correlation between CTRPv2 and gCSI: SpearmanrResult(correlation=0.2704432418057801, pvalue=0.00016079902518051215)
Baseline Kendall correlation between CTRPv2 and gCSI: KendalltauResult(correlation=0.1829067922400181, pvalue=0.00017885183288910867)
sample size of CTRPv2_gCSI: 190
*************************************************
------------------------------------------------
Entinostat
Baseli

  c /= stddev[:, None]
  c /= stddev[None, :]
  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


In [13]:
# Number of samples in different datasets for AAC and IC50
# Only counts are reported here, so no expression sub-matrices are sliced or
# transposed; the expression tables are used solely for their column labels.
for drug in drugs:
    # --- AAC: samples with a measured value for this drug ---
    CTRP_aac_drug = CTRP_aac.loc[drug].dropna()
    GDSC_aac_drug = GDSC_aac.loc[drug].dropna()
    gCSI_aac_drug = gCSI_aac.loc[drug].dropna()

    # Keep samples that also have expression data and are not in the
    # dataset's idx_other_* exclusion index.
    idx_ctrp = [x for x in CTRP_exprs.columns.intersection(CTRP_aac_drug.index)
                if x not in idx_other_ctrp]
    idx_gdsc = [x for x in GDSC_exprs.columns.intersection(GDSC_aac_drug.index)
                if x not in idx_other_gdsc]
    idx_gcsi = [x for x in gCSI_exprs.columns.intersection(gCSI_aac_drug.index)
                if x not in idx_other_gcsi]

    CTRP_aac_drug = CTRP_aac_drug.loc[idx_ctrp]
    GDSC_aac_drug = GDSC_aac_drug.loc[idx_gdsc]
    gCSI_aac_drug = gCSI_aac_drug.loc[idx_gcsi]

    print(drug)
    print("number of AAC train samples in CTRPv2 is:", len(CTRP_aac_drug))
    print("number of AAC test samples in GDSCv2 is:", len(GDSC_aac_drug))
    print("number of AAC test samples in gCSI is:", len(gCSI_aac_drug))
    print("************************************************")

    # --- IC50: same selection applied to the log IC50 tables ---
    CTRP_ic50_drug = CTRP_ic50.loc[drug].dropna()
    GDSC_ic50_drug = GDSC_ic50.loc[drug].dropna()
    gCSI_ic50_drug = gCSI_ic50.loc[drug].dropna()

    idx_ctrp = [x for x in CTRP_exprs.columns.intersection(CTRP_ic50_drug.index)
                if x not in idx_other_ctrp]
    idx_gdsc = [x for x in GDSC_exprs.columns.intersection(GDSC_ic50_drug.index)
                if x not in idx_other_gdsc]
    idx_gcsi = [x for x in gCSI_exprs.columns.intersection(gCSI_ic50_drug.index)
                if x not in idx_other_gcsi]

    CTRP_ic50_drug = CTRP_ic50_drug.loc[idx_ctrp]
    GDSC_ic50_drug = GDSC_ic50_drug.loc[idx_gdsc]
    gCSI_ic50_drug = gCSI_ic50_drug.loc[idx_gcsi]

    print("number of IC50 train samples in CTRPv2 is:", len(CTRP_ic50_drug))
    print("number of IC50 test samples in GDSCv2 is:", len(GDSC_ic50_drug))
    print("number of IC50 test samples in gCSI is:", len(gCSI_ic50_drug))
    print("------------------------------------------------")


Bortezomib
number of AAC train samples in CTRPv2 is: 556
number of AAC test samples in GDSCv2 is: 231
number of AAC test samples in gCSI is: 280
************************************************
number of IC50 train samples in CTRPv2 is: 536
number of IC50 test samples in GDSCv2 is: 225
number of IC50 test samples in gCSI is: 279
------------------------------------------------
Entinostat
number of AAC train samples in CTRPv2 is: 540
number of AAC test samples in GDSCv2 is: 226
number of AAC test samples in gCSI is: 280
************************************************
number of IC50 train samples in CTRPv2 is: 512
number of IC50 test samples in GDSCv2 is: 178
number of IC50 test samples in gCSI is: 55
------------------------------------------------
Sirolimus
number of AAC train samples in CTRPv2 is: 584
number of AAC test samples in GDSCv2 is: 226
number of AAC test samples in gCSI is: 280
************************************************
number of IC50 train samples in CTRPv2 is: 318
n