This notebook divides the raw data into labelled and unlabelled based on whether a PFS/RECIST/AUC value is available.

In [1]:
import pandas as pd
import numpy as np

In [2]:
import pickle as pickle

In [3]:
response_data_dir = "/data/papers_data/systematic_assessment/raw/response_files/"

### Raw data files - response files

#### RECIST response files

In [4]:
# TCGA
tcga_df = pd.read_csv(f"{response_data_dir}/tcga_recist.csv")
print(tcga_df.shape)
tcga_df.head()

(611, 6)


Unnamed: 0,sample_id,drug_name,recist_like,days_to_drug_therapy_start,days_to_drug_therapy_end,drug_category
0,TCGA-VQ-A928,FLUOROURACIL,Clinical Progressive Disease,98,138,1
1,TCGA-JY-A93D,FLUOROURACIL,Complete Response,150,280,1
2,TCGA-P3-A6T3,CISPLATIN,Complete Response,61,92,1
3,TCGA-AZ-6606,FLUOROURACIL,Clinical Progressive Disease,165,165,1
4,TCGA-VS-A954,CISPLATIN,Complete Response,94,122,1


In [5]:
tcga_df.drug_name.value_counts()

drug_name
CISPLATIN           104
TEMOZOLOMIDE         99
GEMCITABINE          61
FLUOROURACIL         50
PACLITAXEL           40
DOCETAXEL            26
CAPECITABINE         24
DOXORUBICIN          20
CARBOPLATIN          20
SORAFENIB            15
DACARBAZINE          13
CETUXIMAB            11
BEVACIZUMAB          11
TAMOXIFEN            11
SUNITINIB             8
BICALUTAMIDE          8
DASATINIB             7
ERLOTINIB             5
PAZOPANIB             5
CARMUSTINE            4
BLEOMYCIN             4
PEMETREXED            4
IFOSFAMIDE            4
LETROZOLE             4
OXALIPLATIN           4
EXEMESTANE            4
MITOMYCIN             4
TOPOTECAN             3
TRASTUZUMAB           3
MELPHALAN             3
LOMUSTINE             3
ETOPOSIDE             3
VINORELBINE           3
VORINOSTAT            2
BCG                   2
GEFITINIB             2
AXITINIB              2
CYCLOPHOSPHAMIDE      2
IMATINIB              2
IRINOTECAN            2
TEMSIROLIMUS          2
VINCRI

In [6]:
# Moore's
moores_df = pd.read_csv(f"{response_data_dir}/moores_recist.csv")
print(moores_df.shape)
moores_df.head()

(44, 4)


Unnamed: 0,patient_id,drug_name,recist,drug_category
0,2,LAPATINIB,1,1
1,3,TAMOXIFEN,1,1
2,6,PONATINIB,0,1
3,7,BEVACIZUMAB,0,1
4,8,LAPATINIB,0,1


In [7]:
moores_df.drug_name.value_counts()

drug_name
BEVACIZUMAB     12
ERLOTINIB        4
EXEMESTANE       4
LAPATINIB        3
TRASTUZUMAB      3
TAMOXIFEN        2
TRAMETINIB       2
PAZOPANIB        2
SORAFENIB        2
LETROZOLE        2
VISMODEGIB       1
DASATINIB        1
DABRAFENIB       1
PONATINIB        1
CRIZOTINIB       1
CABOZANTINIB     1
RUXOLITINIB      1
PALBOCICLIB      1
Name: count, dtype: int64

In [8]:
# CBIO (hcc_mskimpact_2018)
cbio_hcc_mskimpact_2018_df = pd.read_csv(f"{response_data_dir}/cbio_hcc_mskimpact_2018_recist.csv")
print(cbio_hcc_mskimpact_2018_df.shape)
cbio_hcc_mskimpact_2018_df.head()

(43, 4)


Unnamed: 0,sample_id,drug_name,recist_like,drug_category
0,P-0005038-T02-IM6,SORAFENIB,SD,1
1,P-0015203-T01-IM6,SORAFENIB,SD,1
2,P-0019058-T01-IM6,SORAFENIB,PD,1
3,P-0019238-T01-IM6,SORAFENIB,PD,1
4,P-0020359-T01-IM6,SORAFENIB,PD,1


In [9]:
cbio_hcc_mskimpact_2018_df = cbio_hcc_mskimpact_2018_df[~(cbio_hcc_mskimpact_2018_df.recist_like == "NE")].reset_index(drop=True)
cbio_hcc_mskimpact_2018_df

Unnamed: 0,sample_id,drug_name,recist_like,drug_category
0,P-0005038-T02-IM6,SORAFENIB,SD,1
1,P-0015203-T01-IM6,SORAFENIB,SD,1
2,P-0019058-T01-IM6,SORAFENIB,PD,1
3,P-0019238-T01-IM6,SORAFENIB,PD,1
4,P-0020359-T01-IM6,SORAFENIB,PD,1
5,P-0001324-T01-IM3,SORAFENIB,SD,1
6,P-0003212-T01-IM5,SORAFENIB,PD,1
7,P-0005757-T01-IM5,SORAFENIB,PD,1
8,P-0006245-T02-IM5,SORAFENIB,PD,1
9,P-0012628-T01-IM5,SORAFENIB,PR,1


In [10]:
cbio_hcc_mskimpact_2018_df.drug_name.value_counts()

drug_name
SORAFENIB    43
Name: count, dtype: int64

In [11]:
# cbio_hcc_mskimpact_2018_df.to_csv(f"{response_data_dir}/cbio_hcc_mskimpact_2018_recist.csv", index=False)

In [12]:
# CBIO (brca_mskcc_2019)
cbio_brca_mskcc_2019_df = pd.read_csv(f"{response_data_dir}/cbio_brca_mskcc_2019_recist.csv")
print(cbio_brca_mskcc_2019_df.shape)
cbio_brca_mskcc_2019_df.head()

(27, 4)


Unnamed: 0,sample_id,drug_name,recist_like,drug_category
0,s_DS_bkm_001_T,BUPARLISIB,Stable Disease,2
1,s_DS_bkm_002_T,BUPARLISIB,Stable Disease,2
2,s_DS_bkm_003_T,BUPARLISIB,Partial Response,2
3,s_DS_bkm_005_T,BUPARLISIB,Complete Response,2
4,s_DS_bkm_006_T,BUPARLISIB,Stable Disease,2


In [13]:
cbio_brca_mskcc_2019_df = cbio_brca_mskcc_2019_df[~(cbio_brca_mskcc_2019_df.recist_like == "Not Evaluable")].reset_index(drop=True)
cbio_brca_mskcc_2019_df

Unnamed: 0,sample_id,drug_name,recist_like,drug_category
0,s_DS_bkm_001_T,BUPARLISIB,Stable Disease,2
1,s_DS_bkm_002_T,BUPARLISIB,Stable Disease,2
2,s_DS_bkm_003_T,BUPARLISIB,Partial Response,2
3,s_DS_bkm_005_T,BUPARLISIB,Complete Response,2
4,s_DS_bkm_006_T,BUPARLISIB,Stable Disease,2
5,s_DS_bkm_007_T,BUPARLISIB,Stable Disease,2
6,s_DS_bkm_008_T,BUPARLISIB,Stable Disease,2
7,s_DS_bkm_009_T,BUPARLISIB,Stable Disease,2
8,s_DS_bkm_010_T,BUPARLISIB,Progressive Disease,2
9,s_DS_bkm_013_T,BUPARLISIB,Stable Disease,2


In [14]:
cbio_brca_mskcc_2019_df.drug_name.value_counts()

drug_name
BUPARLISIB    27
Name: count, dtype: int64

In [15]:
# cbio_brca_mskcc_2019_df.to_csv(f"{response_data_dir}/cbio_brca_mskcc_2019_recist.csv", index=False)

#### AUDRC response file

In [16]:
ccle_df = pd.read_csv(f"{response_data_dir}/ccle_auc.csv")
print(ccle_df.shape)
ccle_df.head()

(224495, 5)


Unnamed: 0,sample_id,drug_name,auc,ic50,drug_category
0,PR-00UtU3,5-FLUOROURACIL,0.952134,4.327152,1
1,PR-00UtU3,ABT737,0.59538,-0.445402,3
2,PR-00UtU3,ACETALAX,0.982784,5.474095,3
3,PR-00UtU3,AFATINIB,0.824703,1.064304,1
4,PR-00UtU3,AFURESERTIB,0.901798,2.332643,2


In [17]:
ccle_df.drug_name.value_counts()

drug_name
STAUROSPORINE    1197
MG-132           1197
PALBOCICLIB      1196
NAVITOCLAX       1196
MK-2206          1196
                 ... 
IOX2              264
SL0101            264
CHIR-99021        264
CCT-018159        264
TRETINOIN         264
Name: count, Length: 211, dtype: int64

#### PFS response file

In [45]:
# GENIE NSCLC
genie_nsclc_df = pd.read_csv(f"{response_data_dir}/genie_nsclc_pfs.csv")
print(genie_nsclc_df.shape)
genie_nsclc_df.head()

(71, 8)


Unnamed: 0,sample_id,REGIMEN_NUMBER,drug_name,drug_start_date,drug_end_date,pfs_status,pfs_days,category
0,GENIE-DFCI-003908-234520,2.0,ALECTINIB,1488.0,2419.0,1.0,864.0,1.0
1,GENIE-DFCI-037921-36048,1.0,CRIZOTINIB,497.0,566.0,1.0,83.0,1.0
2,GENIE-DFCI-033743-88529,3.0,DOCETAXEL,582.0,687.0,1.0,126.0,1.0
3,GENIE-DFCI-090170-266320,1.0,ALECTINIB,35.0,35.0,0.0,942.0,1.0
4,GENIE-DFCI-078051-298963,5.0,CERITINIB,4593.0,4638.0,1.0,2.0,1.0


In [46]:
genie_nsclc_df.drug_name.value_counts()

drug_name
CRIZOTINIB     25
OSIMERTINIB    22
GEFITINIB       6
ALECTINIB       6
BEVACIZUMAB     5
DOCETAXEL       5
CERITINIB       1
CARBOPLATIN     1
Name: count, dtype: int64

In [47]:
# GENIE CRC
genie_crc_df = pd.read_csv(f"{response_data_dir}/genie_crc_pfs.csv")
print(genie_crc_df.shape)
genie_crc_df.head()

(71, 8)


Unnamed: 0,sample_id,REGIMEN_NUMBER,drug_name,drug_start_date,drug_end_date,pfs_status,pfs_days,category
0,GENIE-DFCI-008889-6970,1.0,BEVACIZUMAB,23.0,23.0,0.0,56.0,1.0
1,GENIE-DFCI-008759-7792,1.0,CAPECITABINE,73.0,272.0,1.0,21.0,1.0
2,GENIE-DFCI-009942-7682,2.0,CARBOPLATIN,570.0,570.0,0.0,56.0,1.0
3,GENIE-DFCI-007011-11659,1.0,FLUOROURACIL,262.0,440.0,0.0,206.0,1.0
4,GENIE-DFCI-011252-11577,1.0,CETUXIMAB,457.0,527.0,0.0,142.0,1.0


In [48]:
genie_crc_df.drug_name.value_counts()

drug_name
CAPECITABINE    39
FLUOROURACIL    12
MITOMYCIN        8
BEVACIZUMAB      6
PANITUMUMAB      2
CETUXIMAB        2
CARBOPLATIN      1
ETOPOSIDE        1
Name: count, dtype: int64

In [20]:
# 3 folds for train-test
# Each train fold - split into train and validate - use validate for hyperparam tuning
# downstream analysis on one fold split

### Raw data files - mutation files

In [18]:
mutation_file_path = "/data//papers_data/systematic_assessment/raw/mutation_files/"
raw_path = "/data/druid_data/"

In [19]:
# TCGA
tcga_input_df = pd.read_csv(f"{raw_path}/Tcga/patient_gene_alteration(mutation).csv")
len(tcga_input_df.patient_id.unique())

10173

In [20]:
tcga_input_df

Unnamed: 0,patient_id,gene,alteration
0,TCGA-50-5931,CAMTA1,V870E
1,TCGA-50-5931,CATSPER4,P365=
2,TCGA-50-5931,KDF1,I243T
3,TCGA-50-5931,CSMD2,T417S
4,TCGA-50-5931,SFPQ,G647C
...,...,...,...
3093849,TCGA-YD-A9TA,CNGA2,G303G
3093850,TCGA-YD-A9TA,MAGEA12,R243R
3093851,TCGA-YD-A9TA,ZNF275,L224L
3093852,TCGA-YD-A9TA,L1CAM,P279P


In [21]:
labelled_tcga_df = tcga_input_df[tcga_input_df["patient_id"].isin(tcga_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(labelled_tcga_df.patient_id.unique())}")
labelled_tcga_df.head()

No of patients: 549


Unnamed: 0,patient_id,gene,alteration
0,TCGA-VQ-A928,EPHA2,G75=
1,TCGA-VQ-A928,ELAVL4,G138C
2,TCGA-VQ-A928,SLC35D1,G30=
3,TCGA-VQ-A928,SRSF11,G30=
4,TCGA-VQ-A928,CD5L,S207T


In [22]:
unlabelled_tcga_df = tcga_input_df[~tcga_input_df["patient_id"].isin(tcga_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(unlabelled_tcga_df.patient_id.unique())}")
unlabelled_tcga_df.head()

No of patients: 9624


Unnamed: 0,patient_id,gene,alteration
0,TCGA-50-5931,CAMTA1,V870E
1,TCGA-50-5931,CATSPER4,P365=
2,TCGA-50-5931,KDF1,I243T
3,TCGA-50-5931,CSMD2,T417S
4,TCGA-50-5931,SFPQ,G647C


In [23]:
# labelled_tcga_df.to_csv(f"{mutation_file_path}/labelled_tcga_patient_gene_alteration_mutation.csv", index=False)
# unlabelled_tcga_df.to_csv(f"{mutation_file_path}/unlabelled_tcga_patient_gene_alteration_mutation.csv", index=False)

In [24]:
# Moore's
moores_input_df = pd.read_csv(f"{raw_path}/Moores/patient_gene_alteration(mutation).csv")
len(moores_input_df.patient_id.unique())

82

In [25]:
moores_input_df

Unnamed: 0,patient_id,gene,alteration
0,1,PTEN,splice site 493-1 G>A
1,2,TP53,P151A
2,3,ESR1,Y537S
3,4,PTEN,I67K
4,4,CTNNB1,T257I
...,...,...,...
220,84,GATA3,G335fs*18
221,85,TP53,H168R
222,85,GATA3,N332fs*21
223,86,MLL2,A4571T


In [26]:
labelled_moores_df = moores_input_df[moores_input_df["patient_id"].isin(moores_df.patient_id)].reset_index(drop=True)
print(f"No of patients: {len(labelled_moores_df.patient_id.unique())}")
labelled_moores_df.head()

No of patients: 42


Unnamed: 0,patient_id,gene,alteration
0,2,TP53,P151A
1,3,ESR1,Y537S
2,6,APC,I1307K
3,6,ARID1A,P2139fs*62
4,6,TP53,F113C


In [27]:
unlabelled_moores_df = moores_input_df[~moores_input_df["patient_id"].isin(moores_df.patient_id)].reset_index(drop=True)
print(f"No of patients: {len(unlabelled_moores_df.patient_id.unique())}")
unlabelled_moores_df.head()

No of patients: 40


Unnamed: 0,patient_id,gene,alteration
0,1,PTEN,splice site 493-1 G>A
1,4,PTEN,I67K
2,4,CTNNB1,T257I
3,5,PIK3CA,E545K
4,5,TP53,D228fs*1


In [25]:
# labelled_moores_df.to_csv(f"{mutation_file_path}/labelled_moores_patient_gene_alteration_mutation.csv", index=False)
# unlabelled_moores_df.to_csv(f"{mutation_file_path}/unlabelled_moores_patient_gene_alteration_mutation.csv", index=False)

In [28]:
# CBIO (hcc_mskimpact_2018)
cbio_hcc_mskimpact_2018_input_df = pd.read_csv(f"{raw_path}/CBIO/hcc_mskimpact_2018/patient_gene_alteration(mutation).csv")
len(cbio_hcc_mskimpact_2018_input_df.patient_id.unique())

120

In [29]:
cbio_hcc_mskimpact_2018_input_df

Unnamed: 0,patient_id,gene,alteration
0,P-0005038-T02-IM6,TNFRSF14,Q242R
1,P-0005038-T02-IM6,JAK1,S729C
2,P-0005038-T02-IM6,MEN1,X224_splice
3,P-0005038-T02-IM6,ALK,E717K
4,P-0015203-T01-IM6,ZRSR2,C172S
...,...,...,...
531,P-0012182-T01-IM5,NEGR1,Q8L
532,P-0012182-T01-IM5,SETD2,S2479A
533,P-0012182-T01-IM5,POLE,V544M
534,P-0012182-T01-IM5,AXIN1,E291*


In [30]:
labelled_cbio_hcc_mskimpact_2018_df = cbio_hcc_mskimpact_2018_input_df[cbio_hcc_mskimpact_2018_input_df["patient_id"].isin(cbio_hcc_mskimpact_2018_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(labelled_cbio_hcc_mskimpact_2018_df.patient_id.unique())}")
labelled_cbio_hcc_mskimpact_2018_df.head()

No of patients: 43


Unnamed: 0,patient_id,gene,alteration
0,P-0005038-T02-IM6,TNFRSF14,Q242R
1,P-0005038-T02-IM6,JAK1,S729C
2,P-0005038-T02-IM6,MEN1,X224_splice
3,P-0005038-T02-IM6,ALK,E717K
4,P-0015203-T01-IM6,ZRSR2,C172S


In [31]:
unlabelled_cbio_hcc_mskimpact_2018_df = cbio_hcc_mskimpact_2018_input_df[~cbio_hcc_mskimpact_2018_input_df["patient_id"].isin(cbio_hcc_mskimpact_2018_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(unlabelled_cbio_hcc_mskimpact_2018_df.patient_id.unique())}")
unlabelled_cbio_hcc_mskimpact_2018_df.head()

No of patients: 77


Unnamed: 0,patient_id,gene,alteration
0,P-0015581-T01-IM6,PTPN11,G503V
1,P-0015687-T01-IM6,SUFU,T13Wfs*29
2,P-0015687-T01-IM6,PTPRS,R34K
3,P-0015687-T01-IM6,PTPRT,K758T
4,P-0015687-T01-IM6,RB1,Y325Sfs*8


In [60]:
# labelled_cbio_hcc_mskimpact_2018_df.to_csv(f"{mutation_file_path}/labelled_cbio_hcc_mskimpact_2018_patient_gene_alteration_mutation.csv", index=False)
# unlabelled_cbio_hcc_mskimpact_2018_df.to_csv(f"{mutation_file_path}/unlabelled_cbio_hcc_mskimpact_2018_patient_gene_alteration_mutation.csv", index=False)

In [32]:
# CBIO (brca_mskcc_2019)
cbio_brca_mskcc_2019_input_df = pd.read_csv(f"{raw_path}/CBIO/brca_mskcc_2019/patient_gene_alteration(mutation).csv")
len(cbio_brca_mskcc_2019_input_df.patient_id.unique())

70

In [33]:
cbio_brca_mskcc_2019_input_df

Unnamed: 0,patient_id,gene,alteration
0,s_DS_bkm_077_T,VTCN1,S192L
1,s_DS_bkm_078_T2,NOTCH2,D1582N
2,s_DS_bkm_078_T1,NOTCH2,D1582N
3,s_DS_bkm_074_T,NOTCH2,T1303P
4,s_DS_bkm_064_T2,NOTCH2,P6Rfs*27
...,...,...,...
653,s_DS_bkm_058_T,NCOR1,A750V
654,s_DS_bkm_058_T,BCOR,N193T
655,s_DS_bkm_059_T,SF3B1,I641V
656,s_DS_bkm_059_T,ESR1,L536R


In [34]:
labelled_cbio_brca_mskcc_2019_df = cbio_brca_mskcc_2019_input_df[cbio_brca_mskcc_2019_input_df["patient_id"].isin(cbio_brca_mskcc_2019_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(labelled_cbio_brca_mskcc_2019_df.patient_id.unique())}")
labelled_cbio_brca_mskcc_2019_df.head()

No of patients: 27


Unnamed: 0,patient_id,gene,alteration
0,s_DS_bkm_001_T,ALK,R291C
1,s_DS_bkm_001_T,TGFBR2,S527I
2,s_DS_bkm_001_T,PDGFRA,K304N
3,s_DS_bkm_001_T,TET2,T995P
4,s_DS_bkm_001_T,MLL,V1238M


In [35]:
unlabelled_cbio_brca_mskcc_2019_df = cbio_brca_mskcc_2019_input_df[~cbio_brca_mskcc_2019_input_df["patient_id"].isin(cbio_brca_mskcc_2019_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(unlabelled_cbio_brca_mskcc_2019_df.patient_id.unique())}")
unlabelled_cbio_brca_mskcc_2019_df.head()

No of patients: 43


Unnamed: 0,patient_id,gene,alteration
0,s_DS_bkm_077_T,VTCN1,S192L
1,s_DS_bkm_078_T2,NOTCH2,D1582N
2,s_DS_bkm_078_T1,NOTCH2,D1582N
3,s_DS_bkm_074_T,NOTCH2,T1303P
4,s_DS_bkm_064_T2,NOTCH2,P6Rfs*27


In [66]:
# labelled_cbio_brca_mskcc_2019_df.to_csv(f"{mutation_file_path}/labelled_cbio_brca_mskcc_2019_patient_gene_alteration_mutation.csv", index=False)
# unlabelled_cbio_brca_mskcc_2019_df.to_csv(f"{mutation_file_path}/unlabelled_cbio_brca_mskcc_2019_patient_gene_alteration_mutation.csv", index=False)

In [36]:
# CCLE
ccle_input_df = pd.read_csv(f"{raw_path}/CCLE_23Q4/patient_gene_alteration(mutation).csv")
len(ccle_input_df.depmap_id.unique())

2335

In [37]:
ccle_input_df

Unnamed: 0,depmap_id,gene,alteration
0,PR-sxFiuq,SAMD11,L76V
1,PR-DNEoiz,SAMD11,P107S
2,PR-2ei6MD,SAMD11,E160K
3,PR-CYz5sB,SAMD11,A218V
4,PR-xcsbEI,SAMD11,N285S
...,...,...,...
885436,PR-MX9ndc,KDM5D,R68H
885437,PR-AiAKPa,EIF1AY,D83Y
885438,PR-MX9ndc,RPS4Y2,T115A
885439,PR-Bs4EcD,RPS4Y2,P152S


In [38]:
labelled_ccle_df = ccle_input_df[ccle_input_df["depmap_id"].isin(ccle_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(labelled_ccle_df.depmap_id.unique())}")
labelled_ccle_df.head()

No of patients: 1197


Unnamed: 0,depmap_id,gene,alteration
0,PR-sxFiuq,SAMD11,L76V
1,PR-DNEoiz,SAMD11,P107S
2,PR-xcsbEI,SAMD11,N285S
3,PR-ppRdDh,SAMD11,R295P
4,PR-HN3C6C,NOC2L,E728GfsTer49


In [39]:
unlabelled_ccle_df = ccle_input_df[~ccle_input_df["depmap_id"].isin(ccle_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(unlabelled_ccle_df.depmap_id.unique())}")
unlabelled_ccle_df.head()

No of patients: 1138


Unnamed: 0,depmap_id,gene,alteration
0,PR-2ei6MD,SAMD11,E160K
1,PR-CYz5sB,SAMD11,A218V
2,PR-oj10m2,SAMD11,C340Y
3,PR-YK2xaU,NOC2L,A385T
4,PR-yDgpga,NOC2L,A385T


In [40]:
# labelled_ccle_df.to_csv(f"{mutation_file_path}/labelled_ccle_patient_gene_alteration_mutation.csv", index=False)
# unlabelled_ccle_df.to_csv(f"{mutation_file_path}/unlabelled_ccle_patient_gene_alteration_mutation.csv", index=False)

In [41]:
# GENIE NSCLC
genie_nsclc_input_df = pd.read_csv(f"{raw_path}/NSCLC/patient_gene_alteration(mutation).csv")
len(genie_nsclc_input_df.patient_id.unique())

1943

In [42]:
genie_nsclc_input_df

Unnamed: 0,patient_id,gene,alteration
0,GENIE-DFCI-003908-234520,RAD50,*31*
1,GENIE-DFCI-003908-234520,ARID2,Q1227R
2,GENIE-DFCI-003908-234520,FANCB,L43I
3,GENIE-DFCI-003908-234520,SETD2,E1971Kfs*35
4,GENIE-DFCI-003908-234520,POLD1,V553I
...,...,...,...
17341,GENIE-VICC-780278-unk-1,WISP3,S352Y
17342,GENIE-VICC-120723-unk-1,WT1,V73M
17343,GENIE-VICC-287735-unk-2,WT1,P132L
17344,GENIE-VICC-780278-unk-1,WT1,X447_splice


In [49]:
labelled_genie_nsclc_df = genie_nsclc_input_df[genie_nsclc_input_df["patient_id"].isin(genie_nsclc_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(labelled_genie_nsclc_df.patient_id.unique())}")
labelled_genie_nsclc_df.head()

No of patients: 71


Unnamed: 0,patient_id,gene,alteration
0,GENIE-DFCI-003908-234520,RAD50,*31*
1,GENIE-DFCI-003908-234520,ARID2,Q1227R
2,GENIE-DFCI-003908-234520,FANCB,L43I
3,GENIE-DFCI-003908-234520,SETD2,E1971Kfs*35
4,GENIE-DFCI-003908-234520,POLD1,V553I


In [50]:
unlabelled_genie_nsclc_df = genie_nsclc_input_df[~genie_nsclc_input_df["patient_id"].isin(genie_nsclc_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(unlabelled_genie_nsclc_df.patient_id.unique())}")
unlabelled_genie_nsclc_df.head()

No of patients: 1872


Unnamed: 0,patient_id,gene,alteration
0,GENIE-DFCI-002183-6917,EWSR1,P568S
1,GENIE-DFCI-002183-6917,EXT1,F193V
2,GENIE-DFCI-002183-6917,EZH2,E54Kfs*3
3,GENIE-DFCI-002183-6917,FLT1,Y584F
4,GENIE-DFCI-002183-6917,APC,S1559F


In [44]:
# labelled_genie_nsclc_df.to_csv(f"{mutation_file_path}/labelled_genie_nsclc_patient_gene_alteration_mutation.csv", index=False)
# unlabelled_genie_nsclc_df.to_csv(f"{mutation_file_path}/unlabelled_genie_nsclc_patient_gene_alteration_mutation.csv", index=False)

In [48]:
# GENIE CRC
genie_crc_input_df = pd.read_csv(f"{raw_path}/GenieCRC/patient_gene_alteration(mutation).csv")
len(genie_crc_input_df.patient_id.unique())

1532

In [49]:
genie_crc_input_df

Unnamed: 0,patient_id,gene,alteration
0,GENIE-DFCI-002643-6598,PALB2,*16*
1,GENIE-DFCI-002643-6598,FBXW7,E287Q
2,GENIE-DFCI-002643-6598,EGFR,R252C
3,GENIE-DFCI-002643-6598,PSMD13,D175H
4,GENIE-DFCI-002643-6598,NRAS,G12D
...,...,...,...
23060,GENIE-VICC-182499-unk-1,ZNF703,D403_P404insAPRRLQLLHLQRAD
23061,GENIE-VICC-397091-unk-1,ZNF703,D403_P404insAPRRLQLLHLQRAD
23062,GENIE-VICC-669338-unk-1,ZNF703,D403_P404insAPRRLQLLHLQRAD
23063,GENIE-VICC-669338-unk-1,ZNF703,A507delinsERP


In [50]:
labelled_genie_crc_df = genie_crc_input_df[genie_crc_input_df["patient_id"].isin(genie_crc_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(labelled_genie_crc_df.patient_id.unique())}")
labelled_genie_crc_df.head()

No of patients: 71


Unnamed: 0,patient_id,gene,alteration
0,GENIE-DFCI-008889-6970,APC,E1577*
1,GENIE-DFCI-008889-6970,FANCG,T357M
2,GENIE-DFCI-008889-6970,PMS2,D526G
3,GENIE-DFCI-008889-6970,SOX9,S431Ffs*147
4,GENIE-DFCI-008889-6970,MSH6,I570T


In [51]:
unlabelled_genie_crc_df = genie_crc_input_df[~genie_crc_input_df["patient_id"].isin(genie_crc_df.sample_id)].reset_index(drop=True)
print(f"No of patients: {len(unlabelled_genie_crc_df.patient_id.unique())}")
unlabelled_genie_crc_df.head()

No of patients: 1461


Unnamed: 0,patient_id,gene,alteration
0,GENIE-DFCI-002643-6598,PALB2,*16*
1,GENIE-DFCI-002643-6598,FBXW7,E287Q
2,GENIE-DFCI-002643-6598,EGFR,R252C
3,GENIE-DFCI-002643-6598,PSMD13,D175H
4,GENIE-DFCI-002643-6598,NRAS,G12D


In [19]:
# labelled_genie_crc_df.to_csv(f"{mutation_file_path}/labelled_genie_crc_patient_gene_alteration_mutation.csv", index=False)
# unlabelled_genie_crc_df.to_csv(f"{mutation_file_path}/unlabelled_genie_crc_patient_gene_alteration_mutation.csv", index=False)

In [None]:
# Other unlabelled datasets copied over
# unlabelled_cbio_chol_msk_2018_patient_gene_alteration_mutation.csv
# unlabelled_cbio_crc_apc_impact_2020_patient_gene_alteration_mutation.csv
# unlabelled_cbio_egc_msk_2017_patient_gene_alteration_mutation.csv
# unlabelled_cbio_egc_msk_tp53_ccr_2022_patient_gene_alteration_mutation.csv
# unlabelled_cbio_gct_msk_2016_patient_gene_alteration_mutation.csv
# unlabelled_cbio_hcc_mskimpact_2018_patient_gene_alteration_mutation.csv
# unlabelled_cbio_hnc_mskcc_2016_patient_gene_alteration_mutation.csv
# unlabelled_cbio_ihch_msk_2021_patient_gene_alteration_mutation.csv
# unlabelled_cbio_odg_msk_2017_patient_gene_alteration_mutation.csv
# unlabelled_cbio_summit_2018_patient_gene_alteration_mutation.csv
# unlabelled_genie_aacr_patient_gene_alteration_mutation.csv
# unlabelled_icgc_brca_patient_gene_alteration_mutation.csv
# unlabelled_icgc_btca_patient_gene_alteration_mutation.csv
# unlabelled_mskimpact_patient_gene_alteration_mutation.csv
# unlabelled_cbio_breast_alpelisib_2020_patient_gene_alteration_mutation.csv
# unlabelled_cbio_egc_trap_msk_2020_patient_gene_alteration_mutation.csv

### Metadata files - Cancer type details for labelled patients and cell lines.

In [60]:
# TCGA labelled and unlabelled
tcga_metadata = pd.read_csv("/data/druid/data/raw/tcga_clinical_data/clinical.tsv", sep="\t", usecols=["case_submitter_id", "project_id"])
tcga_metadata.drop_duplicates(inplace=True)
tcga_metadata.reset_index(drop=True, inplace=True)
tcga_metadata

Unnamed: 0,case_submitter_id,project_id
0,TCGA-DD-AAVP,TCGA-LIHC
1,TCGA-KK-A7B2,TCGA-PRAD
2,TCGA-DC-6158,TCGA-READ
3,TCGA-DD-A4NP,TCGA-LIHC
4,TCGA-HQ-A5ND,TCGA-BLCA
...,...,...
11423,TCGA-BP-4790,TCGA-KIRC
11424,TCGA-N9-A4Q4,TCGA-UCS
11425,TCGA-RY-A847,TCGA-LGG
11426,TCGA-AB-2881,TCGA-LAML


In [61]:
len(set(tcga_input_df["patient_id"]))

10173

In [62]:
len(set(tcga_metadata["case_submitter_id"]) & set(tcga_input_df["patient_id"]))

10173

In [63]:
# tcga_metadata.to_csv("/data//papers_data/systematic_assessment/raw/metadata/tcga_metadata.csv")

In [54]:
# CCLE 
pd.read_csv("/data/yanrong/druid_data/CCLE/raw_data/DepMap23Q4/OmicsProfiles.csv")

Unnamed: 0,ProfileID,ModelCondition,ModelID,Datatype,WESKit
0,PR-00UtU3,MC-001131-kkJv,ACH-001131,wgs,
1,PR-01r7OM,MC-000957-Yckn,ACH-000957,rna,
2,PR-02XmLG,MC-002785-qo9e,ACH-002785,rna,
3,PR-04VvBz,MC-001289-BpdI,ACH-001289,wes,ICE
4,PR-09gmEI,MC-000520-YIm7,ACH-000520,rna,
...,...,...,...,...,...
3825,PR-zyM15A,MC-000796-MvBN,ACH-000796,wes,AGILENT
3826,PR-ZYtjVT,MC-000278-uC8l,ACH-000278,wgs,
3827,PR-ZzHTvC,MC-000231-tEoc,ACH-000231,rna,
3828,PR-ZzjQOA,MC-000310-adwH,ACH-000310,wes,ICE


In [55]:
m = pd.merge(pd.read_csv("/data/yanrong/druid_data/CCLE/raw_data/DepMap23Q4/OmicsProfiles.csv"), pd.read_csv("/data/yanrong/druid_data/CCLE/raw_data/DepMap23Q4/Model.csv"), on="ModelID")
m

Unnamed: 0,ProfileID,ModelCondition,ModelID,Datatype,WESKit,PatientID,CellLineName,StrippedCellLineName,DepmapModelType,OncotreeLineage,...,TissueOrigin,CCLEName,CatalogNumber,PlateCoating,ModelDerivationMaterial,PublicComments,WTSIMasterCellID,SangerModelID,COSMICID,LegacySubSubtype
0,PR-00UtU3,MC-001131-kkJv,ACH-001131,wgs,,PT-RgVcvG,MS-1,MSDASH1,MCC,Skin,...,,MS1_SKIN,,,,,674.0,SIDM00408,753594.0,
1,PR-01r7OM,MC-000957-Yckn,ACH-000957,rna,,PT-FXUrcz,LS 180,LS180,COAD,Bowel,...,,LS180_LARGE_INTESTINE,CL-187,,,,2081.0,SIDM00680,998189.0,
2,PR-02XmLG,MC-002785-qo9e,ACH-002785,rna,,PT-6sPicj,NCC-LMS1-C1,NCCLMS1C1,LMS,Soft Tissue,...,,,,,,Patient-derived model; Matches NCC-LMS1-X3-C1,,,,
3,PR-04VvBz,MC-001289-BpdI,ACH-001289,wes,ICE,PT-773uN4,COG-AR-359,COGAR359,ATRT,CNS/Brain,...,,COGAR359_SOFT_TISSUE,,,,,,,,
4,PR-09gmEI,MC-000520-YIm7,ACH-000520,rna,,PT-tv8Ku2,59M,59M,HGSOC,Ovary/Fallopian Tube,...,,59M_OVARY,,,,,,,,high_grade_serous
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3825,PR-zyM15A,MC-000796-MvBN,ACH-000796,wes,AGILENT,PT-Tlf0ul,MCAS,MCAS,MOV,Ovary/Fallopian Tube,...,,MCAS_OVARY,JCRB0240,,,,,SIDM00008,,mucinous
3826,PR-ZYtjVT,MC-000278-uC8l,ACH-000278,wgs,,PT-BexFYH,COV362,COV362,HGSOC,Ovary/Fallopian Tube,...,,COV362_OVARY,7071910,,,,,,,high_grade_serous
3827,PR-ZzHTvC,MC-000231-tEoc,ACH-000231,rna,,PT-nIom71,KALS-1,KALS1,GB,CNS/Brain,...,,KALS1_CENTRAL_NERVOUS_SYSTEM,IFO50434,,,,1936.0,SIDM00613,907271.0,glioblastoma
3828,PR-ZzjQOA,MC-000310-adwH,ACH-000310,wes,ICE,PT-2Urqss,IMR-32,IMR32,NBL,Peripheral Nervous System,...,,IMR32_AUTONOMIC_GANGLIA,,,,,,SIDM00226,,


In [147]:
# m.to_csv("/data/papers_data/systematic_assessment/raw/metadata/labelled_ccle_metadata.csv")

In [44]:
# Moore's
set(moores_df.patient_id) - set(pd.read_csv("/data//druid/data/raw/moores_metadata.csv", skiprows=[1])["#"])

set()

In [None]:
# All other labelled datasets
# cbio_hcc_mskimpact_2018: Hepatocellular Carcinoma tumor (LIHC)
# cbio_brca_mskcc_2019: Breast Cancer (BRCA)
# genie_crc: Colorectal Cancer 
# genie_nsclc: Non small cell lung cancer

### Drug Fingerprint

In [None]:
# Get all drug fingerprints of drugs in cell lines and patients

In [152]:
all_drugs = set(tcga_df.drug_name) | set(moores_df.drug_name) | set(genie_nsclc_df.drug_name) | \
            set(genie_crc_df.drug_name) |  set(ccle_df.drug_name) | set(cbio_hcc_mskimpact_2018_df.drug_name) | \
            set(cbio_egc_trap_msk_2020_df.drug_name) | set(cbio_breast_alpelisib_2020_df.drug_name) | set(cbio_brca_mskcc_2019_df.drug_name)
len(all_drugs)

233

Drug SMILES were obtained from DrugBank or from https://www.guidetopharmacology.org/ (isomeric SMILES).

In [190]:
drug_smiles = pd.read_csv("/data/druid/data/raw/drug_smiles.csv", header=None, index_col=0)
drug_smiles

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
JW-7-24-1,COC1=CC(=CC(=C1)C2=CC3=C4C(=CN=C3C=C2)C=CC(=O)...
KIN001-260,C1CC1COC2=CC=CC(=O)C2=C3C=C(C(=C(N3)N)C#N)C4CC...
NSC-87877,C1=CC2=C(C(=O)C(=NNC3=CC4=C(C=C3)C=C(C=C4)S(=O...
GNE-317,CC1=C(SC2=C1N=C(N=C2N3CCOCC3)C4=CN=C(N=C4)N)C5...
NAVITOCLAX,CC1(CCC(=C(C1)CN2CCN(CC2)C3=CC=C(C=C3)C(=O)NS(...
...,...
SB590885,O=NC1=c2cc/c(=c/3\nc([nH]c3=c3cc[nH]cc3)c3ccc(...
STAUROSPORINE,[H][C@]1(C[C@@]2([H])O[C@](C)(N3C4=CC=CC=C4C4=...
TW 37,CC(C)C1=CC=CC=C1CC1=C(O)C(O)=C(O)C(=C1)C(=O)NC...
ULIXERTINIB,[H]N([C@H](CO)C1=CC(Cl)=CC=C1)C(=O)C1=CC(=CN1[...


In [None]:
# copy drug_smiles.csv to /data/papers_data/systematic_assessment/raw/metadata folder

In [173]:
drug_fp = pd.read_csv('/data//copied_from_cdal1/_home_folder/processed/drug_morgan_fingerprints.csv', index_col=0)
drug_fp

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
drug_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
JW-7-24-1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
KIN001-260,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
NSC-87877,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
GNE-317,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
NAVITOCLAX,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LGH447,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TRASTUZUMAB,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
WNT974,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TRIFLURIDINE,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [191]:
# Sanity check to ensure existing Morgan fingerprints match so that same strategy can be reused to generate all fingerprints.
# Convert SMILES to Morgan fingerprint
from rdkit.Chem import AllChem
from rdkit import Chem
smiles_list = [drug_smiles.loc[drug][1] for drug in drug_fp.index]
fpgen = AllChem.GetMorganGenerator(radius=2)
# create a list of mols
mols = []
for idx, smiles in enumerate(smiles_list):
    print(drug_fp.index[idx])
    mols.append(Chem.MolFromSmiles(smiles))
    

# create a list of fingerprints from mols
fps = [fpgen.GetFingerprint(mol) for mol in mols]
# print(fps)

from rdkit.Chem import DataStructs
for i in range(len(fps)):
    array = np.zeros((0, ), dtype=np.int8)
    DataStructs.ConvertToNumpyArray(fps[i], array)
    if not (array.nonzero()[0].all() == drug_fp.loc[drug_fp.index[i]].values.nonzero()[0].all()):
        print(f"Not same for {drug_fp.index[i]}")
    

JW-7-24-1
KIN001-260
NSC-87877
GNE-317
NAVITOCLAX
PLX-4720
ERK5-IN-1
VX-11E
TGX-221
DASATINIB
ZM-447439
CGP-082996
UPROSERTIB
DORAMAPIMOD
AST-1306
WIKI4
OLAPARIB
AGI-6780
CUDC-101
SB-505124
CP-724714
AZD8931
IMATINIB
LGK-974
NVP-BHG712
ZG-10
CX-5461
ERLOTINIB
CMK
LIMK1 INHIBITOR BMS4
AT13148
FTI-277
NVP-ADW742
ENZASTAURIN
NVP-TAE684
FULVESTRANT
WH-4-023
AZD7762
UMI-77
KIN001-244
QL-X-138
AFATINIB
PF-4708671
BICALUTAMIDE
METAP2 INHIBITOR, A832234
LINSITINIB
VISMODEGIB
FTY-720
5-FLUOROURACIL
BELINOSTAT
PEVONEDISTAT
P22077
PAZOPANIB
BUPARLISIB
SELUMETINIB
OXALIPLATIN
ALPELISIB
TAMOXIFEN
CAY10566
WEE1 INHIBITOR
AZ-628
PIPERLONGUMINE
LINIFANIB
GDC-0941
GSK2126458
VINORELBINE
LDN-193189
IBRUTINIB
TORIN-2
RU-SKI-43
UNC0642
JW-7-52-1
BOSUTINIB
AZ6102
GDC-0068
ETP-45835
AZD6738
MG-132
EPZ004777
LFM-A13
UNC0638
PAC-1
RUXOLITINIB
AS601245
GSK1070916
PELITINIB
GNF-2
TEMOZOLOMIDE
DABRAFENIB
OSI-027
IWP-2
AZD6482
XMD11-85H
JQ1-(+)
EPZ-5676
IOX2
DOXORUBICIN
A-770041
WHI-P97
NU-7441
AZD5363
TIVOZANIB


In [195]:
all_drugs - set(drug_fp.index) # drugs without fingerprints

{'ABT737',
 'AFURESERTIB',
 'AZD1208',
 'AZD1332',
 'AZD2014',
 'AZD5153',
 'AZD5438',
 'AZD5991',
 'BCG',
 'BEVACIZUMAB',
 'BLEOMYCIN (50 UM)',
 'BX795',
 'CAPECITABINE',
 'CARBOPLATIN',
 'CCT-018159',
 'CERITINIB',
 'DACTOLISIB',
 'ENTOSPLETINIB',
 'EPZ5676',
 'EXEMESTANE',
 'FLUOROURACIL',
 'GDC0810',
 'GSK269962A',
 'GW441756',
 'IFOSFAMIDE',
 'IPATASERTIB',
 'JQ1',
 'LCL161',
 'LETROZOLE',
 'LGK974',
 'LOMUSTINE',
 'MELPHALAN',
 'MITOMYCIN',
 'NU7441',
 'NUTLIN-3A (-)',
 'OBATOCLAX MESYLATE',
 'PANITUMUMAB',
 'PD0325901',
 'PD173074',
 'PFI3',
 'PICTILISIB',
 'PODOPHYLLOTOXIN BROMIDE',
 'PRIMA-1MET',
 'RAPAMYCIN',
 'RVX-208',
 'SAPITINIB',
 'SB216763',
 'SB505124',
 'SB590885',
 'SEPANTRONIUM BROMIDE',
 'SL0101',
 'STAUROSPORINE',
 'TW 37',
 'ULIXERTINIB',
 'VE821',
 'XAV939',
 'ZM447439'}

In [197]:
all_drugs - set(drug_smiles.index) # drugs without SMILES

{'BEVACIZUMAB',
 'BLEOMYCIN (50 UM)',
 'NUTLIN-3A (-)',
 'PANITUMUMAB',
 'PFI3',
 'PODOPHYLLOTOXIN BROMIDE',
 'PRIMA-1MET',
 'SEPANTRONIUM BROMIDE',
 'SL0101',
 'VE821',
 'XAV939'}

In [199]:
# Convert available SMILES to Morgan fingerprint
from rdkit.Chem import AllChem
from rdkit import Chem
from rdkit.Chem import DataStructs
fpgen = AllChem.GetMorganGenerator(radius=2)

new_drug_fp_df = pd.DataFrame(columns = ["drug_name"] + [i for i in range(0, 2048)])

for drug in drug_smiles.index:
    smiles = drug_smiles.loc[drug][1]
    mol = Chem.MolFromSmiles(smiles) # create a molecule
    fp = fpgen.GetFingerprint(mol) # get fingerprint
    array = np.zeros((0, ), dtype=np.int8)
    DataStructs.ConvertToNumpyArray(fp, array) # convert to numpy array
    new_drug_fp_df.loc[len(new_drug_fp_df)] = [drug] + array.tolist() # append to drug fp dataframe


In [200]:
new_drug_fp_df

Unnamed: 0,drug_name,0,1,2,3,4,5,6,7,8,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,JW-7-24-1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,KIN001-260,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,NSC-87877,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,GNE-317,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,NAVITOCLAX,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474,SB590885,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
475,STAUROSPORINE,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
476,TW 37,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
477,ULIXERTINIB,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [202]:
all_drugs - set(new_drug_fp_df.drug_name) # drugs without fingerprints

{'BEVACIZUMAB',
 'BLEOMYCIN (50 UM)',
 'NUTLIN-3A (-)',
 'PANITUMUMAB',
 'PFI3',
 'PODOPHYLLOTOXIN BROMIDE',
 'PRIMA-1MET',
 'SEPANTRONIUM BROMIDE',
 'SL0101',
 'VE821',
 'XAV939'}

In [201]:
# new_drug_fp_df.to_csv("/data//papers_data/systematic_assessment/raw/metadata/drug_morgan_fingerprints.csv", index=False)

In [275]:
# Drug graphs for PanCDR and drug2tme
adjacency_matrices_drugs = {}
for drug in drug_smiles.index:
    smiles = drug_smiles.loc[drug][1]
    mol = Chem.MolFromSmiles(smiles) # create a molecule
    # Get adjacency matrix
    adjacency_matrix = Chem.GetAdjacencyMatrix(mol, useBO = True)
    # print(adjacency_matrix.shape)
    adjacency_matrices_drugs[drug] = adjacency_matrix


In [278]:
adjacency_matrices_drugs["CISPLATIN"]

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 1.],
       [0., 0., 0., 1., 0.]])

In [279]:
len(adjacency_matrices_drugs)

479

In [281]:
# with open("/data//papers_data/systematic_assessment/raw/metadata/drug_adjacency_matrices.pkl", "wb") as f:
#     pickle.dump(adjacency_matrices_drugs, f)

In [None]:
# Feature matrices are obtained using the deepchem library - use conda env deepchem-test
# Setup instructions: https://deepchem.readthedocs.io/en/latest/get_started/installation.html#jupyter-notebook
# Steps as in https://stackoverflow.com/questions/70459042/convert-a-smiles-dataset-to-graph
# Refer to notebook deepchem_drug_feature_matrices.ipynb

### Gene Expression data

In [204]:
# From CODE-AE for cell lines and TCGA
code_ae_gene_exp = pd.read_csv("/data//drp_baselines_data/CODE_AE/data/preprocessed_dat/uq1000_feature.csv", index_col=0)
code_ae_gene_exp

Unnamed: 0_level_0,PARVA,FMNL1,SORD,KLF6,C4A,SGCE,IMPA2,PALLD,IDS,HPN,...,COL1A1,C12orf75,AMOTL1,MSN,H2BC5,TNFRSF14,ASPH,LBH,MT1G,LYN
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TCGA-19-1787-01,-0.210190,0.548827,0.988168,-0.128672,0.339531,1.425034,-0.025024,0.300400,1.115845,-0.822493,...,-0.619659,0.261581,0.881029,1.191855,-0.078095,-0.517907,-0.109530,-0.370074,-0.092216,0.357845
TCGA-S9-A7J2-01,0.284621,-0.408475,-0.155960,-0.933030,-0.201664,0.911253,-2.083038,0.057637,1.112937,-1.023959,...,-1.751529,-0.661643,0.644328,-2.027280,-2.334596,-1.906248,0.215934,-0.003675,-0.563679,-0.650880
TCGA-EK-A2RE-01,-0.289463,-0.930455,-0.063738,0.326578,-1.850428,-0.274310,0.969852,0.649759,-0.188453,-1.182537,...,-0.039307,1.203386,1.542625,0.827211,0.337716,-1.347415,-0.497253,-1.404204,-0.285066,-0.262336
TCGA-44-6778-01,-0.139284,1.363550,-0.173401,1.266110,-0.374691,0.115956,-0.197982,0.158663,0.920949,0.055052,...,-0.056528,0.257110,0.892207,0.969565,0.608736,0.514984,0.118281,1.027903,0.503104,1.483681
TCGA-F4-6854-01,0.334663,-0.355245,0.415580,-0.323382,-0.383172,-0.933924,0.193446,-0.076133,0.059245,-1.023959,...,0.573734,0.992802,-0.756095,-0.834394,-0.806832,0.858159,0.127631,0.079100,0.729177,0.085448
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ACH-000261,1.791601,-0.236236,0.191823,0.208398,-0.044775,0.732539,1.340447,0.489642,0.503400,-0.397005,...,-0.236762,0.812140,0.375776,0.717296,-1.210695,-0.065794,0.477410,0.193079,-0.484725,0.765387
ACH-000031,0.409905,1.296251,-0.193248,1.169929,1.041570,0.175770,-0.394545,0.736052,0.885536,-0.334622,...,0.113011,-0.962157,0.623925,0.333254,1.366612,1.196051,0.410746,-0.224718,1.206167,0.930022
ACH-000682,0.728941,0.331535,0.772996,0.776657,2.032717,0.406161,-0.559731,0.383930,-0.079404,-0.334622,...,-0.551286,-0.332627,1.129947,0.227055,1.159299,1.180163,1.202418,0.756943,-0.544210,1.513033
ACH-000561,-0.641164,-0.623416,1.024934,-1.485172,-0.506803,-0.709946,1.657551,0.781892,-0.281326,0.043424,...,-0.195023,-0.926249,0.645398,-2.287914,1.085555,-1.004246,-0.721921,-0.899763,-0.538025,-0.011651


In [209]:
m1 = pd.merge(labelled_ccle_df, pd.read_csv("/data/yanrong/druid_data/CCLE/raw_data/DepMap23Q4/OmicsProfiles.csv"), left_on="depmap_id", right_on="ProfileID")
m1

Unnamed: 0,depmap_id,gene,alteration,ProfileID,ModelCondition,ModelID,Datatype,WESKit
0,PR-sxFiuq,SAMD11,L76V,PR-sxFiuq,MC-000693-vUEr,ACH-000693,wgs,
1,PR-DNEoiz,SAMD11,P107S,PR-DNEoiz,MC-000930-v0gO,ACH-000930,wgs,
2,PR-xcsbEI,SAMD11,N285S,PR-xcsbEI,MC-000776-YERh,ACH-000776,wgs,
3,PR-ppRdDh,SAMD11,R295P,PR-ppRdDh,MC-000574-NENH,ACH-000574,wgs,
4,PR-HN3C6C,NOC2L,E728GfsTer49,PR-HN3C6C,MC-000769-HyNe,ACH-000769,wgs,
...,...,...,...,...,...,...,...,...
488849,PR-MX9ndc,KDM5D,A77V,PR-MX9ndc,MC-000999-M5nP,ACH-000999,wes,AGILENT
488850,PR-MX9ndc,KDM5D,R68H,PR-MX9ndc,MC-000999-M5nP,ACH-000999,wes,AGILENT
488851,PR-AiAKPa,EIF1AY,D83Y,PR-AiAKPa,MC-000420-MsLc,ACH-000420,wes,AGILENT
488852,PR-MX9ndc,RPS4Y2,T115A,PR-MX9ndc,MC-000999-M5nP,ACH-000999,wes,AGILENT


In [211]:
len(set(m1["ModelID"]) - set(code_ae_gene_exp.index)) # labelled cell lines without gene expression in CODE-AE

247

In [215]:
set(labelled_tcga_df.patient_id) - set(["-".join(x.split("-")[:-1]) for x in code_ae_gene_exp.index]) # TCGA patients with labels but without gene exp profile

{'TCGA-A6-2671',
 'TCGA-AA-3494',
 'TCGA-AA-3692',
 'TCGA-AA-3844',
 'TCGA-AA-3869',
 'TCGA-AA-3870',
 'TCGA-AA-3930',
 'TCGA-AA-3955',
 'TCGA-AA-3967',
 'TCGA-AA-3972',
 'TCGA-AA-3973',
 'TCGA-AA-3976',
 'TCGA-AA-3994',
 'TCGA-AA-A01R',
 'TCGA-AA-A02F',
 'TCGA-AZ-4308',
 'TCGA-BG-A0VZ',
 'TCGA-BG-A2AE',
 'TCGA-BG-A2L7',
 'TCGA-FI-A2EU',
 'TCGA-HG-A9SC',
 'TCGA-VQ-A8PT',
 'TCGA-VQ-A8PY',
 'TCGA-VQ-A8PZ',
 'TCGA-VS-A9UA'}

In [231]:
# check the cancer type of above patients and the drug they are given
excluded_drugs = {}
excluded_cancer_types = {}
for p in set(labelled_tcga_df.patient_id) - set(["-".join(x.split("-")[:-1]) for x in code_ae_gene_exp.index]):
    print(p)
    print(tcga_df[tcga_df.sample_id == p]["drug_name"].unique()) # drugs given
    print(tcga_metadata[tcga_metadata["submittedCase ID"] == p]["mappedProject"].unique()) # cancer type
    for d in tcga_df[tcga_df.sample_id == p]["drug_name"].unique():
        if d not in excluded_drugs:
            excluded_drugs[d] = [p]
        else:
            excluded_drugs[d].append(p)
            
    for c in tcga_metadata[tcga_metadata["submittedCase ID"] == p]["mappedProject"].unique():
        if c not in excluded_cancer_types:
            excluded_cancer_types[c] = [p]
        else:
            excluded_cancer_types[c].append(p)

TCGA-AZ-4308
['BEVACIZUMAB']
['TCGA-COAD']
TCGA-FI-A2EU
['CARBOPLATIN' 'PACLITAXEL']
['TCGA-UCEC']
TCGA-AA-3955
['CAPECITABINE' 'OXALIPLATIN']
['TCGA-COAD']
TCGA-BG-A2AE
['CARBOPLATIN' 'PACLITAXEL']
['TCGA-UCEC']
TCGA-AA-3972
['BEVACIZUMAB']
['TCGA-COAD']
TCGA-HG-A9SC
['CISPLATIN']
['TCGA-CESC']
TCGA-AA-3692
['IRINOTECAN' 'CETUXIMAB']
['TCGA-COAD']
TCGA-AA-3870
['OXALIPLATIN']
['TCGA-COAD']
TCGA-AA-3494
['CAPECITABINE' 'OXALIPLATIN' 'IRINOTECAN' 'CETUXIMAB']
['TCGA-COAD']
TCGA-AA-3967
['OXALIPLATIN']
['TCGA-COAD']
TCGA-AA-3973
['BEVACIZUMAB']
['TCGA-COAD']
TCGA-BG-A2L7
['CARBOPLATIN']
['TCGA-UCEC']
TCGA-AA-3994
['CAPECITABINE']
['TCGA-COAD']
TCGA-VQ-A8PY
['FLUOROURACIL']
['TCGA-STAD']
TCGA-VQ-A8PT
['FLUOROURACIL' 'CAPECITABINE']
['TCGA-STAD']
TCGA-AA-3976
['OXALIPLATIN']
['TCGA-COAD']
TCGA-AA-A01R
['FLUOROURACIL']
['TCGA-COAD']
TCGA-VS-A9UA
['CISPLATIN']
['TCGA-CESC']
TCGA-AA-3869
['CAPECITABINE' 'BEVACIZUMAB' 'CETUXIMAB']
['TCGA-COAD']
TCGA-VQ-A8PZ
['FLUOROURACIL']
['TCGA-STAD']
TCGA-

In [232]:
excluded_drugs

{'BEVACIZUMAB': ['TCGA-AZ-4308',
  'TCGA-AA-3972',
  'TCGA-AA-3973',
  'TCGA-AA-3869',
  'TCGA-AA-A02F',
  'TCGA-AA-3844'],
 'CARBOPLATIN': ['TCGA-FI-A2EU', 'TCGA-BG-A2AE', 'TCGA-BG-A2L7'],
 'PACLITAXEL': ['TCGA-FI-A2EU', 'TCGA-BG-A2AE', 'TCGA-BG-A0VZ'],
 'CAPECITABINE': ['TCGA-AA-3955',
  'TCGA-AA-3494',
  'TCGA-AA-3994',
  'TCGA-VQ-A8PT',
  'TCGA-AA-3869',
  'TCGA-AA-3930',
  'TCGA-AA-3844'],
 'OXALIPLATIN': ['TCGA-AA-3955',
  'TCGA-AA-3870',
  'TCGA-AA-3494',
  'TCGA-AA-3967',
  'TCGA-AA-3976',
  'TCGA-AA-A02F'],
 'CISPLATIN': ['TCGA-HG-A9SC', 'TCGA-VS-A9UA', 'TCGA-BG-A0VZ'],
 'IRINOTECAN': ['TCGA-AA-3692', 'TCGA-AA-3494', 'TCGA-A6-2671'],
 'CETUXIMAB': ['TCGA-AA-3692', 'TCGA-AA-3494', 'TCGA-AA-3869', 'TCGA-A6-2671'],
 'FLUOROURACIL': ['TCGA-VQ-A8PY',
  'TCGA-VQ-A8PT',
  'TCGA-AA-A01R',
  'TCGA-VQ-A8PZ',
  'TCGA-A6-2671'],
 'DOXORUBICIN': ['TCGA-BG-A0VZ']}

In [233]:
excluded_cancer_types

{'TCGA-COAD': ['TCGA-AZ-4308',
  'TCGA-AA-3955',
  'TCGA-AA-3972',
  'TCGA-AA-3692',
  'TCGA-AA-3870',
  'TCGA-AA-3494',
  'TCGA-AA-3967',
  'TCGA-AA-3973',
  'TCGA-AA-3994',
  'TCGA-AA-3976',
  'TCGA-AA-A01R',
  'TCGA-AA-3869',
  'TCGA-AA-3930',
  'TCGA-A6-2671',
  'TCGA-AA-A02F',
  'TCGA-AA-3844'],
 'TCGA-UCEC': ['TCGA-FI-A2EU', 'TCGA-BG-A2AE', 'TCGA-BG-A2L7', 'TCGA-BG-A0VZ'],
 'TCGA-CESC': ['TCGA-HG-A9SC', 'TCGA-VS-A9UA'],
 'TCGA-STAD': ['TCGA-VQ-A8PY', 'TCGA-VQ-A8PT', 'TCGA-VQ-A8PZ']}

In [224]:
org_df = pd.read_excel("/data//druid/data/raw/ding_etal_2016_tcga_responses.xlsx", sheet_name="Table S2", header=[2])
org_df

Unnamed: 0,Cancer,bcr_patient_barcode,drug_name,DrugBank ID,measure_of_response,days_to_drug_therapy_start,days_to_drug_therapy_end,days_to_initial_pathologic_diagnosis,method_of_sample_procurement,days_to_sample_procurement,days_to_new_tumor_event_after_initial_treatment,additional_pharmaceutical_therapy,new_tumor_event_additional_surgery_procedure,history_of_neoadjuvant_treatment
0,,CDE_ID:2673794,CDE_ID:2975232,,CDE_ID:2857291,CDE_ID:3392465,CDE_ID:3392470,CDE_ID:3131740,CDE_ID:3103514,CDE_ID:3288495,CDE_ID:3392464,CDE_ID:3427616,CDE_ID:3427611,CDE_ID:3382737
1,Adrenocortical carcinoma (ACC),TCGA-OR-A5JM,Sunitinib,DB01268,Clinical Progressive Disease,378,439,0,Surgical Resection,1,72,YES,NO,Yes
2,Adrenocortical carcinoma (ACC),TCGA-OR-A5JM,Ketoconazole,DB01026,Clinical Progressive Disease,378,439,0,Surgical Resection,1,72,YES,NO,Yes
3,Adrenocortical carcinoma (ACC),TCGA-OU-A5PI,Etoposide,DB00773,Stable Disease,69,239,0,Surgical Resection,0,351,YES,YES,No
4,Adrenocortical carcinoma (ACC),TCGA-OU-A5PI,Doxorubicin,DB00997,Stable Disease,69,239,0,Surgical Resection,0,351,YES,YES,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2568,Uterine Carcinosarcoma (UCS),TCGA-NG-A4VU,Paclitaxel,DB01229,Clinical Progressive Disease,76,258,0,Full Hysterectomy,30,281,YES,NO,No
2569,Uterine Carcinosarcoma (UCS),TCGA-NG-A4VU,Ifosfamide,DB01181,Clinical Progressive Disease,76,258,0,Full Hysterectomy,30,281,YES,NO,No
2570,Uterine Carcinosarcoma (UCS),TCGA-NG-A4VW,Cisplatin,DB00515,Complete Response,57,197,0,Full Hysterectomy,23,937,NO,NO,No
2571,Uterine Carcinosarcoma (UCS),TCGA-NG-A4VW,Doxorubicin,DB00997,Complete Response,57,197,0,Full Hysterectomy,23,937,NO,NO,No


In [227]:
org_df[org_df.bcr_patient_barcode == "TCGA-AA-3869"]

Unnamed: 0,Cancer,bcr_patient_barcode,drug_name,DrugBank ID,measure_of_response,days_to_drug_therapy_start,days_to_drug_therapy_end,days_to_initial_pathologic_diagnosis,method_of_sample_procurement,days_to_sample_procurement,days_to_new_tumor_event_after_initial_treatment,additional_pharmaceutical_therapy,new_tumor_event_additional_surgery_procedure,history_of_neoadjuvant_treatment
791,Colon adenocarcinoma (COAD),TCGA-AA-3869,Capecitabine,DB01101,Partial Response,92,365,0,Right Hemicolectomy,0,,,,No
792,Colon adenocarcinoma (COAD),TCGA-AA-3869,Leucovorin,DB00650,Partial Response,365,396,0,Right Hemicolectomy,0,,,,No
793,Colon adenocarcinoma (COAD),TCGA-AA-3869,Fluorouracil,DB00544,Partial Response,365,761,0,Right Hemicolectomy,0,,,,No
794,Colon adenocarcinoma (COAD),TCGA-AA-3869,Irinotecan,DB00762,Partial Response,365,761,0,Right Hemicolectomy,0,,,,No
795,Colon adenocarcinoma (COAD),TCGA-AA-3869,Bevacizumab,DB00112,Partial Response,365,580,0,Right Hemicolectomy,0,,,,No
796,Colon adenocarcinoma (COAD),TCGA-AA-3869,Cetuximab,DB00002,Partial Response,608,761,0,Right Hemicolectomy,0,,,,No


In [250]:
(tcga_df.sample_id.value_counts() == 1).sum()

290

In [267]:
# ClinicalOmics DB https://trials.linkedomics.org/browse/
# Choueiri_CCR_2016.csv - drug is Nivolumab, disease is Kidney
chouerir_df = pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/Choueiri_CCR_2016.csv")
chouerir_df

Unnamed: 0,Sample,clinical_response,5_8S_rRNA,A1BG,A1BG-AS1,A2M,A2M-AS1,A4GALT,AAAS,AACS,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,Choueiri_CCR_2016-p014-ar-325,1,11.298998,3.663703,4.259892,13.614612,5.271033,9.786223,9.539531,8.95176,...,8.714027,7.686009,8.558936,7.566819,8.840192,8.955534,9.727838,12.012171,10.584371,10.007776
1,Choueiri_CCR_2016-p018-ar-318,0,9.27722,14.506591,1.547488,16.574779,5.074347,7.443581,9.457223,8.251938,...,9.130868,8.12849,8.626544,7.690025,9.632219,8.423358,10.802206,10.748386,9.762066,10.33685
2,Choueiri_CCR_2016-p021-ar-301,1,8.500729,2.932087,4.559269,14.667458,5.893158,7.683111,9.632686,9.759669,...,9.32442,8.698158,9.193108,9.255502,9.674359,9.764085,10.543387,11.992715,10.723015,10.619649
3,Choueiri_CCR_2016-p027-ar-866,0,7.386269,5.098429,5.098429,15.464619,6.375733,9.790675,9.040293,8.510315,...,9.162548,8.965464,9.427022,8.33903,8.663404,8.795906,10.188956,11.268327,10.476243,10.05404
4,Choueiri_CCR_2016-p041-ar-306,0,9.383837,3.019525,6.171704,15.311525,6.508565,8.972078,9.363224,9.630211,...,8.695249,8.132478,8.101696,8.940011,9.210931,9.005718,10.513629,11.13711,10.948455,10.482904
5,Choueiri_CCR_2016-p050-ar-551,1,12.064703,5.995103,6.070965,15.333357,4.780838,8.791335,9.703525,9.916228,...,8.973427,8.278926,9.477597,7.551485,8.621591,7.971544,10.288866,11.931709,10.200282,9.549488
6,Choueiri_CCR_2016-p073-ar-324,0,7.511236,5.753158,6.212111,15.852196,6.291969,8.865857,9.767528,9.002692,...,9.098339,8.329467,9.584074,8.148503,9.229062,8.660555,10.155122,11.582661,9.703264,10.142613
7,Choueiri_CCR_2016-p090-ar-331,0,7.155086,8.17459,5.387686,12.74937,6.637461,7.004545,9.360293,9.349707,...,8.388597,8.728182,10.185444,7.727413,8.481324,8.367751,10.173485,9.317456,7.382541,10.537778
8,Choueiri_CCR_2016-p093-ar-345,1,8.662291,9.847299,6.028912,15.971965,5.67563,8.49307,9.405148,9.824677,...,9.195463,8.700757,8.988138,8.289934,9.136207,8.374769,10.107865,12.592085,10.491013,9.974565
9,Choueiri_CCR_2016-p096-ar-333,0,6.20909,5.025306,7.287994,14.308542,6.882365,10.154459,10.572453,10.032851,...,8.923244,8.767465,8.407307,8.774313,8.722199,10.284683,9.847511,11.362769,11.378561,9.87481


In [266]:
# GSE166449.csv disease-Lung, treatment pembrolizumab (no SMILES)
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/GSE166449.csv")

Unnamed: 0,Sample,anti_pd1_response,ZFYVE28,IQGAP1,AK1,TEF,DYRK3,GALNT12,MED13,EPS15,...,SNX6,MTERF,CASS4,LY75,C19orf25,IER3IP1.1,MAD1L1,FBXO22,SPG7,GDE1
0,SMC_IO_C001,1,2.411426,8.534575,5.531693,1.807355,1.014355,1.978196,5.704872,7.178117,...,6.114367,3.948601,1.713696,6.039577,2.918386,3.835924,5.587965,5.44228,6.80426,2.761285
1,SMC_IO_C002,1,2.207893,7.491693,6.1127,2.794936,3.144046,3.543496,4.430285,6.434962,...,5.843481,2.687061,1.485427,2.981853,1.565597,5.077243,4.618239,4.899176,5.917432,3.469886
2,SMC_IO_C003,1,2.641546,8.057234,6.233428,2.400538,3.801159,4.624101,5.08151,6.014355,...,5.080658,2.744161,1.887525,3.518535,2.104337,5.386811,4.761285,5.876271,6.368419,3.31904
3,SMC_IO_C004,1,2.735522,7.673556,5.0,2.344828,3.166715,3.02148,4.928844,6.267161,...,5.375039,2.782409,1.545968,3.611172,2.0,5.392317,5.653633,4.978196,6.179511,3.275007
4,SMC_IO_C005,1,1.903038,7.80271,6.945093,1.847997,3.257011,4.986411,4.478972,5.936402,...,5.423578,2.831877,1.400538,3.648465,2.090853,5.003602,5.171527,4.894333,5.644433,3.817623
5,SMC_IO_C006,1,2.344828,7.954778,5.590362,2.500802,2.944858,3.735522,4.465974,6.312883,...,5.791814,2.422233,2.130931,3.347666,1.250962,5.495695,4.285402,5.36247,6.130107,3.867896
6,SMC_IO_C007,1,1.650765,7.180506,4.861955,2.411426,1.594549,3.739848,4.808385,5.546586,...,6.331992,2.422233,1.978196,3.263034,1.941106,4.794936,4.802193,4.663345,5.791293,3.967169
7,SMC_IO_C008,0,1.970854,9.115356,7.851124,4.032101,2.87578,4.638074,4.58376,6.954894,...,5.77663,2.124328,2.778209,3.771886,2.580145,6.06652,5.321207,5.503349,5.514753,4.600508
8,SMC_IO_C009,0,2.028569,6.561632,4.209453,1.286881,1.432959,3.035624,3.944858,5.656496,...,4.590961,2.22033,1.765535,3.754888,1.739848,3.662205,4.559492,4.431623,5.845992,2.395063
9,SMC_IO_C010,0,1.356144,6.029453,2.545968,1.214125,1.695994,2.250962,2.823749,4.053111,...,3.577731,1.389567,1.137504,1.895303,1.15056,2.464668,4.014355,4.148934,4.672425,2.405992


In [235]:
# GSE5462 disease - Breast, drug - Letrozole
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/GSE5462.csv")

Unnamed: 0,Sample,clinical_response,HR,HER2,SFN,LAMP1,NR1D1,NRTN,SPATA2L,CARD10,...,SNORA70,CEMP1,LOC389906,LOC105372602,RPL26,OR2A9P,LINC00921,EGOT,LAMA1,WHAMM
0,GSM125123,1,1,0,11.253079,12.404476,8.637349,1.536053,8.529821,8.111136,...,7.851124,4.678072,2.232661,5.954196,11.015554,5.970394,6.903279,2.981853,3.643856,5.611763
1,GSM125125,0,1,0,11.905387,12.459483,8.481396,2.608809,8.198199,8.188836,...,7.652486,3.392317,2.560715,5.429616,10.360957,5.78398,6.436295,4.336283,3.963474,5.061776
2,GSM125127,1,1,0,8.60622,12.914908,8.926593,1.201634,8.664269,8.60844,...,8.066089,3.137504,3.201634,5.956521,11.007728,6.108524,6.66249,3.185867,2.867896,6.066089
3,GSM125129,1,1,0,12.208631,12.964684,8.454094,2.432959,8.696968,8.008429,...,7.509379,2.786596,2.232661,5.937815,10.646109,5.274262,6.274262,5.870365,3.277985,6.114783
4,GSM125131,1,1,0,12.319983,12.801264,8.538771,1.722466,8.482606,8.516882,...,6.427941,3.847997,3.307429,6.066089,10.218321,6.228819,5.942515,6.387156,3.432959,5.177918
5,GSM125133,1,1,0,11.210854,12.970465,8.630449,2.321928,8.490249,8.887221,...,6.829088,3.744161,4.626439,5.446256,10.461684,6.920055,6.263034,3.776104,6.087463,6.1918
6,GSM125137,1,1,0,11.426946,12.845118,8.249825,1.201634,8.071462,8.30515,...,6.624978,3.292782,2.887525,5.754888,9.76205,5.60585,5.0268,3.070389,4.754888,3.485427
7,GSM125139,1,1,0,10.804857,12.494281,8.027906,1.584963,8.398744,8.74113,...,6.872829,3.0,2.263034,6.059615,10.395641,6.355792,5.840463,4.711495,3.201634,4.472488
8,GSM125141,1,1,0,10.261507,12.446747,8.510566,0.925999,7.72724,7.912889,...,7.376777,3.263034,2.963474,5.074677,10.305834,5.666757,4.329124,5.314697,3.070389,5.412782
9,GSM125143,1,1,0,10.768019,12.353864,8.197217,1.722466,8.112179,7.867896,...,7.958843,3.765535,2.584963,5.569856,10.883712,5.635174,6.525129,5.683696,2.906891,4.916477


In [251]:
# GSE9782_Dex.csv disease - Myeloma, treatment - dexamethasone
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/GSE9782_Dex.csv")

Unnamed: 0,Sample,Dex_response,SFN,LAMP1,NR1D1,NRTN,SPATA2L,CARD10,LUZP1,KCNJ4,...,SNORA70,CEMP1,LOC389906,LOC105372602,RPL26,OR2A9P,LINC00921,EGOT,LAMA1,WHAMM
0,GSM246539,0,7.335194,9.894110,8.122051,1.513849,6.930193,5.401142,6.887452,3.365383,...,9.831632,2.666675,5.425422,4.436508,10.746338,5.861050,4.775846,3.440556,3.979129,5.836199
1,GSM246540,1,7.263729,10.472376,7.927227,2.234584,6.577430,6.019039,6.412902,5.243612,...,8.663722,2.730683,4.475202,5.794494,10.890340,4.806927,5.615243,3.848808,4.070716,5.690710
2,GSM246544,1,7.485322,11.287793,8.568051,1.441792,7.409917,4.991082,7.929282,5.545449,...,7.641698,3.896785,5.343120,5.789862,10.293816,5.817644,6.076886,6.145849,5.552088,6.310936
3,GSM246552,1,6.569499,10.624887,8.010713,1.102611,6.365301,5.182633,5.618371,3.853646,...,8.836047,2.275061,2.548915,4.929128,11.821734,5.814945,4.884852,2.610152,4.708932,5.167362
4,GSM246558,0,7.055065,10.445894,8.108770,0.835876,6.940190,6.260893,6.104377,4.453814,...,8.729706,2.553137,1.558831,5.023082,11.115622,5.556058,5.911265,3.415926,4.435762,5.065788
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,GSM246775,0,7.343612,11.034634,7.837502,0.883535,6.458266,5.160589,5.127320,4.011174,...,7.208703,2.508058,2.494884,3.176114,11.723776,5.413462,5.458087,4.457936,4.951238,5.467612
66,GSM246776,0,7.570607,9.326025,7.995903,1.383420,6.725850,5.960224,5.949607,5.198985,...,8.294455,3.806726,2.772217,5.075494,11.293983,6.270755,5.390148,4.341004,5.531491,6.100343
67,GSM246778,0,7.890532,11.044544,8.424796,1.644618,6.800110,5.640025,6.376610,3.481557,...,7.154059,3.516166,3.088374,3.507604,10.531869,5.680791,6.267890,2.788512,5.354632,5.439543
68,GSM246779,0,6.931612,10.736799,8.347325,1.056167,6.606257,6.073666,6.316540,2.343121,...,6.571372,2.657919,2.692972,5.866673,11.809242,5.748826,5.044241,2.406134,3.738433,4.536693


In [252]:
# GSE9782_PS341.csv disease - Myeloma treatment - bortezomib
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/GSE9782_PS341.csv")

Unnamed: 0,Sample,PS341_response,SFN,LAMP1,NR1D1,NRTN,SPATA2L,CARD10,LUZP1,KCNJ4,...,SNORA70,CEMP1,LOC389906,LOC105372602,RPL26,OR2A9P,LINC00921,EGOT,LAMA1,WHAMM
0,GSM246523,0,8.482126,11.826981,8.313577,1.866002,7.817367,6.279247,5.848250,3.952408,...,10.541058,3.654619,4.937763,3.263592,9.463663,4.710239,6.310355,3.557300,5.813579,5.027769
1,GSM246524,0,7.213668,11.529089,8.359170,1.331401,7.451945,4.675031,6.136339,2.496588,...,9.714343,3.231852,4.295929,5.029475,9.876922,4.675206,5.640132,3.671090,5.311914,5.657689
2,GSM246525,1,7.285513,11.356160,7.714115,1.224898,7.543906,4.866023,6.140991,3.726428,...,9.116523,2.392897,4.383359,2.997625,10.983891,6.077519,5.237346,3.073608,3.949395,4.732112
3,GSM246526,0,6.973887,10.969113,7.638863,1.598318,7.393082,4.596965,5.682149,3.392702,...,9.938028,2.568808,4.546573,4.882995,10.895643,4.732839,4.882227,2.770548,4.498410,5.150178
4,GSM246527,1,6.602029,10.632023,8.238634,3.224587,7.353367,7.172987,6.466867,5.969395,...,8.817953,2.581739,2.004929,3.008048,10.638110,6.150350,5.375846,2.034188,2.574312,5.782542
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164,GSM246781,0,7.548182,11.685204,8.464133,1.606594,8.275864,4.756831,5.289975,5.124659,...,9.736818,3.638723,3.423077,4.927275,11.067811,3.573980,6.466226,3.654138,6.473558,5.842815
165,GSM246782,0,7.160436,9.667156,7.831605,2.670476,7.178515,5.958089,6.549404,3.573896,...,10.564588,3.848838,4.450749,5.436355,10.126446,3.525255,6.333434,3.775261,5.371573,6.431720
166,GSM246783,0,9.251849,10.013616,8.006960,1.316986,6.032974,6.486013,5.104152,3.051534,...,8.832963,2.803089,5.554484,5.608777,11.255583,6.244105,6.868143,4.775272,5.766765,6.207805
167,GSM246784,0,7.566823,9.998139,8.678896,2.727877,7.170466,6.945596,7.903370,4.078439,...,10.237078,4.087802,2.865430,5.888263,10.661814,3.789333,7.261324,3.686209,6.518793,6.637954


In [253]:
# Gide_Cell_2019_nivo.csv disease - Melanoma treatment - Nivolumab
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/Gide_Cell_2019_nivo.csv")

Unnamed: 0,Sample,clinical_response,5_8S_rRNA,A1BG,A2M,A2M-AS1,A4GALT,AAAS,AACS,AADAC,...,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1,ZZZ3
0,Gide_Cell_2019-PD03-ar-949,0,11.712145,7.047395,14.723051,4.837532,7.537183,11.952532,9.922307,6.039596,...,9.177991,8.704862,9.778644,9.718956,9.13246,2.80699,10.632024,11.198388,12.338031,10.884508
1,Gide_Cell_2019-PD06-ar-973,0,9.663582,4.299306,15.704153,8.316102,8.266273,11.164159,8.707553,0.988138,...,9.146144,8.377124,9.889168,9.485885,8.491943,3.977678,9.351253,9.920182,11.658741,10.871008
2,Gide_Cell_2019-PD07-ar-974,0,12.172984,6.196044,15.67605,5.653039,6.342409,11.105188,7.975597,1.611158,...,10.307043,9.152789,9.456067,10.036824,8.490217,2.617633,10.55676,11.150886,12.071498,10.322174
3,Gide_Cell_2019-PD08-ar-565,0,15.481904,7.076815,13.371691,4.277984,6.100136,11.883483,9.520029,3.510961,...,12.113937,9.112178,8.273409,8.758165,9.710461,2.536052,10.308566,10.731149,11.93214,11.584116
4,Gide_Cell_2019-PD08-ar-975,0,12.462604,7.217744,15.374964,5.16051,7.420281,11.870411,9.073025,3.859974,...,11.574789,8.448331,8.993492,8.755786,9.636819,3.57831,10.489835,11.428797,12.444204,11.514792
5,Gide_Cell_2019-PD23-ar-943,1,12.802586,6.366621,17.216586,4.878111,8.314799,11.994116,9.759571,1.160344,...,9.896736,8.396393,8.725324,9.635434,9.786052,1.160344,10.70638,12.0312,13.0429,10.45668
6,Gide_Cell_2019-PD25-ar-944,1,12.006468,5.946723,15.238367,7.769727,8.33903,11.509801,10.013695,5.024738,...,9.471144,8.142381,8.805272,9.227737,9.707277,4.969926,10.359868,11.358653,12.422931,10.522248
7,Gide_Cell_2019-PD25-ar-945,1,11.418708,7.182579,15.998093,6.375313,7.901724,11.813705,9.520033,2.984952,...,9.829864,8.962963,8.39935,8.758663,9.398559,2.308176,10.721736,10.656109,11.667105,10.957212
8,Gide_Cell_2019-PD36-ar-953,1,10.696707,6.198623,16.286169,6.753395,7.053426,11.693041,8.669512,1.755014,...,10.597768,8.596137,9.716054,10.003858,9.250478,1.755014,10.398924,10.215106,12.028993,11.064164
9,Gide_Cell_2019-PD49-ar-967,1,11.561936,7.197441,15.295617,5.692507,7.857449,11.018836,9.564922,2.901926,...,9.328739,8.951868,8.964656,9.464748,9.233679,3.909534,10.253037,10.395066,11.922698,10.831639


In [254]:
# Gide_Cell_2019_pembro.csv disease - Melanoma treatment - Pembrolizumab
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/Gide_Cell_2019_pembro.csv")

Unnamed: 0,Sample,clinical_response,5_8S_rRNA,A1BG,A1BG-AS1,A2M,A2M-AS1,A4GALT,AAAS,AACS,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,Gide_Cell_2019-PD02-ar-941,0,11.209932,6.00077,6.914781,16.329935,6.69438,9.255129,11.756724,9.083925,...,10.050976,8.795637,7.254655,9.338755,9.747913,9.11761,10.664265,11.3296,12.664786,10.156029
1,Gide_Cell_2019-PD04-ar-564,0,15.422135,6.102452,6.126174,16.040018,7.038663,9.264129,11.789631,9.828965,...,9.890999,10.241886,8.025189,9.820603,9.851197,10.135371,10.479353,11.451081,12.967266,11.501544
2,Gide_Cell_2019-PD05-ar-968,0,11.062152,6.848704,6.687922,14.502324,5.73515,7.842432,11.494538,8.5428,...,9.357885,9.790967,8.655812,8.741916,8.802813,8.633908,10.374066,9.193905,12.382804,10.961749
3,Gide_Cell_2019-PD09-ar-977,0,8.629191,5.047872,7.579433,16.301719,6.553366,8.993928,11.723934,9.55054,...,9.476307,8.913472,7.766954,9.445315,9.363779,8.866376,9.995642,11.287652,11.743147,10.409935
4,Gide_Cell_2019-PD10-ar-928,0,11.815815,6.701376,7.24717,15.178335,5.781038,8.259844,11.470562,9.981306,...,10.55583,10.249057,9.418774,9.353474,9.711923,9.603044,10.836046,10.660155,12.337066,10.532117
5,Gide_Cell_2019-PD10-ar-929,0,12.57741,6.819482,7.200409,14.130747,4.676571,6.819482,11.612582,10.020347,...,10.738811,10.52556,10.024131,9.450205,9.811202,9.838693,10.95861,9.722271,12.50903,11.526404
6,Gide_Cell_2019-PD11-ar-930,0,13.263444,5.613682,6.723165,14.228706,5.613682,9.733508,11.527982,11.864087,...,10.277085,8.99356,7.454658,9.609243,9.522582,9.572086,10.401673,10.637141,12.468663,10.181008
7,Gide_Cell_2019-PD12-ar-931,1,10.254509,6.656693,9.29216,14.435264,4.991784,9.906221,12.301428,10.027227,...,10.207895,9.14827,8.018122,9.651137,9.574112,9.471312,10.27872,11.619086,13.068041,10.744999
8,Gide_Cell_2019-PD13-ar-932,0,11.875531,7.081292,6.565085,14.177056,5.858639,5.908408,11.649164,9.831918,...,9.276461,10.150304,9.948019,8.58399,8.757965,8.859035,10.218476,9.800964,11.266389,10.522042
9,Gide_Cell_2019-PD13-ar-933,0,11.60515,7.137768,6.621204,13.622978,6.74104,6.773527,11.640348,10.189664,...,9.557924,9.400427,9.517737,8.890444,9.329019,9.13886,10.302497,9.449003,12.073598,10.883362


In [255]:
# HugoLo_IPRES_2016.csv disease - Melanoma, treatment - pembrolizumab
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/HugoLo_IPRES_2016.csv")

Unnamed: 0,Sample,clinical_response,A1BG,A1BG-AS1,A2M,A2M-AS1,A4GALT,AAAS,AACS,AADAT,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,HugoLo_IPRES_2016-Pt01-ar-279,0,9.0229,9.063559,15.080522,5.118215,9.217903,10.50866,8.693845,7.825095,...,9.250329,8.938941,9.562774,7.930369,9.041321,9.846005,10.792908,12.41214,10.730835,11.398149
1,HugoLo_IPRES_2016-Pt02-ar-280,1,13.186978,8.590159,16.061754,7.58,5.174207,10.275232,8.268774,9.119124,...,8.546483,8.438316,8.901668,7.785054,9.405329,9.266431,10.256331,11.284014,10.286236,10.541752
2,HugoLo_IPRES_2016-Pt04-ar-281,1,8.711213,7.744241,13.725075,4.568445,5.80838,10.583853,8.225971,7.364512,...,8.336646,10.202354,10.554892,7.421507,9.412278,9.943133,9.816232,12.23798,11.149466,10.12804
3,HugoLo_IPRES_2016-Pt05-ar-282,1,8.782163,8.460685,12.674962,2.022886,5.757787,10.451952,10.009115,7.891395,...,9.216497,9.818497,9.994745,7.207706,8.988184,9.567613,10.101198,11.980071,10.935806,10.540684
4,HugoLo_IPRES_2016-Pt06-ar-283,1,6.9324,6.741852,14.508349,5.292216,6.752347,10.317898,9.937463,7.184937,...,9.517026,10.414392,10.073688,8.404263,9.946343,9.458495,10.37486,12.710131,11.694521,10.680719
5,HugoLo_IPRES_2016-Pt07-ar-284,0,9.561109,9.496104,12.730434,6.172371,6.214263,9.919806,9.541167,6.498866,...,9.20472,10.279732,10.622938,7.271381,8.807789,9.855221,10.487711,10.352166,10.4276,10.60591
6,HugoLo_IPRES_2016-Pt08-ar-285,1,8.758855,8.38872,14.293621,8.020106,8.476727,10.479809,9.711618,6.911965,...,8.960996,9.761017,9.412055,7.230406,8.447819,9.519962,10.43009,13.25295,11.159027,10.955532
7,HugoLo_IPRES_2016-Pt09-ar-286,1,8.699282,8.207695,15.329277,7.912379,8.120562,10.354946,9.319947,7.505085,...,9.57382,9.844953,9.972969,7.722862,8.900718,9.393436,10.493058,12.173574,11.111407,10.303304
8,HugoLo_IPRES_2016-Pt10-ar-287,0,8.265238,8.291308,16.645193,6.909387,11.279796,10.589661,10.294664,9.490813,...,9.912259,9.495142,10.599331,7.14408,8.118645,10.053384,10.410256,12.738281,10.99038,10.412085
9,HugoLo_IPRES_2016-Pt12-ar-288,0,8.411889,9.264328,16.438588,7.065058,7.05135,10.16534,11.289367,9.022569,...,9.385675,11.264163,11.3901,4.994662,8.309084,8.653366,9.718423,14.629969,10.660709,11.060951


In [256]:
# IMVigor210.csv disease - bladder, treatment - atezolizumab
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/IMVigor210.csv")

Unnamed: 0,Sample,clinical_response,5_8S_rRNA,A1BG-AS1,A2M,A2M-AS1,A2ML1,A4GALT,AAAS,AACS,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,IMVigor210-0257bb-ar-0257bbb,1,9.882665,3.000098,15.481894,5.596586,11.014615,8.371134,11.425248,8.528791,...,10.386555,9.670087,9.676748,8.329484,9.089099,10.449023,11.512928,10.541059,12.495160,11.363783
1,IMVigor210-025b45-ar-025b45c,0,11.851834,5.836438,14.942211,6.141405,9.843205,9.520291,11.437804,10.143159,...,9.769722,10.148373,8.045257,8.566312,9.316399,9.550982,11.068240,12.123983,12.339048,11.280849
2,IMVigor210-032c64-ar-032c642,0,12.734691,6.094369,14.024018,5.383708,6.800107,11.621398,12.159945,10.167018,...,9.899037,10.527744,9.797378,8.798547,9.308219,10.007310,10.445814,12.282157,12.571222,10.713758
3,IMVigor210-0571f1-ar-0571f17,1,12.476333,6.717725,14.626249,7.826686,11.074786,9.062797,11.565987,9.028646,...,9.863839,10.165690,9.024033,9.491546,9.651510,9.756645,10.417823,11.152959,13.040836,11.038635
4,IMVigor210-065890-ar-0658907,1,10.044679,3.018964,13.807035,4.401442,11.398448,7.417421,11.453099,10.071934,...,10.150702,10.491788,8.850801,9.076916,9.914161,10.215365,11.071264,10.025149,12.196001,11.450354
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
293,IMVigor210-f82bbd-ar-f82bbdc,0,10.205752,3.352843,13.071358,3.607813,4.012707,10.000059,11.500993,9.721642,...,9.771396,9.690947,7.968704,9.372300,10.349137,10.161352,11.063333,10.546956,11.657657,11.323367
294,IMVigor210-fb7aec-ar-fb7aec7,1,9.523204,4.687685,13.857916,5.938106,14.104508,11.314302,10.620190,10.179063,...,10.055289,10.227172,8.822743,8.864575,9.701462,9.541506,10.240719,11.619485,12.371155,11.478764
295,IMVigor210-fd9476-ar-fd94761,0,11.619708,4.875871,16.214545,5.968406,2.296305,10.920658,11.421696,9.600933,...,9.294379,8.898700,8.142937,8.107489,8.968005,9.463801,10.502224,11.975985,12.074469,10.470753
296,IMVigor210-fed609-ar-fed6099,0,10.309740,4.265073,15.734235,4.645491,11.303277,10.585949,11.056815,9.088787,...,10.296738,9.974383,8.686288,8.354192,9.465734,9.192534,10.881635,11.348899,11.831941,11.228534


In [257]:
# IMmotion150_atezo.csv disease - Kidney, treatment - atezolizumab
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/IMmotion150_atezo.csv")

Unnamed: 0,Sample,clinical_response,A2M,A2M-AS1,A4GALT,AAAS,AACS,AADAT,AAGAB,AAK1,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,IMmotion150-p01155-ar-28f34,0,15.456656,4.419819,9.184215,11.304662,9.908063,7.841525,10.410467,12.213613,...,9.834328,8.365535,7.144409,9.245320,9.227191,9.365068,10.651248,11.070982,11.954780,10.369119
1,IMmotion150-p03058-ar-c2dad,1,17.202310,7.751687,9.716414,11.295694,9.358531,8.120121,10.188187,11.609073,...,9.498075,9.129478,6.349212,8.991485,9.031702,8.958174,10.770014,11.589429,12.221761,10.758516
2,IMmotion150-p03363-ar-a74ac,0,16.711731,5.813817,10.742905,11.820036,9.072247,7.550470,9.297958,12.026618,...,9.075944,7.711244,6.266165,9.503326,9.012856,9.404706,10.253390,11.638193,11.923691,9.515035
3,IMmotion150-p05370-ar-27be5,1,15.850463,6.417590,9.679215,11.389934,9.706902,8.948913,9.999266,11.401532,...,10.245683,9.606243,8.225076,9.502400,9.277346,9.425650,10.949885,11.214728,12.395863,10.797847
4,IMmotion150-p06184-ar-828f7,0,15.220157,5.745011,10.472546,11.795077,8.671359,7.985222,9.803071,11.804758,...,9.477197,8.500818,7.024424,9.654323,9.046334,9.626574,10.350966,11.193469,12.427045,10.370651
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,IMmotion150-p92869-ar-e2339,0,15.148642,6.143362,8.644950,11.908470,10.281009,8.109894,9.839176,12.106583,...,9.264298,8.760729,8.023659,8.916812,8.985391,9.141504,11.304082,11.600624,12.410137,11.131990
77,IMmotion150-p93090-ar-65a4c,1,15.886985,6.295413,10.263704,11.229983,9.100333,8.510964,9.703356,11.802065,...,10.145913,8.817112,7.189236,10.386933,10.127088,9.806199,10.922066,10.667561,12.459229,10.636372
78,IMmotion150-p96212-ar-544d1,1,15.542653,6.185279,10.384918,11.731540,8.447439,8.570559,9.462598,11.184600,...,9.513066,8.510315,7.114353,9.448020,9.243598,9.559546,10.496634,10.865604,12.253428,10.346996
79,IMmotion150-p98637-ar-bcb5a,1,15.453750,6.158589,9.982868,11.424178,8.939812,7.558420,10.030175,11.595838,...,9.678516,9.170918,8.222162,9.567689,9.246092,9.593998,9.996943,11.687212,12.274197,9.840265


In [258]:
# IMmotion150_sunitinib.csv disease - kidney, treatment - sunitinib
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/IMmotion150_sunitinib.csv")

Unnamed: 0,Sample,clinical_response,5_8S_rRNA,A1BG-AS1,A2M,A2M-AS1,A4GALT,AAAS,AACS,AADAT,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,IMmotion150-p01316-ar-e5482,1,11.363103,4.475456,14.297954,6.534373,8.703795,11.439561,8.958494,7.569666,...,9.580910,8.987385,6.368564,9.367067,9.077790,8.942186,10.291607,11.211629,11.443041,10.179224
1,IMmotion150-p01492-ar-b5c6d,0,13.324090,4.407867,16.188498,6.004751,10.148486,10.814786,9.179947,9.065460,...,10.197461,9.871173,5.916917,9.397243,9.093846,9.320648,11.192956,11.730456,12.466822,10.816810
2,IMmotion150-p03170-ar-b28cd,1,11.275702,5.437510,16.447153,6.811590,9.836635,11.807324,10.155027,8.072165,...,10.312745,9.033241,8.346871,8.875298,8.992293,9.524347,10.455714,12.075279,12.382440,10.865854
3,IMmotion150-p05096-ar-a365c,1,11.982754,6.038552,15.280151,5.827910,10.935599,12.016370,9.981631,8.297449,...,9.802098,8.372238,7.600519,9.200860,9.075831,9.987987,10.343176,11.804491,12.539674,10.439080
4,IMmotion150-p05746-ar-0ce2a,0,12.433265,4.988366,14.562128,5.332088,8.605691,11.419795,8.764269,8.514303,...,10.243093,10.012062,8.732757,9.228128,9.353610,9.396454,10.557852,12.381247,12.479224,10.600532
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,IMmotion150-p91915-ar-d7992,1,11.715020,4.266858,15.591546,6.053095,9.942911,11.742989,9.711259,8.650189,...,9.660647,8.965378,8.024118,9.667379,9.626385,9.670106,10.623356,11.591602,12.246157,10.593846
78,IMmotion150-p93939-ar-d03c1,0,11.856113,4.258121,15.891972,6.076686,9.331702,11.750595,10.151579,8.312546,...,9.705973,8.142204,7.564199,9.386953,9.130502,9.711927,10.271332,11.444860,12.493167,9.358522
79,IMmotion150-p94113-ar-dd107,0,13.188261,5.631824,15.563242,7.960222,10.196512,11.615884,9.715972,6.995214,...,9.417311,9.148212,7.396526,9.439262,9.474957,9.803201,10.183881,11.312899,12.407338,10.522903
80,IMmotion150-p97176-ar-62eac,1,10.638826,5.061967,16.786064,6.588999,9.761786,11.502432,9.313170,7.911647,...,9.581939,8.982596,7.459304,10.166782,9.921559,9.523429,10.837042,11.654030,12.551299,10.394924


In [259]:
# Kim_NatMed_2018.csv disease - Gastric, treatment - pembrolizumab
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/Kim_NatMed_2018.csv")

Unnamed: 0,Sample,clinical_response,A1BG,A1BG-AS1,A1CF,A2M,A2M-AS1,A4GALT,AAAS,AACS,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,Kim_NatMed_2018-p602-ar-987,0,6.747422,6.716226,4.548989,12.675604,4.309142,8.561291,10.087244,10.661819,...,10.083524,8.855444,10.857469,7.477968,9.303432,9.143197,10.288058,10.978571,10.127537,10.467163
1,Kim_NatMed_2018-p603-ar-988,1,4.58302,4.861063,8.263042,14.458063,4.855858,7.309456,9.453802,10.686752,...,9.801667,9.308304,10.388871,7.933881,9.2439,9.120379,10.639626,10.939236,11.975895,10.807112
2,Kim_NatMed_2018-p604-ar-989,0,5.072909,5.371794,10.129806,14.850528,4.298386,7.785308,9.533112,10.245407,...,9.470367,9.546162,10.448574,7.937116,9.301088,9.207284,10.510028,11.252825,12.065053,10.47621
3,Kim_NatMed_2018-p605-ar-990,1,5.825145,5.456636,5.951332,13.60559,4.354426,7.948913,9.644138,11.11869,...,9.674182,10.319696,11.231788,6.80907,8.904471,9.767181,10.80864,10.79082,11.398188,10.971979
4,Kim_NatMed_2018-p618-ar-991,0,8.129293,8.524828,11.094652,11.271648,7.140103,7.734586,11.143474,12.63561,...,9.272658,11.638891,10.276862,7.930421,8.816454,10.596053,10.027923,10.355084,10.542081,11.239172
5,Kim_NatMed_2018-p619-ar-992,1,10.122392,5.173193,9.690702,12.987005,4.347208,9.380586,9.801785,10.913468,...,9.89242,9.527797,10.997549,7.957338,9.159799,9.441897,9.656622,11.577406,11.411437,10.615744
6,Kim_NatMed_2018-p620-ar-993,1,4.351437,3.858176,2.635783,12.012406,1.205636,5.583041,9.488,10.998117,...,9.903845,9.750638,11.392367,4.05671,8.739242,9.384861,9.733126,12.247185,10.571004,10.072291
7,Kim_NatMed_2018-p621-ar-994,1,5.603227,6.279775,8.031988,13.292179,2.637211,7.832966,10.071985,10.980681,...,10.102944,10.7425,7.988444,6.582094,8.031366,8.569042,9.126191,11.037878,9.97304,10.355226
8,Kim_NatMed_2018-p622-ar-995,1,7.413714,6.794053,5.915853,14.915345,4.878855,10.094095,9.221756,10.221948,...,9.285041,9.104097,10.689847,7.714227,9.165764,9.13479,10.104813,13.709664,11.217823,9.787018
9,Kim_NatMed_2018-p623-ar-000,0,5.862391,5.711299,9.329427,13.763308,5.317708,8.520343,9.960193,9.802073,...,9.390848,10.139617,11.317465,7.927041,10.025248,9.897171,9.558499,11.210434,10.683476,10.444956


In [260]:
# Liu_NatMed_2019_nivo.csv diseae - Melanoma, treatment - Nivolumab
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/Liu_NatMed_2019_nivo.csv")

Unnamed: 0,Sample,clinical_response,5_8S_rRNA,A2M,AAAS,AACS,AAGAB,AAK1,AAMP,AARS1,...,ZSWIM9,ZW10,ZWILCH,ZWINT,ZXDA,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,Liu_NatMed_2019-p009-ar-00574,0,11.501109,13.172299,9.867618,8.479543,9.669212,10.99106,10.793166,12.325687,...,7.183083,8.752155,9.521767,8.759101,9.00853,9.837298,10.55911,12.403685,12.666428,11.425582
1,Liu_NatMed_2019-p017-ar-99992,1,12.254727,16.796129,10.472342,9.573562,9.37146,11.080601,10.556168,12.725772,...,7.325193,9.098413,9.132946,7.719918,8.582706,9.807494,9.824498,13.569584,12.204406,9.957624
2,Liu_NatMed_2019-p018-ar-00020,0,11.794743,14.996625,10.437828,9.844415,9.979566,10.921805,11.148013,13.522685,...,7.306989,9.174817,9.977107,8.782608,8.522807,9.873799,10.455602,11.379812,12.563132,11.62927
3,Liu_NatMed_2019-p020-ar-00560,0,12.667855,13.88359,10.273988,9.753924,9.995071,11.825489,10.315263,12.791007,...,7.304548,9.408636,9.573228,8.266694,8.165685,9.738399,10.196582,13.141452,12.008101,10.869291
4,Liu_NatMed_2019-p022-ar-00577,0,11.610235,13.315599,10.46251,8.507728,10.181799,11.211354,10.172883,11.910123,...,7.11931,9.194416,9.55756,7.616885,8.567177,8.487317,10.804376,11.715504,11.895483,11.525924
5,Liu_NatMed_2019-p023-ar-00568,0,11.947728,12.651009,10.49589,10.606706,9.919895,10.98655,10.900726,12.078295,...,7.361831,9.658284,9.63103,8.349101,9.10845,10.415316,10.644673,11.566339,11.723367,11.542313
6,Liu_NatMed_2019-p024-ar-99999,0,11.566479,16.648999,10.691938,10.029775,8.549422,11.081927,11.213503,13.419518,...,7.191417,8.913436,9.062322,7.593983,8.384459,10.071825,8.682806,11.415129,11.939734,9.66919
7,Liu_NatMed_2019-p035-ar-99977,1,11.693297,17.151513,11.008957,9.710044,8.871865,11.114502,10.742118,12.757687,...,7.727215,8.489943,8.244537,6.709989,9.411571,9.275261,9.155881,11.509223,12.345158,10.19627
8,Liu_NatMed_2019-p036-ar-00003,0,9.42826,16.458711,9.489539,8.692772,9.774372,10.935094,10.21457,12.244618,...,5.223564,8.75481,9.234452,6.305871,8.08771,9.007882,11.150844,11.749347,12.236185,12.236387
9,Liu_NatMed_2019-p037-ar-00557,0,11.993984,15.279922,10.537183,8.525932,10.299174,11.681761,9.871882,12.49996,...,6.399412,9.652976,10.254053,8.059405,8.998199,9.326707,10.636341,13.703626,12.246271,11.379612


In [261]:
# Liu_NatMed_2019_pembro.csv disease - melanoma, treatment - pembrolizumab
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/Liu_NatMed_2019_pembro.csv")

Unnamed: 0,Sample,clinical_response,5_8S_rRNA,A1BG-AS1,A2M,AAAS,AACS,AADAT,AAGAB,AAK1,...,ZUP1,ZW10,ZWILCH,ZWINT,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,Liu_NatMed_2019-p001-ar-00001,0,11.952285,2.626876,14.767479,10.683572,8.449459,7.206312,9.207413,10.306911,...,9.215811,10.161244,8.957441,6.719735,9.048510,8.427938,10.013722,10.824774,12.396102,11.547488
1,Liu_NatMed_2019-p004-ar-00012,1,9.161324,6.009035,15.848743,11.144300,8.261053,10.106071,9.897313,11.957437,...,7.184501,9.738312,9.663185,7.848839,8.082772,9.436324,10.951381,12.793219,12.303201,11.224879
2,Liu_NatMed_2019-p006-ar-00567,1,12.375744,7.281138,14.353252,9.917183,9.226887,8.749840,9.114062,11.221194,...,9.068072,10.150847,10.145115,7.648276,9.378464,9.257565,10.410397,11.406790,12.625649,10.055423
3,Liu_NatMed_2019-p007-ar-00597,1,9.968148,7.656823,14.251482,10.799394,8.935600,8.551737,9.571192,10.560179,...,8.457925,9.924928,9.828593,8.799307,8.806292,9.679066,10.367926,11.177139,12.397181,11.742523
4,Liu_NatMed_2019-p008-ar-00594,1,11.438528,7.218767,14.751810,10.267698,8.981206,8.469636,10.434267,11.125566,...,7.979903,8.945393,10.362599,8.329571,9.208676,9.566604,11.372401,11.428414,11.851311,12.480794
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,Liu_NatMed_2019-p201-ar-00592,0,12.454507,6.334678,15.306443,10.336811,9.199566,9.291410,9.169622,10.045156,...,6.245633,8.265677,8.961694,7.848426,7.158604,9.003032,10.005021,11.442741,11.651042,9.737517
65,Liu_NatMed_2019-p203-ar-99990,0,10.948840,7.299335,15.711395,9.823397,9.827788,8.336442,9.336260,10.499221,...,7.675732,8.835203,10.086655,8.454726,9.019246,9.595151,10.422904,12.498014,12.388077,10.919735
66,Liu_NatMed_2019-p204-ar-00589,1,10.610940,7.712581,14.468628,10.528024,9.993408,8.383679,9.441910,10.439815,...,8.054008,8.884132,9.675430,8.439758,9.307063,9.108308,10.811644,11.885734,11.921017,10.953971
67,Liu_NatMed_2019-p205-ar-00013,0,12.217805,7.973598,16.437120,10.614270,10.546888,7.642814,9.626986,10.799375,...,6.758838,8.890366,9.782717,7.826785,8.168003,9.096698,10.315430,12.186273,11.604562,10.061017


In [262]:
# Prins_GBM_2019.csv disease - glioblastoma, treatment - pembrolizumab
pd.read_csv('/data/yanrong/druid_data/ClinicalOmics/Prins_GBM_2019.csv')

Unnamed: 0,Sample,clinical_response,5_8S_rRNA,A1BG,A1BG-AS1,A2M,A2M-AS1,A4GALT,AAAS,AACS,...,ZUP1,ZW10,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,Prins_GBM_2019-DF02-ar-DF02,0,12.526928,5.363775,6.551259,13.327169,4.900428,6.582603,10.348826,10.520656,...,7.543547,8.60988,7.95665,8.309743,9.343492,10.151118,11.405319,12.199698,11.808383,10.534761
1,Prins_GBM_2019-DF03-ar-DF03,1,11.922276,5.774635,7.003329,15.294117,5.497081,7.603919,11.089645,10.264186,...,7.554933,8.961413,6.943672,8.126313,8.925829,9.685131,10.938384,12.907178,11.714809,10.251942
2,Prins_GBM_2019-DF04-ar-DF04,0,11.382084,7.932292,8.492926,15.452009,3.171392,5.891235,10.863955,10.034876,...,7.763186,8.830463,8.975238,8.936992,9.585287,10.747803,10.936498,13.360595,11.81354,10.536229
3,Prins_GBM_2019-DF05-ar-DF05,1,12.289308,7.340714,7.638886,14.832812,5.360518,9.19844,10.342595,8.881004,...,7.195442,8.429967,6.584043,8.388172,8.418146,10.136318,10.744993,12.27332,11.885582,10.015074
4,Prins_GBM_2019-DF06-ar-DF06,0,14.153604,7.83351,8.252683,13.942851,5.473433,8.463347,10.705116,8.768832,...,5.558984,8.517781,7.036583,7.016843,6.426656,9.931247,9.470531,12.532489,11.724877,9.63552
5,Prins_GBM_2019-DF10-ar-DF10,0,9.640536,7.280825,8.046703,14.158934,3.671856,6.28875,10.854612,9.959955,...,8.350954,9.119899,10.043751,8.341407,9.388232,10.49161,11.428378,13.031323,11.93279,11.265855
6,Prins_GBM_2019-DF12-ar-DF12,0,12.526284,7.175745,7.961866,15.100836,6.08988,9.183726,9.792636,10.556535,...,7.912486,8.430283,7.932451,7.595726,8.6102,10.028767,11.230023,12.019592,11.776314,10.271348
7,Prins_GBM_2019-DF16-ar-DF16,0,12.0925,7.567418,8.079708,14.212354,6.595255,8.377363,10.796548,9.913805,...,8.101109,8.937281,6.575004,7.573127,8.497588,10.081096,10.770072,12.568698,12.060659,10.395921
8,Prins_GBM_2019-DF19-ar-DF19,1,11.449409,7.119433,8.23785,15.382403,6.998239,9.77175,11.243359,9.08294,...,8.293172,9.09077,8.306675,8.25386,8.676443,10.530288,10.40406,12.510354,12.359797,10.551709
9,Prins_GBM_2019-DF22-ar-DF22,0,11.398446,7.494983,9.262011,14.938834,4.913496,7.160325,11.157766,9.207107,...,8.244618,9.057349,8.915836,8.549958,9.401223,10.464507,10.984467,12.769906,11.831474,10.887942


In [263]:
# Riaz_Nivolumab_2017.csv disease - melanoma, treatment - nivolumab
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/Riaz_Nivolumab_2017.csv")

Unnamed: 0,Sample,clinical_response,A1BG,A1BG-AS1,A2M,A4GALT,AAAS,AACS,AAGAB,AAK1,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,Riaz_Nivolumab_2017-p001-ar-8813,0,7.183835,8.143511,13.890617,4.914045,9.213588,9.165601,11.263999,9.789844,...,8.569623,10.873645,10.500583,7.159626,8.429797,9.038053,10.358334,10.481111,10.036680,10.960623
1,Riaz_Nivolumab_2017-p001-ar-8814,0,7.672291,8.987114,14.032619,4.530249,10.082463,10.203790,11.828364,10.495160,...,8.836697,11.801268,11.347394,8.053877,9.096751,9.775717,11.088470,10.265550,10.305811,12.098093
2,Riaz_Nivolumab_2017-p002-ar-8815,1,4.717391,6.715913,13.972503,7.157052,10.109045,9.765077,11.051531,10.430333,...,9.208830,10.057115,10.421993,7.657921,8.965692,8.848215,10.473527,11.966144,11.109336,11.032733
3,Riaz_Nivolumab_2017-p002-ar-8816,1,4.977612,7.301891,13.371261,5.834110,9.840664,9.583513,11.096530,10.033668,...,9.311220,10.188171,10.597183,7.589011,8.872088,8.558496,10.629848,11.550715,10.891977,11.298992
4,Riaz_Nivolumab_2017-p003-ar-8877,1,6.146117,7.403843,14.062976,7.728031,10.138854,9.897356,11.054913,10.164437,...,9.192721,9.996009,10.153668,7.470480,8.847994,8.756032,10.330097,11.713995,9.878455,10.490398
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,Riaz_Nivolumab_2017-p103-ar-8871,0,8.729955,8.645097,13.888767,7.542091,10.677913,9.189800,10.806293,9.581017,...,8.721101,9.541315,10.073813,8.142305,9.902886,9.784603,11.034938,11.796435,10.971001,11.021775
94,Riaz_Nivolumab_2017-p103-ar-8872,0,10.003245,8.640594,14.652035,9.583323,10.536797,9.201669,10.323435,9.104484,...,8.836531,8.640594,9.608450,7.174246,7.895064,8.724312,9.569612,12.897691,9.724375,9.448541
95,Riaz_Nivolumab_2017-p106-ar-8926,0,6.294452,8.149254,15.040049,7.582340,10.398433,8.672898,11.345670,10.474613,...,9.331581,9.991833,10.270666,7.419989,8.590521,9.456811,10.723168,11.799671,10.980187,10.735190
96,Riaz_Nivolumab_2017-p106-ar-8927,0,6.957294,8.257784,14.697107,7.426536,10.409406,8.814273,11.597873,10.087538,...,9.147856,9.941460,10.154407,7.248929,8.412144,9.143931,10.266030,11.895552,10.646284,10.715342


In [264]:
# VanAllen_antiCTLA4_2015.csv disease - melanoma, treatment - ipilimumab
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/VanAllen_antiCTLA4_2015.csv")

Unnamed: 0,Sample,clinical_response,5_8S_rRNA,A1BG,A1BG-AS1,A2M,A2M-AS1,A4GALT,AAAS,AACS,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,VanAllen_antiCTLA4_2015-p002-ar-689710,1,12.507753,8.97741,6.867541,16.375446,5.424824,10.543098,12.53588,8.968659,...,8.561422,7.9299,7.876087,8.077624,8.563296,10.721524,9.489559,14.102196,13.624515,9.269291
1,VanAllen_antiCTLA4_2015-p004-ar-689711,1,12.259755,6.409898,6.410972,15.881883,6.532207,10.118822,11.846739,11.035749,...,10.336565,9.202897,8.095488,9.209944,9.53045,10.9978,10.833888,11.630365,13.634486,11.174792
2,VanAllen_antiCTLA4_2015-p004-ar-780275,1,9.935153,7.920352,6.076814,15.539626,5.320773,6.685038,11.975089,10.128616,...,8.592721,9.997638,10.310563,9.009668,9.538509,10.950181,10.035593,10.498347,12.303976,11.681314
3,VanAllen_antiCTLA4_2015-p006-ar-774280,0,11.290331,7.647879,7.15429,16.715435,7.442555,8.893205,12.297179,9.731211,...,9.670353,9.114487,7.548215,9.031289,9.095027,11.214642,10.2249,11.975703,13.454351,11.125399
4,VanAllen_antiCTLA4_2015-p008-ar-778361,0,10.196577,8.798375,7.195969,16.676806,6.726284,7.455351,12.103498,9.962006,...,9.604697,9.32175,8.586329,8.837378,9.064846,11.421706,10.366507,10.875703,13.461067,11.097292
5,VanAllen_antiCTLA4_2015-p014-ar-771207,0,10.865692,7.453649,5.802167,16.102131,5.528546,9.051459,11.658038,9.377828,...,9.932997,9.905628,9.089788,9.048059,8.301562,9.990569,10.476209,12.76491,12.745747,10.738213
6,VanAllen_antiCTLA4_2015-p015-ar-669446,0,12.838133,7.953308,5.939632,16.611012,5.726188,5.942699,12.124834,9.193402,...,8.909706,9.483022,8.534971,7.58709,8.586996,11.425928,9.831891,11.029215,12.989959,10.11018
7,VanAllen_antiCTLA4_2015-p016-ar-780299,0,11.614345,7.521352,5.256433,13.501273,5.788357,4.802128,11.19595,8.964103,...,9.368095,9.124023,8.523909,7.959617,8.914376,9.927926,10.730287,9.435575,12.681467,11.871232
8,VanAllen_antiCTLA4_2015-p019-ar-774182,0,11.339979,7.910711,6.900281,16.335861,6.459149,9.064697,11.633245,9.412046,...,9.361374,9.69453,9.068914,8.440388,8.744534,10.857384,10.332486,12.273459,13.083427,10.424211
9,VanAllen_antiCTLA4_2015-p020-ar-672095,0,11.779569,7.418587,6.053213,16.885335,6.841696,5.918862,11.742363,10.150602,...,10.995844,10.192177,7.984294,8.075761,8.854906,10.170169,10.854842,11.025687,12.206821,11.532874


In [265]:
# Zhao_NatMed_2019_nivo.csv disease - glioblastoma, treatment - nivolumab
pd.read_csv("/data/yanrong/druid_data/ClinicalOmics/Zhao_NatMed_2019_nivo.csv")

Unnamed: 0,Sample,clinical_response,A1BG,A1BG-AS1,A2M,A2M-AS1,A4GALT,AAAS,AACS,AADAT,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,Zhao_NatMed_2019-Pt003-ar-228,1,6.910629,7.470526,15.435252,6.020548,9.994411,10.309821,9.537061,7.537195,...,8.836058,8.188701,7.797033,7.84127,9.241982,9.796924,10.800864,13.127592,11.784358,9.969309
1,Zhao_NatMed_2019-Pt003-ar-231,1,6.125742,6.243681,14.875407,5.846974,8.093757,10.508412,9.795348,8.585468,...,8.758065,8.948609,8.687345,8.348859,9.349396,10.042007,10.57288,14.393108,11.533338,10.299066
2,Zhao_NatMed_2019-Pt013-ar-222,0,7.475621,6.964122,14.637464,5.302644,7.702486,10.575443,9.767121,9.262956,...,8.913854,9.352999,10.449265,8.489615,9.358249,10.184677,10.690227,12.150793,11.975209,9.914484
3,Zhao_NatMed_2019-Pt013-ar-224,0,6.611102,7.04812,13.848042,6.270523,7.716578,10.337706,9.838658,8.947648,...,8.453009,8.564607,9.173647,8.806541,9.609744,10.133634,10.965344,11.655728,11.650679,9.784369
4,Zhao_NatMed_2019-Pt013-ar-225,0,6.692782,6.409118,14.46761,5.839748,7.204562,10.261621,8.716026,8.855298,...,8.711454,8.352989,9.034527,9.324315,9.977836,10.321406,10.739537,11.728962,12.203635,9.743746
5,Zhao_NatMed_2019-Pt020-ar-218,0,7.352768,7.373393,14.626042,7.058424,8.355571,9.450892,11.229072,8.870206,...,8.666845,8.654636,8.193982,8.131606,9.349371,9.380931,11.638258,11.425547,11.343847,10.598176
6,Zhao_NatMed_2019-Pt020-ar-219,0,8.477404,8.689505,13.745909,4.183563,6.845184,10.720729,9.895052,8.738266,...,8.686942,9.074577,8.015704,8.05607,9.261479,9.748002,10.893263,13.882914,11.809717,10.354255
7,Zhao_NatMed_2019-Pt020-ar-221,0,7.616167,7.76448,15.532611,5.960978,7.574455,9.942729,9.659257,8.276072,...,8.581285,8.6434,7.853844,8.397116,9.046155,10.124339,11.056782,13.116379,11.873599,10.531316
8,Zhao_NatMed_2019-Pt021-ar-220,0,6.456118,6.918308,15.711662,7.151625,8.999596,10.430029,10.13925,8.53462,...,9.007685,8.465236,8.368674,8.02323,9.174732,10.055888,10.837636,12.298525,11.722742,10.442016
9,Zhao_NatMed_2019-Pt021-ar-223,0,10.319588,8.590267,15.124946,6.265393,7.966455,10.576961,9.828336,9.520057,...,9.607612,9.419072,9.850593,7.230158,8.451594,10.039241,10.416504,13.344089,11.533278,10.619704
