In [10]:
import pandas as pd
drugbank_df = pd.read_csv('../data/indication_gene_link/filtered_drugbank.csv')
drugbank_df.head()

Unnamed: 0,DrugBank ID,Generic Name,Indication,Disease
0,DB00002,Cetuximab,Cetuximab indicated for the treatment of local...,Eating Disorders
1,DB00068,Interferon beta-1b,Interferon beta-1b is a drug used for the trea...,Substance Use Disorders
2,DB00109,Enfuvirtide,Enfuvirtide is an antiretroviral drug used in ...,Substance Use Disorders
3,DB00114,Pyridoxal phosphate,For nutritional supplementation and for treati...,Eating Disorders
4,DB00116,Tetrahydrofolic acid,"For nutritional supplementation, also for trea...",Eating Disorders


In [11]:
stitch_voc_df = pd.read_csv('../data/indication_gene_link/STITCH_to_Drugbank_mapping.csv')

stitch_data_df = pd.read_csv('../data/indication_gene_link/STITCH.9606.actions.v5.0.tsv', sep='\t')  
stitch_voc_df.tail(100)

Unnamed: 0,CID1,CID2,source,drugbank_id
6823,CIDm00002339,CIDs46937178,DrugBank,DB08775
6824,CIDm00739358,CIDs00739358,DrugBank,DB08776
6825,CIDm00098614,CIDs00098614,DrugBank,DB08777
6826,CIDm01484819,CIDs01484819,DrugBank,DB08778
6827,CIDm46937179,CIDs46937179,DrugBank,DB08779
...,...,...,...,...
6918,CIDm46216142,CIDs46216142,DrugBank,DB08930
6919,CIDm16004692,CIDs16004692,DrugBank,DB08932
6920,CIDm00214339,CIDs09863407,DrugBank,DB08933
6921,CIDm25094462,CIDs73425384,DrugBank,DB08934


In [12]:
drug_stitch_df = drugbank_df.merge(stitch_voc_df, left_on='DrugBank ID', right_on='drugbank_id', how='inner')
drug_stitch_df.head(100)

Unnamed: 0,DrugBank ID,Generic Name,Indication,Disease,CID1,CID2,source,drugbank_id
0,DB00114,Pyridoxal phosphate,For nutritional supplementation and for treati...,Eating Disorders,CIDm00001051,CIDs00001051,DrugBank,DB00114
1,DB00116,Tetrahydrofolic acid,"For nutritional supplementation, also for trea...",Eating Disorders,CIDm00001129,CIDs00091443,DrugBank,DB00116
2,DB00118,Ademetionine,S-Adenosylmethionine (SAMe) is used as a drug ...,Major Depressive Disorder,CIDm00001079,CIDs00034755,DrugBank,DB00118
3,DB00119,Pyruvic acid,"For nutritional supplementation, also for trea...",Eating Disorders,CIDm00001060,CIDs00001060,DrugBank,DB00119
4,DB00120,Phenylalanine,L-phenylalanine may be helpful in some with de...,Major Depressive Disorder,CIDm00000994,CIDs00006140,DrugBank,DB00120
...,...,...,...,...,...,...,...,...
95,DB00843,Donepezil,"Donepezil, administered orally[L7916] or via t...",Alzheimer’s Disease,CIDm00003152,CIDs00003152,DrugBank,DB00843
96,DB00849,Methylphenobarbital,"For the relief of anxiety, tension, and appreh...",Anxiety Disorder,CIDm00008271,CIDs00008271,DrugBank,DB00849
97,DB00863,Ranitidine,This drug is used alone or with concomitant an...,Eating Disorders,CIDm00005039,CIDs03001055,DrugBank,DB00863
98,DB00875,Flupentixol,Flupentixol is indicated for maintenance thera...,Anxiety Disorder,CIDm25137855,CIDs25137855,DrugBank,DB00875


In [13]:
stitch_data_df = stitch_data_df[
    (stitch_data_df['score'] >= 700) &
    (~stitch_data_df['action'].isnull()) &
    (stitch_data_df['a_is_acting'] == 't')
]

merged_df = drug_stitch_df.merge(stitch_data_df, left_on='CID1', right_on='item_id_a', how='left')
merged_df = merged_df[["DrugBank ID", "Generic Name", "Indication", "Disease", "item_id_b"]]
merged_df.head()

Unnamed: 0,DrugBank ID,Generic Name,Indication,Disease,item_id_b
0,DB00114,Pyridoxal phosphate,For nutritional supplementation and for treati...,Eating Disorders,9606.ENSP00000355517
1,DB00114,Pyridoxal phosphate,For nutritional supplementation and for treati...,Eating Disorders,9606.ENSP00000268251
2,DB00114,Pyridoxal phosphate,For nutritional supplementation and for treati...,Eating Disorders,9606.ENSP00000260270
3,DB00114,Pyridoxal phosphate,For nutritional supplementation and for treati...,Eating Disorders,9606.ENSP00000353910
4,DB00114,Pyridoxal phosphate,For nutritional supplementation and for treati...,Eating Disorders,9606.ENSP00000355517


In [14]:
prot_gene_ensembl_voc_df = pd.read_csv('../data/Ensembl_transcript_peptide_HGNC.tsv', sep='\t') 

merged_df['ensembl_peptide_id'] = merged_df['item_id_b'].apply(lambda x: x[5:] if isinstance(x, str) else x)

merged_df = merged_df.merge(prot_gene_ensembl_voc_df, on='ensembl_peptide_id')

merged_df.head()

Unnamed: 0,DrugBank ID,Generic Name,Indication,Disease,item_id_b,ensembl_peptide_id,ensembl_gene_id,hgnc_symbol
0,DB00114,Pyridoxal phosphate,For nutritional supplementation and for treati...,Eating Disorders,9606.ENSP00000355517,ENSP00000355517,ENSG00000117009,KMO
1,DB00114,Pyridoxal phosphate,For nutritional supplementation and for treati...,Eating Disorders,9606.ENSP00000355517,ENSP00000355517,ENSG00000117009,KMO
2,DB00114,Pyridoxal phosphate,For nutritional supplementation and for treati...,Eating Disorders,9606.ENSP00000268251,ENSP00000268251,ENSG00000183044,ABAT
3,DB00119,Pyruvic acid,"For nutritional supplementation, also for trea...",Eating Disorders,9606.ENSP00000268251,ENSP00000268251,ENSG00000183044,ABAT
4,DB00313,Valproic acid,**Indicated** for:[Label] \r\n\r\n1) Use as mo...,Bipolar Disorder,9606.ENSP00000268251,ENSP00000268251,ENSG00000183044,ABAT


In [15]:
stitch_df = merged_df[["DrugBank ID", "Generic Name", "Indication", "Disease", 'ensembl_gene_id', 'hgnc_symbol', 'ensembl_peptide_id']]


stitch_df.to_csv('../data/indication_gene_link/STITCH_drugbank.csv', index=False)


In [16]:
print(len(merged_df))

3441
