# Calculate edge circadian scores

In [1]:
import numpy as np
import pandas as pd
import src.calculate_edge_circa_score as cal
import requests

In [2]:
# Load the processed CircaDB table (tab-separated) and preview its first rows
circa_db_path = 'data/circa_db_mapped.tsv'
circa_db = pd.read_csv(circa_db_path, sep = '\t')
circa_db.head()

Unnamed: 0,gene_id,Fat SQ_fdr,Fat Visceral_fdr,Aorta_fdr,Artery Coronary_fdr,Artery Tibial_fdr,Colon_fdr,Esophagus_fdr,Heart Atrial_fdr,Liver_fdr,...,Artery Coronary_exp,Artery Tibial_exp,Colon_exp,Esophagus_exp,Heart Atrial_exp,Liver_exp,Lung_exp,Nerve Tibial_exp,Pituitary_exp,Thyroid_exp
0,653635,0.6432,0.1526,0.8443,0.7712,0.9549,0.5059,0.2928,0.6953,0.9732,...,12.3,11.59,12.72,12.3033,5.369,5.406,13.68,19.48,15.84,19.255
1,79854,0.7652,0.2412,0.0473,0.0002,0.6314,0.8602,0.082,0.6481,0.6405,...,5.94,8.419,5.4407,4.6607,2.962,3.24,7.024,12.11,9.898,9.7615
2,643837,0.9075,0.0774,0.7887,0.2173,0.6017,0.3294,0.0696,0.2758,0.3438,...,9.039,11.52,7.1738,7.635,14.35,3.674,6.016,5.872,18.6,6.5845
3,26155,0.8656,0.457,0.5405,0.6391,0.8885,0.3902,0.6801,0.8984,0.3532,...,52.24,62.41,50.845,62.955,30.67,28.42,57.32,69.66,57.56,66.58
4,339451,0.8808,0.6235,0.436,0.4999,0.7446,0.778,0.6238,0.6645,0.9089,...,13.51,10.8,11.62,14.5633,4.87,5.979,15.6,9.224,21.42,15.445


### Pre-process circadian treatments extracted from Ruben et al.

In [98]:
# Ruben et al. treatment data (tab-separated)
treatment = pd.read_csv('data/HumCircMed2018v2_mapped.tsv', sep = '\t')

# Filter drug~disease pairs: keep rows where both the DrugBank ID and the DOID are present
treatment = treatment.dropna(subset = ['drug.trtmnt.DrugBankID', 'therapeutic.area.DOID'])

# columns carried forward into the scoring step and the output table
filter_col_id = ['drug.trtmnt','drug.trtmnt.DrugBankID','therapeutic.area',
                 'therapeutic.area.DOID','halflife.hrs','effect']

# effect annotation must contain 'more eff' or 'none' (plain substring match);
# a missing effect becomes '' and therefore never matches
effect_text = treatment['effect'].fillna('').astype(str)
has_wanted_effect = (effect_text.str.contains('more eff', regex = False)
                     | effect_text.str.contains('none', regex = False))

# whether treatment contains only one drug, i.e. the DrugBank ID field has no comma
is_single_drug = ~treatment['drug.trtmnt.DrugBankID'].astype(str).str.contains(',', regex = False)

# boolean-mask selection keeps the original row labels, as the positional selection did
treatment = treatment.loc[has_wanted_effect & is_single_drug, filter_col_id]
treatment.head()

Unnamed: 0,drug.trtmnt,drug.trtmnt.DrugBankID,therapeutic.area,therapeutic.area.DOID,halflife.hrs,effect
24,prednisone,DB00635,asthma,DOID:2841,2.5,more eff
27,tulobuterol,DB12248,asthma,DOID:2841,,more eff
29,theophylline,DB00277,asthma,DOID:2841,8.0,more eff
30,epinephrine,DB00668,asthma,DOID:2841,0.03,more eff
31,orciprenaline,DB00816,asthma,DOID:2841,6.0,more eff


### Calculate edge circadian scores of drug~disease pairs in the Ruben et al. dataset

In [5]:
# Derive the tissue names from the CircaDB column headers: every header that
# contains '_amp' contributes the text in front of that marker.
circa_cols = list(circa_db.columns)
amp_prefixes = [header.split('_amp')[0] for header in circa_cols if '_amp' in header]
# np.unique de-duplicates and sorts the names
tissues = list(np.unique(amp_prefixes))

In [12]:
# Score every drug~disease pair; one score entry and one note entry are
# collected per row of `treatment`, in row order.
rb_tissues_scores, rb_tissues_notes = [], []
drug_ids = treatment['drug.trtmnt.DrugBankID']
disease_ids = treatment['therapeutic.area.DOID']
for tr, (drug, disease) in enumerate(zip(drug_ids, disease_ids)):
    # progress indicator: positional row number followed by a blank line
    print(tr,'\n')
    # NOTE(review): 0.1 and 0.05 are passed positionally; their meaning is defined
    # by cal.calculate_edge_circa_score and is not visible here -- confirm.
    score, note = cal.calculate_edge_circa_score(drug, disease, tissues, circa_db, 0.1, 0.05)
    rb_tissues_scores.append(score)
    rb_tissues_notes.append(note)

0 

1 

2 

3 

4 

5 

6 

7 

8 

9 

10 

11 

12 

13 

14 

15 

16 

17 

18 

19 

20 

21 

22 

23 

24 

25 

26 

27 

28 

29 

30 

31 

32 

33 

34 

35 

36 

37 

38 

39 

40 

41 

42 

43 

44 

45 

46 

47 

48 

49 

50 

51 

52 

53 

54 



In [99]:
# The scores/notes are accumulated by a separate cell. If `treatment` has been
# re-filtered since that cell ran, pd.concat(axis=1) would not fail: it would pad
# the shorter frames with NaN rows and pair scores with the wrong drug~disease rows.
# Fail loudly instead of writing a mis-aligned table.
if not (len(rb_tissues_scores) == len(rb_tissues_notes) == len(treatment)):
    raise ValueError('edge circadian scores/notes are out of sync with treatment '
                     '(%d scores, %d notes, %d treatment rows); re-run the scoring cell'
                     % (len(rb_tissues_scores), len(rb_tissues_notes), len(treatment)))

# score dataframe: one row per drug~disease pair, one column per tissue
rb_df = pd.DataFrame(rb_tissues_scores)
rb_df.columns = tissues
# note dataframe: one note per drug~disease pair
note_df = pd.DataFrame(rb_tissues_notes)
note_df.columns = ['score_note']
# combine dataframes and output; the treatment index is reset so that all three
# frames share the same 0..n-1 row labels and line up positionally
combine_df = pd.concat([treatment.reset_index(drop = True), rb_df, note_df], axis=1)
# missing values are written as 'NA' and floats with four decimal places
combine_df.to_csv('data/HumCircMed2018v2_mapped_edge_circa_scores.tsv', sep = '\t', na_rep = 'NA', 
                  float_format = '%.4f', index = False)
combine_df.head()

Unnamed: 0,drug.trtmnt,drug.trtmnt.DrugBankID,therapeutic.area,therapeutic.area.DOID,halflife.hrs,effect,Aorta,Artery Coronary,Artery Tibial,Colon,Esophagus,Fat SQ,Fat Visceral,Heart Atrial,Liver,Lung,Nerve Tibial,Pituitary,Thyroid,score_note
0,prednisone,DB00635,asthma,DOID:2841,2.5,more eff,0.034652,0.157328,0.01019,0.045076,0.122101,0.0,0.239407,0.242023,0.086513,0.010136,0.030336,0.008045,0.121836,
1,tulobuterol,DB12248,asthma,DOID:2841,,more eff,,,,,,,,,,,,,,query drug not in hetionet
2,theophylline,DB00277,asthma,DOID:2841,8.0,more eff,0.0,0.198379,0.0,0.0,0.253446,0.0,0.0,0.008269,0.0,0.0,0.0,0.0,0.175027,
3,epinephrine,DB00668,asthma,DOID:2841,0.03,more eff,0.059574,0.324315,0.0,0.0,0.233059,0.0,0.25391,0.104975,0.494817,0.0,0.02455,0.0,0.02455,
4,orciprenaline,DB00816,asthma,DOID:2841,6.0,more eff,,,,,,,,,,,,,,
