In [6]:
# written to look at merged networks and identify
# where other non-DME-associated drugs bound genes
# in these networks to suggest novel drug-drug-DME combinations
# re-written 11-8-19 JLW

import pickle, os, csv, math, functools
import pandas as pd
from collections import defaultdict

dd_list = functools.partial(defaultdict, list)
dd_set = functools.partial(defaultdict, set)

rscs_dir = '../rscs/'
data_dir = '../data/'
dme_mdir = os.path.join(data_dir,'dme_merged_data')
coth_dir = os.path.join(data_dir,'cotherapy')

dintf = os.path.join(rscs_dir,'drug_intome_targets.pkl')
dint = pickle.load(open(dintf,'rb')) # to find number of drug targets
del dint['DB03080'] # this entry was revoked May 2017
del dint['DB08696'] # this entry was revoked

froot = '_zipped_net_data.pkl'
all_net_data = [f for f in os.listdir(dme_mdir) if froot in f]
dme_dic = dict([(f.replace(froot,''),os.path.join(dme_mdir,f)) for f in all_net_data])

# Drugbank ID mapping
pfxr_dir = '../PathFX/rscs/'
db2nmf = os.path.join(pfxr_dir,'drugbankid_to_name.pkl')
db2nm = pickle.load(open(db2nmf,'rb'))

In [7]:
# load drug synonyms data
drug_synonyms = defaultdict(list)
synf = os.path.join(data_dir,'drugbank_vocabulary.csv')
dR = csv.DictReader(open(synf,'r'))
db2nm = {}
db2syns = defaultdict(list)
sn2db = {}
nm2db = {}
for r in dR:
    com_name = r['Common name'].capitalize()
    dbid = r['DrugBank ID']
    accnums = r['Accession Numbers']
    add_dbids = [a for a in accnums.split(' | ') if a[0:2]=='DB'] # look for additional DrugBank identifiers
    # store
    db2nm[dbid] = com_name
    nm2db[com_name] = dbid
    for adb in add_dbids:
        db2nm[adb] = com_name
        # nm2db[com_name]
    #store for synonyms
    syns = r['Synonyms'].split(' | ')
    for s in syns:
            drug_synonyms[com_name].append(s.capitalize())
            db2syns[dbid].append(s.capitalize())
            sn2db[s.capitalize()] = dbid
            for adb in add_dbids:
                db2syns[adb].append(s.capitalize())

db2nm['DB09396'] = 'Dextropropoxyphene'

In [8]:
# Debugging notes: add some synonyms manually?
man_add = {'Hydrocortisone acetate':'DB14539',
           "Antihemophilic factor, human recombinant":'DB00025',
           'Coagulation factor vii human':'DB13150',
           'Coagulation factor viia recombinant human':'DB00036',
           'Sodium ascorbate':'DB14482',
           'Coagulation factor vii human':'DB13150',
           'Human serum albumin':'DB00062',
           'Thrombomodulin Alfa':'DB05777',
           'Aspartic acid':'DB00128',
           'Ephedra sinica root':'DB01363',
           'Glutamic acid':'DB00142',
           'Myristic acid':'DB08231',
           'NAD':'DB14128',
           'Pi3k/bet inhibitor ly294002':'DB02656',
           'Low-dose aspirin':'DB00945',
           'Carbachol':'DB00411',
           'Recombinant urokinase':'DB00013',
           'Coagulation factor viia recombinant human'
           'Ephedra intermedia stem':'DB01363',
           'Norepinephrine, dl-':'DB00368',
           'Tafamidis meglumine':'DB11644',
           'C1-inh':'DB06404',
           'Oxytocin measurement':'DB00107',
           'Ascorbic acid':'DB00126',
           'Vitamin C':'DB00126'}

for (k,v) in man_add.items():
    sn2db[k.capitalize()] = v


In [9]:
# Loop through all intermediate genes and find if they are the target of any drugs in DrugBank
writer = pd.ExcelWriter(open(os.path.join(coth_dir,'potential_co_therapies.xlsx'),'wb'))

count_pred = set()
all_possible = defaultdict(dd_list)
for (dme,net_data_f) in dme_dic.items():
    print(dme)
    save_data = []
    net_data = pickle.load(open(net_data_f,'rb'))
   #  (node_list,colors,shapes,sizes,graph_node_types) = zip(*net_data)
    dme_intermed = [node for (node,clr,sh,sz,ntype) in net_data if ntype=='intermediate']
    dme_intermed_noPK = [n for n in dme_intermed if'CYP' not in n and 'ALB' not in n and 'ABC' not in n] # remove PK genes
    for node in dme_intermed_noPK:
        node_drugs = [(dr,ts) for (dr,ts) in dint.items() if node in ts and dr[0:2]=='DB'] # check all drugs that engage the target and remove redundant drugbank identifiers
        for (ndrug,tlist) in node_drugs:
            com_name = db2nm[ndrug]
            save_data.append([com_name,node])
            all_possible[dme][ndrug].append(node)
            count_pred.add((ndrug,node))
        
    df = pd.DataFrame(save_data, columns = ['Drug', 'Drug Target found in DME pathway'])
    df.to_excel(writer,sheet_name=dme) # save the results in an Excel sheet
    
writer.save()
pickle.dump(all_possible,open(os.path.join(coth_dir,'all_possible_cotherapy.pkl'),'wb'))
print("Number of drug combination predictions: "+str(len(count_pred)))


Thrombocytopenia
Hypertension
Deep_vein_thrombosis
Cerebral_infarction
Tardive_dyskinesia
Pneumonia
Hemorrhage
Hemolytic_anemia
Pancreatitis
Peripheral_neuropathy
Myocardial_infarction
Gastric_ulcer
Proteinuria
Delirium
Ventricular_tachycardia
Hyperlipidemia
Agranulocytosis
Myopathy
Sepsis
Edema
Pulmonary_edema
Interstitial_lung_disease
Cardiac_arrest
Hepatic_necrosis
Number of drug combination predictions: 1687


In [12]:
# after reviewing the literature, look at confirmed drug-DME relationships and directionality
eval_file = '../data/Drug-DME_Eval_final.xlsx'
df = pd.read_excel(eval_file,None)
writer = pd.ExcelWriter(open(os.path.join(coth_dir,'co_therapies_with_literature_support.xlsx'),'wb'))

hist_data = defaultdict(dd_set)
for dme in df.keys(): # loop through dmes tested using PubMed abstracts
    print(dme)
    if dme == 'Sleep Disorder': # no sufficient interactions in this pathway
        continue
    dme_df = df[dme] # get dataframe subset
    df_with_data = dme_df[dme_df['Relationship'].notnull()] #only select those that had relationships after reading abstracts
    
    # save the list of drugs, switch to capitalize
    df_with_data['Drug'] = df_with_data['Drug'].str.capitalize()
    drug_list = df_with_data['Drug'].tolist()
    
    # get names of drugs with their targets in the DME pathway
    if dme == 'DVT':
        dme_lookup = 'Deep_vein_thrombosis'
    elif dme == 'ILD':
        dme_lookup = 'Interstitial_lung_disease'
    elif dme == 'Neuropathy':
        dme_lookup = 'Peripheral_neuropathy'
    elif dme == 'Ventricular Tachy':
        dme_lookup = 'Ventricular_tachycardia'
    else:
        dme_lookup = dme.capitalize().replace(' ','_')
    apdic = all_possible[dme_lookup] # look up drugs that bind gene in this dme pathway
    for k in apdic.keys():
        hist_data[dme]['predicted'].add(k) # just count the number of drugs
    
    # create a new column to add to the dataframe
    db_dme_genes = [] # a list of drug-binding DME genes
    for dname in drug_list:
        dname = dname.capitalize()
        if dname in nm2db:
            dbid = nm2db[dname]
        elif dname in sn2db:
            dbid = sn2db[dname]
        else:
            dnid = 'not found'
        if dbid in apdic:
            dtargs = apdic[dbid]
            dtstr = ','.join(dtargs)
            db_dme_genes.append(dtstr)
        else:
            db_dme_genes.append('targets not found')

    
    df_with_data.insert(1,'Drug targets in DME pathway',db_dme_genes)
    df_sum = df_with_data[df_with_data['Drug targets in DME pathway'] != 'targets not found']
    df_sum.to_excel(writer,sheet_name=dme)
writer.save()

pickle.dump(hist_data,open(os.path.join(coth_dir,'predicted_data_for_hist.pkl'),'wb'))

Cardiac Arrest
Cerebral Infarction
Delirium
DVT
Edema
Gastric Ulcer
Hemorrhage
Hypertension
Hyperlipidemia
ILD
Myocardial Infarction
Myopathy
Neuropathy
Pancreatitis
Pneumonia
Proteinuria
Sepsis
Sleep Disorder
Thrombocytopenia
Ventricular Tachy


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
