In [1]:
# intended to summarize tables of data:
# merged node counts, node types, and drug interactions
# written 11-14-19 JLW

import pickle,os,csv,functools, matplotlib
import pandas as pd
import numpy as np
from collections import defaultdict
matplotlib.use("AGG")
import matplotlib.pyplot as plt

# Directory holding the co-therapy inputs and the files this notebook writes.
coth_dir = os.path.join('..','data','cotherapy')
# Co-therapy workbook; a later cell reads every sheet of it with pd.read_excel.
cof = os.path.join(coth_dir,'co_therapies_with_literature_support.xlsx')

ncf = 'supp2_DME_merged_node_counts.xlsx' # this is in the Code directory for now?
tpf = 'drugs_to_dmes_true_positive.txt'

# Later cells index this object as hist_data[dme]['predicted' | 'positive' | 'negative'].
# NOTE: pickle.load can execute arbitrary code; only load a file this project produced.
# The `with` block closes the file handle as soon as the data has been read.
with open(os.path.join(coth_dir,'predicted_data_for_hist.pkl'),'rb') as hist_fh:
    hist_data = pickle.load(hist_fh)

In [2]:
# Read node count file
# extract intermediate node -> drug relationships
#
# Result: all_node_data[sheet name][node name] -> {'count': ..., 'drugs': 'a,b,c'}
dd_list = functools.partial(defaultdict, list)
all_node_data = {}
df = pd.read_excel(ncf, None)  # sheet_name=None returns a dict of {sheet name: DataFrame}
for dme, dme_df in df.items():
    print(dme)
    just_drugs = []
    node_data = defaultdict(dd_list)
    for row in dme_df.itertuples():
        # _2 / _3 / _5 are the positional field names itertuples() generates for
        # columns whose headers are not valid identifiers.
        # NOTE(review): presumably node name, node count and drug string - confirm
        # against the workbook headers.
        drug_str = row._5
        node_name = row._2
        node_count = row._3
        # Entries are ';'-separated and only the text before the first ':' is kept.
        drug_names = {dp.split(':')[0] for dp in drug_str.split(';')}
        # Sort before joining: set iteration order for strings changes between
        # interpreter runs, which made the written drug lists non-reproducible.
        jd = ','.join(sorted(drug_names))
        just_drugs.append(jd)
        node_data[node_name]['count'] = node_count
        node_data[node_name]['drugs'] = jd
    # Adds the de-duplicated drug list to the sheet's DataFrame in place.
    dme_df.insert(1,'Drug Names',just_drugs)
    all_node_data[dme] = node_data

Myopathy
Hemorrhage
Pneumonia
Hypertension
Tardive dyskinesia
Cerebral infarction
Hemolytic anemia
Pancreatitis
Sepsis
Peripheral neuropathy
Gastric ulcer
Hyperlipidemia
Myocardial infarction
Edema
Deep vein thrombosis
Thrombocytopenia
Interstitial lung disease
Pulmonary edema
Delirium
Proteinuria
Ventricular tachycardia
Cardiac arrest
Hepatic necrosis
Agranulocytosis


In [3]:
# open co-therapies table, match co-therapy/node relationships to node-> original drug relationships
#
# For every sheet of the co-therapy workbook this cell:
#   * records each drug in hist_data[sheet]['positive' | 'negative'] according to
#     its Relationship value,
#   * looks up the count and drug list of the node(s) named in each row, and
#   * writes a reformatted copy of the sheet to supp4_summary_drug_interactions.xlsx.

# Co-therapy sheet names that are not simply a re-capitalised node-count sheet name.
dme_mapping = {'DVT':'Deep vein thrombosis', 'ILD':'Interstitial lung disease',
               'Neuropathy':'Peripheral neuropathy', 'Ventricular Tachy':'Ventricular tachycardia'}
df2 = pd.read_excel(cof,None)
# NOTE(review): 'Prevcents' looks like a misspelling that presumably occurs in the
# workbook itself; it is left in place so those rows still count as positive.
positive_rels = ['Prevents','Prevcents','Inhibits']
negative_rels = ['Aggravates','Activates','Induces']

# Column selection/order for the exported sheets and the headers they are given.
preferred_cols = ['Drugs with labeled DME','Drug targets in DME pathway','Intermediate node count',
                  'Drug','PMID','Relationship', 'Notes']
export_names = {'Drug targets in DME pathway': 'DME pathway intermediate',
                'Drug': 'Predicted Cotherapy'}

# The context manager writes and closes the workbook even if a sheet fails;
# ExcelWriter.save() no longer exists in pandas 2.x.
with pd.ExcelWriter(os.path.join(coth_dir,'supp4_summary_drug_interactions.xlsx')) as writer:
    for dme in df2.keys():
        print(dme)
        if dme.capitalize() in all_node_data:
            dme_map = dme.capitalize()
        else:
            dme_map = dme_mapping[dme]
        # node_data is a nested defaultdict: an unknown node name yields empty
        # lists below instead of raising.
        node_data = all_node_data[dme_map]
        just_drugs = []
        node_counts = []

        dme_df2 = df2[dme]
        for row in dme_df2.itertuples():
            if row.Relationship in positive_rels:
                hist_data[dme]['positive'].add(row.Drug)
            elif row.Relationship in negative_rels:
                hist_data[dme]['negative'].add(row.Drug)
            # _3 is the positional itertuples() name of the third column.
            # NOTE(review): presumably 'Drug targets in DME pathway' - confirm.
            node_name = row._3
            if ',' in node_name: # sometimes a predicted combination has multiple targets within the DME pathway
                mcnts = []
                mdrugs = []
                for mnode in node_name.split(','):
                    mcnts.append(str(node_data[mnode]['count']))
                    mdrugs.append(node_data[mnode]['drugs'])
                # One '|'-separated entry per target, in the order listed.
                just_drugs.append('|'.join(mdrugs))
                node_counts.append('|'.join(mcnts))
            else: # when a predicted combination has single targets in the dme pathway
                node_counts.append(node_data[node_name]['count'])
                just_drugs.append(node_data[node_name]['drugs'])
        dme_df2.insert(1,'Drugs with labeled DME',just_drugs)
        dme_df2.insert(1,'Intermediate node count',node_counts)

        # reformat: keep the preferred columns in order, then apply the export headers
        dme_df2 = dme_df2[preferred_cols].rename(columns=export_names)
        dme_df2.to_excel(writer,sheet_name=dme)

Cardiac Arrest
Cerebral Infarction
Delirium
DVT
Edema
Gastric Ulcer
Hemorrhage
Hypertension
Hyperlipidemia
ILD
Myocardial Infarction
Myopathy
Neuropathy
Pancreatitis
Pneumonia
Proteinuria
Sepsis
Thrombocytopenia
Ventricular Tachy


In [7]:
# create a histogram of predicted and tested drug combinations
#
# Counts, per DME (sorted by name), how many drugs are in the 'predicted',
# 'positive' and 'negative' collections of hist_data, then saves a bar chart of
# the positive/negative counts and prints them.
labels = []
predicted = []  # collected for reference; not drawn on the chart
positive = []
negative = []
for dme in sorted(hist_data):
    ppn_dic = hist_data[dme]
    labels.append(dme)
    predicted.append(len(ppn_dic['predicted']))
    positive.append(len(ppn_dic['positive']))
    negative.append(len(ppn_dic['negative']))

x = np.arange(len(labels))  # the label locations
width = 0.25  # the width of the bars

# A single figure: the earlier duplicate subplots() call left an unused figure open.
fig, ax = plt.subplots()
rects2 = ax.bar(x, positive, width, label='Mitigate DME', color='blue')
rects3 = ax.bar(x + width*1.5, negative, width, label='Aggravates DME', color='red')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Number of drug combinations')
ax.set_title('Number of combination partners with literature evidence')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)
ax.set_yticks([0,5.,10.,15.,20.])
ax.legend()
# Leave room under the axes for the rotated DME names.
fig.subplots_adjust(bottom=0.4)

fig.savefig(os.path.join(coth_dir,'predicted_combos_bar_chart'))
# Close the figure so it does not stay registered with pyplot.
plt.close(fig)

for (dme,c) in zip(labels,positive):
    print([dme,c])
for (dme,c) in zip(labels,negative):
    print([dme,c])


['Cardiac Arrest', 0]
['Cerebral Infarction', 6]
['DVT', 2]
['Delirium', 3]
['Edema', 5]
['Gastric Ulcer', 1]
['Hemorrhage', 8]
['Hyperlipidemia', 2]
['Hypertension', 2]
['ILD', 0]
['Myocardial Infarction', 17]
['Myopathy', 0]
['Neuropathy', 4]
['Pancreatitis', 2]
['Pneumonia', 5]
['Proteinuria', 3]
['Sepsis', 19]
['Thrombocytopenia', 1]
['Ventricular Tachy', 9]
['Cardiac Arrest', 1]
['Cerebral Infarction', 1]
['DVT', 1]
['Delirium', 3]
['Edema', 2]
['Gastric Ulcer', 5]
['Hemorrhage', 10]
['Hyperlipidemia', 3]
['Hypertension', 2]
['ILD', 0]
['Myocardial Infarction', 6]
['Myopathy', 2]
['Neuropathy', 1]
['Pancreatitis', 1]
['Pneumonia', 0]
['Proteinuria', 2]
['Sepsis', 2]
['Thrombocytopenia', 2]
['Ventricular Tachy', 3]
