# File to parse DrugBanks's drug-drug interactions and severity data #
- 

In [None]:
import pandas as pd
import numpy as np
from preprocessing_functions import *

1. Lowercase for drug names for intersection between DrugComb and DrugBank
2. Get mapping of drug ID, drugbank ID, and drug name, save to file

In [2]:
drugbank_ddi_df, drugbank_drug_ids_to_names = get_drugbank_ddi()

  drugbank_ddi_df = pd.read_csv(drugbank_ddi_fp)


How many interactions are in each severity category in the drugbank database?
severity
1    742987
0    535009
2    152458
Name: count, dtype: int64
Shape of drugbank ddi dataframe:  (1430454, 18)


1. Get the intersection between the drugbank_ddi_df and DrugComb dataset
    - Based on measurements of severity and the descriptions, determined that 0 is lowest severity and 2 is high severity

In [3]:
# Get the intersection of the drugcomb data and the drugbank data
drugcomb_df = get_drug_comb_data(bliss=True, loewe=True, hsa=True, zip=True, s_max=True, s_mean=True, s_sum=True)
drugbank_syntox_df, intersection_major_pairs, intersection_moderate_pairs, intersection_minor_pairs, intersection_unknown_pairs = find_drugcomb_drugbankddi_intersect(drugcomb_df, drugbank_ddi_df)

  drugcomb_df = pd.read_csv('data/DrugComb/drugcomb_summary_v_1_5.csv', sep=',', index_col=False)


Original shape of drugcomb data:  (1432351, 26)
Final shape of filtered drugcomb data:  (722032, 26)
Number of drugs in common between drugcomb and drugbank [lowercase enforced]:  1079
Major pairs in both DrugComb and in DrugBank:  3211
Moderate pairs in both DrugComb and in DrugBank:  5534
Minor pairs in both DrugComb and in DrugBank:  4799
Unknown toxicity pairs in both DrugComb and in DrugBank:  56177
Total common pairs:  69721
Total known pairs:  13544


In [4]:
drugbank_syntox_df = drugbank_syntox_df[drugbank_syntox_df['toxicity_category'] != 'Unknown']
drugbank_syntox_df.to_csv('data_processed/drugbank_syntox_known.csv', index=False)

In [5]:
# How many of the drugcomb-ddinter intersection pairs overlap with the drugcomb-ddinter intersection pairs?
ddinter_syntox_df = pd.read_csv('data_processed/syntox_known.csv')
num_agreeing_pairs = 0
ddinter_pairs_not_drugbank = set()
drugbank_pairs_not_ddinter = set()
disagreeing_pairs = set() # drug 1, drug 2, drugbank_toxicity, ddinter_toxicity

# create a set of all the drug pairs in the drugbank data (flipped and unflipped)
drugbank_pairs = {}
for index, row in drugbank_syntox_df.iterrows():
    drug1 = row['drug_row']
    drug2 = row['drug_col']
    drugbank_tox = row['toxicity_category']
    if (drug1, drug2) not in drugbank_pairs and (drug2, drug1) not in drugbank_pairs:
        drugbank_pairs[(drug1, drug2)] = drugbank_tox
        drugbank_pairs[(drug2, drug1)] = drugbank_tox

ddinter_pairs = {}
for index, row in ddinter_syntox_df.iterrows():
    drug1 = row['drug_row']
    drug2 = row['drug_col']
    ddinter_tox = row['toxicity_category']
    if (drug1, drug2) in drugbank_pairs or (drug2, drug1) in drugbank_pairs:
        if drugbank_pairs[(drug1, drug2)] != ddinter_tox:
            disagreeing_pairs.add((drug1, drug2, drugbank_pairs[(drug1, drug2)], ddinter_tox))
        else:
            num_agreeing_pairs += 1
    else:
        ddinter_pairs_not_drugbank.add((drug1, drug2))
    
    if (drug1, drug2) not in ddinter_pairs and (drug2, drug1) not in ddinter_pairs:
        ddinter_pairs[(drug1, drug2)] = ddinter_tox
        ddinter_pairs[(drug2, drug1)] = ddinter_tox

for index, row in drugbank_syntox_df.iterrows():
    drug1 = row['drug_row']
    drug2 = row['drug_col']
    if (drug1, drug2) not in ddinter_pairs and (drug2, drug1) not in ddinter_pairs:
        drugbank_pairs_not_ddinter.add((drug1, drug2))

print('Number of drug pairs in the intersection of drugcomb and drugbank that agree on toxicity category:', num_agreeing_pairs)
print('Number of drug pairs in the intersection of drugcomb and drugbank that disagree on toxicity category:', len(disagreeing_pairs))
print('Number of drug pairs in the intersection of drugcomb and drugbank that are not in the ddinter data:', len(drugbank_pairs_not_ddinter))
print('Number of drug pairs in the intersection of drugcomb and ddinter that are not in the drugbank data:', len(ddinter_pairs_not_drugbank))
print(disagreeing_pairs) # There are pairs that disagree on toxicity category between DrugBank and DDInter

Number of drug pairs in the intersection of drugcomb and drugbank that agree on toxicity category: 9466
Number of drug pairs in the intersection of drugcomb and drugbank that disagree on toxicity category: 1546
Number of drug pairs in the intersection of drugcomb and drugbank that are not in the ddinter data: 10835
Number of drug pairs in the intersection of drugcomb and ddinter that are not in the drugbank data: 595
{('fostamatinib', 'ruxolitinib', 'Major', 'Moderate'), ('doxorubicin', 'citalopram', 'Moderate', 'Major'), ('methotrexate', 'celecoxib', 'Major', 'Moderate'), ('dronedarone', 'doxorubicin', 'Moderate', 'Major'), ('crizotinib', 'romidepsin', 'Moderate', 'Major'), ('tranylcypromine', 'altretamine', 'Minor', 'Moderate'), ('doxorubicin', 'leflunomide', 'Moderate', 'Major'), ('thiotepa', 'cytarabine', 'Major', 'Moderate'), ('rucaparib', 'ruxolitinib', 'Moderate', 'Minor'), ('docetaxel', 'lapatinib', 'Major', 'Moderate'), ('felbamate', 'docetaxel', 'Major', 'Moderate'), ('palono