# Check on DrugBank DDI vs DDInter DDI #

In [None]:
import pandas as pd
from preprocessing_functions import *

In [None]:
# Load the DrugBank and DDInter known-toxicity tables.
# Both CSV files must already be present in the data_processed folder.
drugbank_csv = 'data_processed/drugbank_syntox_known.csv'
ddinter_csv = 'data_processed/ddinter_syntox_known.csv'
drugbankddi_known_syntox = pd.read_csv(drugbank_csv)
ddinter_known_syntox = pd.read_csv(ddinter_csv)

In [None]:
# How many of the drugcomb-drugbank intersection pairs overlap with the
# drugcomb-ddinter intersection pairs, and do the two sources agree on the
# toxicity category for the pairs they share?
#
# A drug pair is unordered: (a, b) and (b, a) are the same pair. Every tally
# below counts each unordered pair exactly once, using the orientation and the
# toxicity category of the first row in which the pair appears, so repeated or
# flipped rows in either table cannot inflate the counts.
num_agreeing_pairs = 0
ddinter_pairs_not_drugbank = set()
drugbank_pairs_not_ddinter = set()
disagreeing_pairs = set()  # drug 1, drug 2, drugbank_toxicity, ddinter_toxicity


def _iter_pair_rows(df):
    """Yield (drug_row, drug_col, toxicity_category) for each row of df."""
    # Column-wise zip avoids building a Series per row as iterrows() does.
    return zip(df['drug_row'], df['drug_col'], df['toxicity_category'])


# Map every DrugBank pair, in both orientations, to its toxicity category.
# Both orientations are always inserted together, so checking one is enough.
drugbank_pairs = {}
for drug1, drug2, drugbank_tox in _iter_pair_rows(drugbankddi_known_syntox):
    if (drug1, drug2) not in drugbank_pairs:
        drugbank_pairs[(drug1, drug2)] = drugbank_tox
        drugbank_pairs[(drug2, drug1)] = drugbank_tox

# Walk the DDInter pairs, building the same two-orientation lookup and
# classifying each new pair as agreeing, disagreeing, or absent from DrugBank.
ddinter_pairs = {}
for drug1, drug2, ddinter_tox in _iter_pair_rows(ddinter_known_syntox):
    if (drug1, drug2) in ddinter_pairs:
        # Duplicate or flipped row for a pair that was already tallied.
        continue
    ddinter_pairs[(drug1, drug2)] = ddinter_tox
    ddinter_pairs[(drug2, drug1)] = ddinter_tox

    if (drug1, drug2) not in drugbank_pairs:
        ddinter_pairs_not_drugbank.add((drug1, drug2))
    elif drugbank_pairs[(drug1, drug2)] != ddinter_tox:
        disagreeing_pairs.add((drug1, drug2, drugbank_pairs[(drug1, drug2)], ddinter_tox))
    else:
        num_agreeing_pairs += 1

# DrugBank pairs with no DDInter counterpart. The flipped-orientation check
# keeps (a, b) and (b, a) from being recorded as two different pairs.
for drug1, drug2, _ in _iter_pair_rows(drugbankddi_known_syntox):
    if (drug1, drug2) in ddinter_pairs:
        continue
    if (drug2, drug1) in drugbank_pairs_not_ddinter:
        continue
    drugbank_pairs_not_ddinter.add((drug1, drug2))

print('Number of drug pairs in the intersection of drugcomb and drugbank that agree on toxicity category:', num_agreeing_pairs)
print('Number of drug pairs in the intersection of drugcomb and drugbank that disagree on toxicity category:', len(disagreeing_pairs))
print('Number of drug pairs in the intersection of drugcomb and drugbank that are not in the ddinter data:', len(drugbank_pairs_not_ddinter))
print('Number of drug pairs in the intersection of drugcomb and ddinter that are not in the drugbank data:', len(ddinter_pairs_not_drugbank))
print(disagreeing_pairs) # There are pairs that disagree on toxicity category between DrugBank and DDInter

Number of drug pairs in the intersection of drugcomb and drugbank that agree on toxicity category: 9466
Number of drug pairs in the intersection of drugcomb and drugbank that disagree on toxicity category: 2141
Number of drug pairs in the intersection of drugcomb and drugbank that are not in the ddinter data: 66277
Number of drug pairs in the intersection of drugcomb and ddinter that are not in the drugbank data: 0
{('tinidazole', 'docetaxel', 'Unknown', 'Moderate'), ('fostamatinib', 'ruxolitinib', 'Major', 'Moderate'), ('doxorubicin', 'citalopram', 'Moderate', 'Major'), ('methotrexate', 'celecoxib', 'Major', 'Moderate'), ('dronedarone', 'doxorubicin', 'Moderate', 'Major'), ('zalcitabine', 'clofarabine', 'Unknown', 'Moderate'), ('crizotinib', 'romidepsin', 'Moderate', 'Major'), ('tranylcypromine', 'altretamine', 'Minor', 'Moderate'), ('doxorubicin', 'leflunomide', 'Moderate', 'Major'), ('thiotepa', 'cytarabine', 'Major', 'Moderate'), ('rucaparib', 'ruxolitinib', 'Moderate', 'Minor'), (

In [None]:
# Report how many distinct entries were collected in disagreeing_pairs.
num_disagreeing_pairs = len(disagreeing_pairs)
print("Number of disagreeing pairs:", num_disagreeing_pairs)

Number of disagreeing pairs: 2141
