### Testing file for preprocessing_functions.py ###

In [None]:
# Import everything needed
from preprocessing_functions import *

Test get_drug_comb_data method

In [None]:
def test_get_drug_comb_df_all_relevant_values():
    """Pin the shape of the frame returned by get_drug_comb_data() with default arguments."""
    expected_shape = (722344, 26)
    loaded_frame = get_drug_comb_data()
    assert loaded_frame.shape == expected_shape

def test_remove_all_na_values():
    """Check that no score column holds a missing value when every score flag is set.

    The loader is called with all seven score flags enabled; each matching
    column must then be free of NA values and the frame must have the
    pinned shape.
    """
    # NOTE(review): presumably each flag asks the loader to drop rows that
    # lack that score -- confirm against preprocessing_functions.
    frame = get_drug_comb_data(
        bliss=True,
        loewe=True,
        hsa=True,
        zip=True,
        s_max=True,
        s_mean=True,
        s_sum=True,
    )

    # Checked in a fixed order so the first failing column is reported first.
    score_columns = (
        'synergy_bliss',
        'synergy_loewe',
        'synergy_hsa',
        'synergy_zip',
        'S_max',
        'S_mean',
        'S_sum',
    )
    for column in score_columns:
        assert frame[column].notna().all()

    assert frame.shape == (722032, 26)


# Run both DrugComb loader checks. A failing assert raises AssertionError and
# stops the cell, so the message below is only printed when every check passed.
test_get_drug_comb_df_all_relevant_values()
test_remove_all_na_values()

print("All tests passed!")

Test get_sider_data method

In [None]:
def test_get_sider_data():
    """Pin the shapes of the two frames returned by get_sider_data()."""
    cid_to_drugs, all_side_effects = get_sider_data()

    expected_cid_to_drugs_shape = (1429, 2)
    expected_side_effects_shape = (309848, 6)

    assert cid_to_drugs.shape == expected_cid_to_drugs_shape
    assert all_side_effects.shape == expected_side_effects_shape

# Run the SIDER loader check; the message is only reached if no assert failed.
test_get_sider_data()
print("All tests passed!")

Test get_ddinter_data method

In [None]:
# Loaded once at notebook scope: test_get_ddinter_data() reads this global, and
# the later find_drugcomb_ddinter_intersect cell passes it in as an argument,
# so this cell must be run before either of them.
ddinter_df = get_ddinter_data()

def test_get_ddinter_data():
    """Pin the shape of the notebook-level ``ddinter_df`` frame."""
    # Reads the module-level ``ddinter_df`` rather than loading the data again.
    expected_shape = (160235, 5)
    assert ddinter_df.shape == expected_shape

# Run the DDInter loader check; the message is only reached if no assert failed.
test_get_ddinter_data()
print("All tests passed!")

Test get_drugbank_ddi method

In [None]:
def test_get_drugbank_ddi():
    """Validate the first frame returned by get_drugbank_ddi().

    Checks, in order: the severity column has exactly three distinct values
    and no missing entries; both drug-name columns are entirely lower case
    and contain no missing entries; the frame has the pinned shape.
    """
    ddi_frame, _unused = get_drugbank_ddi()
    name_columns = ('subject_drug_name', 'affected_drug_name')

    # Severity: exactly three distinct levels, none of them missing.
    severity = ddi_frame['severity']
    assert len(severity.unique()) == 3
    assert not severity.isna().any()

    # Drug names must all be lower case ...
    for column in name_columns:
        assert ddi_frame[column].str.islower().all()

    # ... and must never be NA.
    for column in name_columns:
        assert ddi_frame[column].notna().all()

    # Overall dimensions of the frame.
    assert ddi_frame.shape == (1430454, 18)

def test_get_preprocessed_drugbank_ddi():
    """Pin the shape of the DrugBank DDI CSV stored under ``data_processed``."""
    csv_path = 'data_processed/drugbank_ddi.csv'
    saved_frame = pd.read_csv(csv_path)
    assert saved_frame.shape == (1430454, 18)

# Run the DrugBank DDI checks: first the freshly built frame, then the CSV on
# disk. The message is only reached if no assert failed.
test_get_drugbank_ddi()
test_get_preprocessed_drugbank_ddi()
print("All tests passed!")

Test find_drugcomb_ddinter_intersect method

In [None]:
# Load DrugComb with every score flag enabled and intersect it with
# ``ddinter_df``, which is defined in the get_ddinter_data cell and must
# already exist at notebook scope. The results are kept as globals because
# test_find_drugcomb_ddinter_intersect() reads them directly.
drugcomb_df = get_drug_comb_data(bliss=True, loewe=True, hsa=True, zip=True, s_max=True, s_mean=True, s_sum=True)
drug_syntox_df, major_pairs, moderate_pairs, minor_pairs, unknown_pairs = find_drugcomb_ddinter_intersect(drugcomb_df, ddinter_df)
# Printed for manual inspection alongside the assertion in the test below.
print(drug_syntox_df.shape)

def test_find_drugcomb_ddinter_intersect():
    """Check the DrugComb/DDInter intersection held in the notebook globals.

    Pins the shape of ``drug_syntox_df`` and the sizes of the major, moderate
    and minor pair collections, then confirms a known pair is stored in one
    orientation only.
    """
    assert drug_syntox_df.shape == (50108, 8)

    # Counts are taken after removing duplicates: (drug A, drug B) is
    # considered the same pair as (drug B, drug A).
    expected_sizes = (
        (major_pairs, 788),
        (moderate_pairs, 2341),
        (minor_pairs, 175),
    )
    for pairs, expected in expected_sizes:
        assert len(pairs) == expected

    # A pair must appear in exactly one orientation, never mirrored as well.
    stored_orientation = ('methotrexate', 'fenoprofen')
    mirrored_orientation = ('fenoprofen', 'methotrexate')
    assert stored_orientation in major_pairs
    assert mirrored_orientation not in major_pairs

# Run the DrugComb/DDInter intersection check; the message is only reached if
# no assert failed.
test_find_drugcomb_ddinter_intersect()
print("All tests passed!")

Test the find_drugcomb_drugbankddi_intersect method

In [None]:
# Inputs for the DrugComb/DrugBank intersection test, kept at notebook scope
# because test_find_drugcomb_drugbank_intersect() reads them as globals.
# This rebinds ``drugcomb_df`` with the same arguments as the previous cell.
drugcomb_df = get_drug_comb_data(bliss=True, loewe=True, hsa=True, zip=True, s_max=True, s_mean=True, s_sum=True)
# NOTE(review): ``pd`` is never imported explicitly in this notebook;
# presumably it is picked up through the star import from
# preprocessing_functions -- confirm, or add ``import pandas as pd``.
drugbank_ddi_df = pd.read_csv('data_processed/drugbank_ddi.csv')

def test_find_drugcomb_drugbank_intersect():
    """Intersect the notebook-level DrugComb and DrugBank DDI frames.

    Prints the shape of the resulting frame and the sizes of the major,
    moderate and minor pair collections, then asserts that no pair ended up
    in the unknown collection.
    """
    result = find_drugcomb_drugbankddi_intersect(drugcomb_df, drugbank_ddi_df)
    syntox_frame, major, moderate, minor, unknown = result

    # Report the observed sizes for manual inspection.
    print(syntox_frame.shape)
    for pairs in (major, moderate, minor):
        print(len(pairs))

    # TODO: pin the frame shape and the major/moderate/minor counts, and add a
    # mirrored-pair check (e.g. ('methotrexate', 'fenoprofen') present in the
    # major pairs but ('fenoprofen', 'methotrexate') absent), as in
    # test_find_drugcomb_ddinter_intersect. The DDInter figures
    # ((50108, 8), 788, 2341, 175) were left disabled here.
    assert len(unknown) == 0

# Run the DrugComb/DrugBank intersection check; the message is only reached if
# no assert failed.
test_find_drugcomb_drugbank_intersect()
print("All tests passed!")

Test jaccard_similarity method

In [None]:
def test_jaccard_similarity_equal():
    """Two equal sets must give a Jaccard similarity of exactly 1.0."""
    side_effects = {"Alopecia", "Vomiting", "Diarrhea"}
    same_side_effects = set(side_effects)  # distinct object, equal contents
    assert jaccard_similarity(side_effects, same_side_effects) == 1.0

def test_jaccard_similarity_disjoint():
    """Two sets with no common element must give a Jaccard similarity of 0.0."""
    first_effects = {"Alopecia", "Vomiting", "Diarrhea"}
    second_effects = {"Anorexia", "Hypertension", "Nausea"}
    assert jaccard_similarity(first_effects, second_effects) == 0.0

def test_jaccard_similarity_partial_overlap():
    """One shared element out of five distinct ones must give 1/5."""
    first_effects = {"Alopecia", "Vomiting", "Diarrhea"}
    second_effects = {"Alopecia", "Hypertension", "Nausea"}
    # Only "Alopecia" is shared; the union holds five distinct effects.
    expected_similarity = 1 / 5
    assert jaccard_similarity(first_effects, second_effects) == expected_similarity


# Run the three jaccard_similarity checks (equal, disjoint, partial overlap);
# the message is only reached if no assert failed.
test_jaccard_similarity_equal()
test_jaccard_similarity_disjoint()
test_jaccard_similarity_partial_overlap()
print("All tests passed!")