### Testing file for toxicity_ranking.py ###

In [1]:
# Import everything needed
import math

from toxicity_ranking import *

Test get_drug_comb_data method

In [2]:
def test_get_drug_comb_df_all_relevant_values():
    """Loading DrugComb with no synergy flags should give a (722344, 26) frame."""
    expected_shape = (722344, 26)
    assert get_drug_comb_data().shape == expected_shape

def test_get_drug_comb_df_filter_ant_bliss():
    """With bliss=True, expect 355965 rows and no negative synergy_bliss value."""
    bliss_filtered = get_drug_comb_data(bliss=True)
    expected_shape = (355965, 26)
    assert bliss_filtered.shape == expected_shape
    # Every remaining Bliss score must be non-negative.
    assert all(score >= 0 for score in bliss_filtered['synergy_bliss'].values)

def test_get_drug_comb_df_filter_ant_loewe():
    """With loewe=True, expect 220919 rows and no negative synergy_loewe value."""
    loewe_filtered = get_drug_comb_data(loewe=True)
    expected_shape = (220919, 26)
    assert loewe_filtered.shape == expected_shape
    # Every remaining Loewe score must be non-negative.
    assert all(score >= 0 for score in loewe_filtered['synergy_loewe'].values)

def test_get_drug_comb_df_filter_ant_hsa():
    """With hsa=True, expect 324351 rows and no negative synergy_hsa value."""
    hsa_filtered = get_drug_comb_data(hsa=True)
    expected_shape = (324351, 26)
    assert hsa_filtered.shape == expected_shape
    # Every remaining HSA score must be non-negative.
    assert all(score >= 0 for score in hsa_filtered['synergy_hsa'].values)

def test_get_drug_comb_df_filter_ant_zip():
    """With zip=True, expect 336229 rows and no negative synergy_zip value."""
    zip_filtered = get_drug_comb_data(zip=True)
    expected_shape = (336229, 26)
    assert zip_filtered.shape == expected_shape
    # Every remaining ZIP score must be non-negative.
    assert all(score >= 0 for score in zip_filtered['synergy_zip'].values)

def test_get_drug_comb_df_filter_ant_all():
    """With all four synergy flags on, every score column is non-negative.

    Also checks the final frame shape of (123882, 26).
    """
    fully_filtered = get_drug_comb_data(bliss=True, loewe=True, hsa=True, zip=True)

    score_columns = ('synergy_bliss', 'synergy_loewe', 'synergy_hsa', 'synergy_zip')
    # Fetch all four columns before asserting anything, so a missing column
    # surfaces ahead of any score check.
    score_arrays = [fully_filtered[column].values for column in score_columns]
    for scores in score_arrays:
        assert all(score >= 0 for score in scores)

    assert fully_filtered.shape == (123882, 26)

    # Reference shapes as the filters are applied one after another:
    # Original shape of drugcomb data:  (1432351, 26)
    # Shape after filtering out malaria and SARS-CoV-2 and Ebola data:  (722344, 26)
    # Shape after filtering out antagonistic bliss:  (355965, 26)
    # Shape after filtering out antagonistic loewe:  (162283, 26)
    # Shape after filtering out antagonistic hsa:  (149580, 26)
    # Shape after filtering out antagonistic zip:  (123882, 26)
    # Final shape of filtered drugcomb data:  (123882, 26)

# Run the get_drug_comb_data tests in order: the unfiltered load, each single
# filter, then all filters combined. The first failing assertion stops the cell.
for run_test in (
    test_get_drug_comb_df_all_relevant_values,
    test_get_drug_comb_df_filter_ant_bliss,
    test_get_drug_comb_df_filter_ant_loewe,
    test_get_drug_comb_df_filter_ant_hsa,
    test_get_drug_comb_df_filter_ant_zip,
    test_get_drug_comb_df_filter_ant_all,
):
    run_test()

print("All tests passed!")

  drugcomb_df = pd.read_csv('data/DrugComb/drugcomb_summary_v_1_5.csv', sep=',', index_col=False)


Original shape of drugcomb data:  (1432351, 26)
Final shape of filtered drugcomb data:  (722344, 26)


  drugcomb_df = pd.read_csv('data/DrugComb/drugcomb_summary_v_1_5.csv', sep=',', index_col=False)


Original shape of drugcomb data:  (1432351, 26)
Final shape of filtered drugcomb data:  (355965, 26)


  drugcomb_df = pd.read_csv('data/DrugComb/drugcomb_summary_v_1_5.csv', sep=',', index_col=False)


Original shape of drugcomb data:  (1432351, 26)
Final shape of filtered drugcomb data:  (220919, 26)


  drugcomb_df = pd.read_csv('data/DrugComb/drugcomb_summary_v_1_5.csv', sep=',', index_col=False)


Original shape of drugcomb data:  (1432351, 26)
Final shape of filtered drugcomb data:  (324351, 26)


  drugcomb_df = pd.read_csv('data/DrugComb/drugcomb_summary_v_1_5.csv', sep=',', index_col=False)


Original shape of drugcomb data:  (1432351, 26)
Final shape of filtered drugcomb data:  (336229, 26)


  drugcomb_df = pd.read_csv('data/DrugComb/drugcomb_summary_v_1_5.csv', sep=',', index_col=False)


Original shape of drugcomb data:  (1432351, 26)
Final shape of filtered drugcomb data:  (123882, 26)
All tests passed!


Test get_sider_data method

In [3]:
def test_get_sider_data():
    """Both tables returned by get_sider_data should have the expected shapes."""
    cid_to_drugs, all_side_effects = get_sider_data()
    expected_cid_shape = (1429, 2)
    expected_side_effects_shape = (309848, 6)
    assert cid_to_drugs.shape == expected_cid_shape
    assert all_side_effects.shape == expected_side_effects_shape

# Run the test; a failed assertion raises before the success message is printed.
test_get_sider_data()
print("All tests passed!")

All tests passed!


Test filter_drug_comb_data_by_sider

In [4]:
# Build the notebook-level inputs for this section: DrugComb data loaded with
# all four synergy flags enabled, plus the two tables returned by get_sider_data.
drugcomb_df = get_drug_comb_data(bliss=True, loewe=True, hsa=True, zip=True)
sider_cid_to_drugs_df, sider_all_side_effects_df = get_sider_data()

# NOTE: these names are also read by test cells further down (the SIDER tables
# and unique_drug_pairs), so this cell must run before them.
filtered_drug_comb_data, common_drugs, unique_drug_pairs = filter_drug_comb_data_by_sider(drugcomb_df, sider_cid_to_drugs_df)

def test_filter_drug_comb_data():
    """Check the SIDER-filtered frame shape and the unique drug-pair collection.

    Reads the notebook-level ``filtered_drug_comb_data`` and
    ``unique_drug_pairs`` produced by filter_drug_comb_data_by_sider.
    """
    assert filtered_drug_comb_data.shape == (20824, 26)
    assert len(unique_drug_pairs) == 6552

    # Spot-check de-duplication: a known pair is stored in one orientation only.
    forward_pair = ('methotrexate', 'fenoprofen')
    reversed_pair = forward_pair[::-1]
    assert forward_pair in unique_drug_pairs
    assert reversed_pair not in unique_drug_pairs

# Run the test; a failed assertion raises before the success message is printed.
test_filter_drug_comb_data()
print("All tests passed!")

  drugcomb_df = pd.read_csv('data/DrugComb/drugcomb_summary_v_1_5.csv', sep=',', index_col=False)


Original shape of drugcomb data:  (1432351, 26)
Final shape of filtered drugcomb data:  (123882, 26)
Original drugcomb data shape:  (123882, 26)
Number of drugs in common between drugcomb and sider [lowercase enforced]:  401
Filtered drugcomb data shape for both drugs being present in sider:  (20824, 26)
Number of unique drug pairs:  6552
All tests passed!


Test the get_drug_to_side_effects method

In [5]:
def test_get_drug_to_side_effects():
    """Spot-check the number of side effects recorded for three known drugs.

    Uses the notebook-level SIDER tables as input.
    """
    side_effects_by_drug = get_drug_to_side_effects(sider_cid_to_drugs_df, sider_all_side_effects_df)
    expected_counts = {
        'dutasteride': 47,
        'mebendazole': 13,
        'testolactone': 16,
    }
    for drug_name, expected_count in expected_counts.items():
        assert len(side_effects_by_drug[drug_name]) == expected_count

# Run the test; a failed assertion raises before the success message is printed.
test_get_drug_to_side_effects()

print("All tests passed!")

All tests passed!


Test jaccard_similarity method

In [6]:
def test_jaccard_similarity_equal():
    """Two equal sets should have a similarity of exactly 1.0."""
    side_effects = {"Alopecia", "Vomiting", "Diarrhea"}
    # Pass an independent copy so the two arguments are equal but distinct objects.
    same_side_effects = set(side_effects)
    assert jaccard_similarity(side_effects, same_side_effects) == 1.0

def test_jaccard_similarity_disjoint():
    """Two sets with no element in common should have a similarity of 0.0."""
    first_effects = {"Alopecia", "Vomiting", "Diarrhea"}
    second_effects = {"Anorexia", "Hypertension", "Nausea"}
    similarity = jaccard_similarity(first_effects, second_effects)
    assert similarity == 0.0

def test_jaccard_similarity_partial_overlap():
    """Two sets sharing one of five distinct elements should score 1/5.

    1/5 has no exact binary floating-point representation, so the result is
    compared with a tolerance rather than with ``==``. That keeps the test
    independent of how the implementation arrives at the ratio.
    """
    s1 = {"Alopecia", "Vomiting", "Diarrhea"}
    s2 = {"Alopecia", "Hypertension", "Nausea"}
    # Intersection: {"Alopecia"} (1 element); union: 5 distinct elements.
    assert math.isclose(jaccard_similarity(s1, s2), 1 / 5)


# Run the three jaccard_similarity tests in order; the first failing assertion
# stops the cell before the success message.
for run_test in (
    test_jaccard_similarity_equal,
    test_jaccard_similarity_disjoint,
    test_jaccard_similarity_partial_overlap,
):
    run_test()
print("All tests passed!")

All tests passed!


Test drug_pair_to_jaccard_similarity

In [7]:
# Call drug_pair_to_jaccard_similarity once at notebook level for all unique drug pairs.
# NOTE: drug_pair_to_jaccard is read by the test just below and by the ranking test in a later cell.
drug_pair_to_jaccard, drug_pair_to_side_effects = drug_pair_to_jaccard_similarity(unique_drug_pairs, sider_cid_to_drugs_df, sider_all_side_effects_df)
def test_drug_pair_to_jaccard_similarity():
    """Spot-check two entries of the notebook-level ``drug_pair_to_jaccard`` mapping."""
    expected_scores = {
        ('clofarabine', 'clofarabine'): 1.0,  # a drug paired with itself
        ('dopamine', 'chlorzoxazone'): 0.0,
    }
    for drug_pair, expected_score in expected_scores.items():
        assert drug_pair_to_jaccard[drug_pair] == expected_score

# Run the test; a failed assertion raises before the success message is printed.
test_drug_pair_to_jaccard_similarity()
print("All tests passed!")

All tests passed!


Test the rank_drug_pairs method

In [8]:
def test_rank_drug_pairs():
    """Check that rank_drug_pairs orders pairs from highest to lowest similarity.

    Reads the notebook-level ``drug_pair_to_jaccard`` mapping. Each ranked entry
    is unpacked as a ``(drug_pair, score)`` tuple. The first and last ten
    entries are still printed for manual inspection.
    """
    ranked_drug_pairs = rank_drug_pairs(drug_pair_to_jaccard)
    print(ranked_drug_pairs[:10])
    print(ranked_drug_pairs[-10:])

    # Printing alone can never fail, so verify the ordering explicitly:
    # scores must be non-increasing from the first entry to the last.
    scores = [score for _pair, score in ranked_drug_pairs]
    assert scores == sorted(scores, reverse=True)

# Run the ranking test, which prints the first ten and last ten ranked entries.
test_rank_drug_pairs()

[(('ponatinib', 'ponatinib'), 1.0), (('vemurafenib', 'vemurafenib'), 1.0), (('trametinib', 'trametinib'), 1.0), (('temozolomide', 'temozolomide'), 1.0), (('clofarabine', 'clofarabine'), 1.0), (('cladribine', 'cladribine'), 1.0), (('acarbose', 'miglitol'), 0.45652173913043476), (('acamprosate', 'tiagabine'), 0.44537815126050423), (('exemestane', 'letrozole'), 0.4423076923076923), (('imatinib', 'nilotinib'), 0.43548387096774194)]
[(('paromomycin', 'doxorubicin'), 0.005758157389635317), (('paromomycin', 'bortezomib'), 0.004815409309791332), (('dutasteride', 'etomidate'), 0.0), (('testolactone', 'dutasteride'), 0.0), (('etomidate', 'paromomycin'), 0.0), (('testolactone', 'mebendazole'), 0.0), (('monobenzone', 'temozolomide'), 0.0), (('mebendazole', 'epinephrine'), 0.0), (('paromomycin', 'disulfiram'), 0.0), (('dopamine', 'chlorzoxazone'), 0.0)]


Test cosine_similarity method

In [9]:
# Get the cosine similarity of two embedding arrays
# INPUT:
#   embedding1: (np.array) the first embedding array
#   embedding2: (np.array) the second embedding array
# OUTPUT:
#   (float) the cosine similarity of the two embedding arrays
def cosine_similarity(embedding1, embedding2):
    # Reshape the arrays to match the expected input shape of cosine_similarity
    # embedding1 = embedding1.reshape(1, -1)
    # embedding2 = embedding2.reshape(1, -1)

    # Calculate the cosine similarity
    cs = cosine_similarity(embedding1, embedding2) 
    print(cs)
    print(cs.shape)