
# One Runthrough

In [2]:
import pandas as pd
from module4.ebc_scoring import EBCScoring


# Build the scorer and load the dense ITCC artifact once. The loaded object is
# passed on below instead of calling get_dense_itcc_artifact() a second time.
# NOTE(review): assumes get_dense_itcc_artifact() returns the same artifact on
# every call -- confirm against module4.ebc_scoring.
e = EBCScoring()
artifact = e.get_dense_itcc_artifact()

# Draw one seed set and one test set from the artifact.
seed, test = e.get_seed_and_test_sets(artifact=artifact, sample_size=10)

# Build the filtered matrix from the "Drug-Gene" column of the test set.
df = e.generate_filtered_matrix(test["Drug-Gene"])

# Produce the scoring artifact from the filtered matrix and the seed pairs.
scoring = e.generate_ebc_scoring_artifact(
    filtered_matrix=df, seed_pairs=seed["Drug-Gene"]
)

In [3]:

# Preview the first rows of the scoring artifact.
scoring.head()

Unnamed: 0,"(dasatinib,stat3)","(norepinephrine,rbp4)","(aldosterone,nhe-1)","(il-2,stat6)","(flecainide,cyp2d6)","(ritonavir,cyp2d6)","(trimethoprim,cyp2c8)","(danazol,shbg)","(enoxacin,cyp1a2)","(taurine,cyp7a1)",SeedSet
"(flavopiridol,nf-kappab)",181.0,164.0,167.0,534.0,66.0,38.0,35.0,138.0,93.0,147.0,False
"(tnf-r2,tnf-r1)",97.0,204.0,72.0,139.0,53.0,25.0,21.0,60.0,74.0,246.0,False
"(il-2,il-5)",115.0,228.0,79.0,144.0,45.0,25.0,25.0,70.0,105.0,283.0,False
"(il-11,il-10)",121.0,223.0,103.0,114.0,44.0,23.0,15.0,98.0,60.0,235.0,False
"(fgf-7,fgf-2)",170.0,202.0,156.0,113.0,44.0,15.0,10.0,131.0,56.0,190.0,False


The matrix above is the unsorted N×T co-occurrence matrix.

In [6]:

from module4.visualize import Visualize

# Plotting helper from module4.visualize; its bar() method is used further down.
v = Visualize()

In [12]:
# Derive rankings from the scoring artifact. Judging by the output below, the
# result is a dict keyed by drug-gene pair, each value mapping pairs to an
# integer rank starting at 1 -- TODO confirm against get_rankings().
rankings = e.get_rankings(ebc_scoring_artifact=scoring)
rankings

{'(dasatinib,stat3)': {'(dasatinib,stat3)': 1,
  '(latanoprost,mmp-3)': 2,
  '(dexamethasone,secretagogin)': 3,
  '(mitomycin,rad51)': 4,
  '(mirtazapine,bdnf)': 5,
  '(infliximab,vegf)': 6,
  '(bethanechol,gastrin)': 7,
  '(l-dopa,zif-268)': 8,
  '(il-2,ap-1)': 9,
  '(olanzapine,ghrelin)': 10,
  '(paclitaxel,il-10)': 11,
  '(creatine,glut4)': 12,
  '(cisplatin,hmg1)': 13,
  '(phenytoin,cyp3a4)': 14,
  '(rosuvastatin,apoe)': 15,
  '(etoposide,pc-3)': 16,
  '(fentanyl,mir-190)': 17,
  '(capsaicin,igf-i)': 18,
  '(g-csf,cox-2)': 19,
  '(troglitazone,cyp3a4)': 20,
  '(saha,waf1)': 21,
  '(ramipril,aire)': 22,
  '(docetaxel,dlts)': 23,
  '(gemcitabine,dlts)': 24,
  '(atra,cd34)': 25,
  '(flurbiprofen,cox-2)': 26,
  '(cisplatin,cd44)': 27,
  '(carboplatin,vegf)': 28,
  '(ampicillin,ml-1)': 29,
  '(diltiazem,ml-1)': 30,
  '(doxorubicin,cox-2)': 31,
  '(imiquimod,ifn-alpha)': 32,
  '(glcnac,rft1)': 33,
  '(nifedipine,dhps)': 34,
  '(methacholine,gastrin)': 35,
  '(pioglitazone,apoe)': 36,
  '

Now we calculate the scores for one test set using the `count_scores` method built into `EBCScoring`. The score of each test set member is simply the sum of the ranks of the seed set members associated with that particular test set member.

In [19]:

# Compute the rank-sum score for every member of the test set.
test_set_scores = e.count_scores(rankings=rankings, ebc_scoring_artifact=scoring)

# Plot one bar per test-set member: pair label on x, rank-sum score on y.
v.bar(
    x=list(test_set_scores.keys()),
    y=list(test_set_scores.values()),
    title="Sample Ranksum Scores for Size 10 Test Set",
)

Bar displayed...


In [12]:

# Display the test set sampled at the start of the run-through.
test

Unnamed: 0,Drug-Gene,Drug-Gene Cluster,Dependency Path,Dependency path Cluster,DrugBank
2567,(ritonavir/cyp2d6),28.0,,,True
824,(plerixafor/cxcr4),1.0,"[nsubjpass, administered, prep_to, mice, amod]",8.0,True
2607,(diazepam/cyp2c19),15.0,,,True
3339,(warfarin/cyp2c9),15.0,,,True
3180,(dexamethasone/cyp1a1),5.0,,,True
1686,(estradiol/osteocalcin),13.0,,,False
2471,(etoposide/parp),21.0,,,False
2238,(imatinib/pc-3),21.0,,,False
2504,(flurbiprofen/cgrp),21.0,,,False
1943,(formoterol/gm-csf),28.0,,,False


# General Application

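Now we repeat this process R times, generating R seed/test set pairs of varying sizes. Seed set size can range from a low of 5 to a high of 12. (Note: the cell below passes `low = 10, high = 50` to `run_R_times`, so the bounds quoted here may be out of date.)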

In [3]:
import pandas as pd
from module4.ebc_scoring import EBCScoring


e = EBCScoring()

# Repeat the seed/test sampling and scoring R times. The call returns the
# sampled seed/test sets together with the matching test-set scores.
seed_test_sets, all_test_set_scores = e.run_R_times(
    low=10,
    high=50,
    R=100,
)

In [5]:
# Unpack the first entry of seed_test_sets into its two parts
# (presumably seed set and test set, in that order -- verify against run_R_times).
s,t = seed_test_sets[0]
# Preview up to the first 10 rows of the second part.
t.head(10)

Unnamed: 0,Drug-Gene,Drug-Gene Cluster,Dependency Path,Dependency path Cluster,DrugBank
2394,(flecainide/cyp2d6),28.0,,,True
2511,(theophylline/cyp2e1),9.0,,,True
3386,(gemcitabine/rrm1),10.0,,,True
3220,(aldosterone/cyp11b1),20.0,,,True
1507,(dicoumarol/nqo1),12.0,,,True
1528,(estradiol/ugt1a1),14.0,,,True
2112,(tamoxifen/cyp2d6),17.0,,,True
1305,(thiotepa/cyp2b6),23.0,,,True
2567,(ritonavir/cyp2d6),28.0,,,True
2074,(dantrolene/ryr1),10.0,,,True


In [6]:

# Inspect the scores stored at index 0 (same index as the pair unpacked above).
all_test_set_scores[0]

{'(cetuximab,egfr)': 3325,
 '(mitoxantrone,abcg2)': 3407,
 '(protamine,tfpi)': 3505,
 '(urokinase,pai-2)': 3391,
 '(rapamycin,aicar)': 3507,
 '(carnitine,crat)': 3474,
 '(ticagrelor,p2y12)': 3248,
 '(gm-csf,stat1)': 3377,
 '(omeprazole,cyp3a4)': 3493,
 '(amitriptyline,eaat3)': 3466,
 '(thiotepa,cyp2b6)': 3466,
 '(dicoumarol,nqo1)': 3507,
 '(estradiol,ugt1a1)': 3356,
 '(corticosteroids,adrenocorticotropin)': 3483,
 '(omeprazole,cyp1a1)': 3509,
 '(ketamine,hsp70)': 3499,
 '(pertuzumab,her-2)': 3256,
 '(dantrolene,ryr1)': 3327,
 '(dasatinib,epha2)': 3497,
 '(tamoxifen,cyp2d6)': 3270,
 '(flutamide,cyp1a2)': 3499,
 '(menadione,sod1)': 3482,
 '(hyaluronan,habp1)': 3425,
 '(capecitabine,her2)': 3461,
 '(flecainide,cyp2d6)': 3274,
 '(norepinephrine,phospholipase)': 3508,
 '(methylprednisolone,il-10)': 3504,
 '(resveratrol,pkm2)': 3509,
 '(theophylline,cyp2e1)': 3489,
 '(ritonavir,cyp2d6)': 3177,
 '(ezetimibe,statin)': 3453,
 '(etodolac,il-1)': 3511,
 '(clozapine,cyp2d6)': 3465,
 '(naltrexone,o

In [7]:
# Number of score collections returned; the run above was requested with R = 100.
len(all_test_set_scores)

100

In [9]:

import pickle
from datetime import datetime
from pathlib import Path

# Timestamp shared by both file names so the two artifacts of one run can be
# matched up afterwards.
# NOTE(review): the ":" in the timestamp is not a valid file-name character on
# Windows; the format is kept unchanged to stay consistent with existing files.
time = datetime.now().strftime("%Y-%m-%d,%H:%M")

# Make sure the output directory exists; opening a file for writing inside a
# missing directory raises FileNotFoundError.
scores_dir = Path("../data/artifacts/scores")
scores_dir.mkdir(parents=True, exist_ok=True)

# The files hold pickled (binary) data despite the .txt extension, hence "wb".
with open(scores_dir / f"test_sets_{time}.txt", "wb") as fp:  # Pickling
    pickle.dump(all_test_set_scores, fp)

# Saving corresponding seed test sets
with open(scores_dir / f"seed_test_sets_{time}.txt", "wb") as fp:
    pickle.dump(seed_test_sets, fp)


In [13]:
# Reload the scores saved by an earlier run. The path is relative to the same
# artifacts directory the save cell writes to, rather than one user's home
# directory, so the notebook also runs on other machines.
# NOTE(review): assumes the notebook's working directory sits one level below
# the repository root -- confirm.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open("../data/artifacts/scores/test_sets_2021-08-14,08:29.txt", "rb") as fp:  # Unpickling
    test_sets = pickle.load(fp)

[{'(dexamethasone,cyp2e1)': 3204,
  '(lamotrigine,ugt1a4)': 523,
  '(celecoxib,egfr)': 2645,
  '(pioglitazone,pai-1)': 2807},
 {'(sorafenib,braf)': 3261,
  '(cisplatin,tlr4)': 3241,
  '(naltrexone,oprm1)': 2123,
  '(dhea,g6pdh)': 3443},
 {'(risperidone,cyp2d6)': 2608,
  '(lamotrigine,ugt1a4)': 2804,
  '(dapsone,cyp2c9)': 3462,
  '(curcumin,hsp70)': 3447,
  '(menadione,nadph)': 3302,
  '(gm-csf,cd40)': 3447},
 {'(pge2,inositol)': 3459,
  '(cilastatin,dehydropeptidase-i)': 2136,
  '(lapatinib,erbb2)': 2101,
  '(levodopa,catechol-o-methyltransferase)': 1900,
  '(lidocaine,egfr)': 3147,
  '(il-2,rantes)': 3424,
  '(diazepam,cyp3a4)': 3394,
  '(vasopressin,avpr1b)': 3468,
  '(aldosterone,cyp11b2)': 3473,
  '(indomethacin,endothelin-1)': 3441},
 {'(cyclophosphamide,cyp2b6)': 3454,
  '(cisplatin,cd44)': 3480,
  '(danazol,sulfatase)': 2548,
  '(diethylcarbamazine,leukotriene)': 3048,
  '(resveratrol,cyp1a1)': 1382,
  '(flecainide,cyp2d6)': 2134,
  '(rolipram,pde4b)': 2595,
  '(dexamethasone,p3