# Interactor Finder
The `InteractorFinder` class identifies nodes that are connected to a chosen starting node by a given type of edge. Here, we initialize the class with the MAPT protein carrying a phosphorylation modification and find all of its causally linked neighbors.

In [1]:
#!pip install drugintfinder
from drugintfinder.finder import InteractorFinder

import pandas as pd

# Describe the starting point: MAPT with a phosphorylation pmod, following causal edges
finder = InteractorFinder(
    symbol="MAPT",
    pmods=["pho"],
    edge="causal",
)

# Match the starting nodes of type protein (i.e. the MAPT protein) and collect all interactors
neighbors = finder.find_interactors(target_type="protein")
neighbors

Collecting all CT data: 100%|████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s]
Parsing and importing clinical trials data: 100%|███████████████████████████████| 1600/1600 [00:00<00:00, 69571.70it/s]
Parsing and importing drug data: 100%|█████████████████████████████████████████| 10000/10000 [00:04<00:00, 2280.53it/s]
Parsing and importing drug data: 100%|███████████████████████████████████████████| 3580/3580 [00:01<00:00, 2682.83it/s]


Unnamed: 0,target_species,pmid,pmc,interactor_type,interactor_name,interactor_bel,relation_type,target_bel,target_type,target_symbol,pmod_type
0,9606,28768545,PMC5541421,protein,TREM2,"p(HGNC:""TREM2"",var(""p.Arg47His""))",increases,"p(HGNC:""MAPT"",loc(MESHA:""Cerebrospinal Fluid"")...",protein,MAPT,pho
1,10116,24270208,,protein,Dkk1,"p(MGI:""Dkk1"")",increases,"p(RGD:""Mapt"",pmod(Ph,S,199))",protein,Mapt,pho
2,0,14642273,,activity,,"act(p(MGI:""Cdk5""),ma(kin))",directly_increases,"p(MGI:""Mapt"",pmod(Ph,S,239))",protein,Mapt,pho
3,9606,17389597,,activity,,"act(p(HGNC:""GSK3B""),ma(kin))",increases,"p(HGNC:""MAPT"",pmod(Ph,S,357))",protein,MAPT,pho
4,9606,17360711,,activity,,"act(p(HGNC:""GSK3B""),ma(kin))",increases,"p(HGNC:""MAPT"",pmod(Ph,S,357))",protein,MAPT,pho
...,...,...,...,...,...,...,...,...,...,...,...
1147,9606,23362255,PMC3597833,protein,PIN1,"p(HGNC:""PIN1"")",decreases,"p(HGNC:""MAPT"",pmod(Ph,T))",protein,MAPT,pho
1148,9606,23362255,PMC3597833,activity,,"act(p(HGNC:""PPP2CA""),ma(phos))",decreases,"p(HGNC:""MAPT"",pmod(Ph,T))",protein,MAPT,pho
1149,9606,29661268,PMC6033068,protein,MAPT,"p(HGNC:""MAPT"",loc(CONSO:""KXGS motif""),pmod(Ac,...",decreases,"p(HGNC:""MAPT"",loc(CONSO:""microtubule-binding r...",protein,MAPT,pho
1150,9606,30935091,PMC6480207,protein,HDAC6,"p(HGNC:""HDAC6"")",increases,"p(HGNC:""MAPT"",pmod(Ph,S,369))",protein,MAPT,pho


### Druggable Interactors
While knowing the neighbors of selected nodes is useful, knowing which of them can be targeted by drugs and compounds is even more informative. The `InteractorFinder` class has a method for finding these druggable neighbors. By default, they are restricted to proteins, because the KG only contains drug-target interactions between proteins and compounds.

In [2]:
# Ask the finder for the interactors that are targeted by a drug or compound,
# then display the resulting table.
druggable_ints = finder.druggable_interactors()
druggable_ints

Unnamed: 0,drug,capsule_interactor_type,capsule_interactor_bel,interactor_bel,interactor_type,interactor_name,relation_type,target_bel,target_symbol,target_type,pmid,pmc,rel_pub_year,rel_rid,drug_rel_rid,drug_rel_actions,drugbank_id,chembl_id,pubchem_id,pmod_type
0,"N'-(Pyrrolidino[2,1-B]Isoindolin-4-On-8-Yl)-N-...",,,"p(HGNC:""CDK2"")",protein,CDK2,directly_increases,"p(HGNC:""MAPT"",pmod(Ph,S,199))",MAPT,protein,8282104,,1994,#570:10,#1898:10624,,DB04186,CHEMBL141247,445840.0,pho
1,"1-(3,5-DICHLOROPHENYL)-5-METHYL-1H-1,2,4-TRIAZ...",,,"p(HGNC:""CDK2"")",protein,CDK2,directly_increases,"p(HGNC:""MAPT"",pmod(Ph,S,199))",MAPT,protein,8282104,,1994,#570:10,#1898:10917,,DB07852,,2763754.0,pho
2,N(6)-dimethylallyladenine,,,"p(HGNC:""CDK2"")",protein,CDK2,directly_increases,"p(HGNC:""MAPT"",pmod(Ph,S,199))",MAPT,protein,8282104,,1994,#570:10,#1898:11306,,DB08768,CHEMBL476189,92180.0,pho
3,"(5E)-2-Amino-5-(2-pyridinylmethylene)-1,3-thia...",,,"p(HGNC:""CDK2"")",protein,CDK2,directly_increases,"p(HGNC:""MAPT"",pmod(Ph,S,199))",MAPT,protein,8282104,,1994,#570:10,#1898:11332,,DB07529,,46937079.0,pho
4,"4-{5-[(Z)-(2-IMINO-4-OXO-1,3-THIAZOLIDIN-5-YLI...",,,"p(HGNC:""CDK2"")",protein,CDK2,directly_increases,"p(HGNC:""MAPT"",pmod(Ph,S,199))",MAPT,protein,8282104,,1994,#570:10,#1899:10523,,DB07534,CHEMBL233149,5729339.0,pho
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54760,Bufexamac,protein,"p(HGNC:""HDAC6"")","p(HGNC:""HDAC6"")",protein,HDAC6,increases,"p(HGNC:""MAPT"",pmod(Ph,S,369))",MAPT,protein,30935091,PMC6480207,2019,#557:788,#1900:11395,inhibitor,DB13346,CHEMBL94394,2466.0,pho
54761,Debio-1347,protein,"p(HGNC:""HDAC6"")","p(HGNC:""HDAC6"")",protein,HDAC6,increases,"p(HGNC:""MAPT"",pmod(Ph,S,369))",MAPT,protein,30935091,PMC6480207,2019,#557:788,#1911:11461,inhibitor,,,,pho
54762,indirubin-3'-monoxime,protein,"p(HGNC:""HDAC6"")","p(HGNC:""HDAC6"")",protein,HDAC6,increases,"p(HGNC:""MAPT"",pmod(Ph,S,369))",MAPT,protein,30935091,PMC6480207,2019,#557:788,#1912:11461,inhibitor,,,,pho
54763,quercetin,protein,"p(HGNC:""HDAC6"")","p(HGNC:""HDAC6"")",protein,HDAC6,increases,"p(HGNC:""MAPT"",pmod(Ph,S,369))",MAPT,protein,30935091,PMC6480207,2019,#557:788,#1913:11461,inhibitor,,,,pho


# Ranker
The `Ranker` class is used to generate useful statistics about identified druggable interactors. Because this scoring is restricted to druggable interactors, only the starting node and pmods are needed to initialize.  

The ranking algorithm needs additional information to score the interactors. The first time it is used, it downloads data on BioAssays and other resources and stores them in a locally created SQLite database. The total space used is less than 100 MB.  

The download will take a couple of minutes during the first ranking, but subsequent rankings will be much faster.

In [3]:
from drugintfinder.ranker import Ranker

# Only the starting symbol and its pmods are needed, since ranking is restricted to druggable interactors.
# NOTE(review): reward/penalty presumably weight the scoring — confirm against the Ranker documentation.
ranker = Ranker(symbol="MAPT", pmods=["pho"], reward=1, penalty=-1)
ranker.rank()  # Performs the ranking
summary = ranker.summarize()  # Create a summary of the statistics
summary

Counting BioAssays for targets: 100%|██████████████████████████████████████████████████| 80/80 [01:18<00:00,  1.02it/s]
Counting edges: 100%|██████████████████████████████████████████████████████████████████| 80/80 [02:48<00:00,  2.11s/it]


Unnamed: 0,Drug,Target,Synergizes,Number of BioAssays for Target,Number of Causal Edges for Target,Drug Patent Ongoing,Generic Version of Drug Available,Number of Drug Targets
0,[4-({4-[(5-cyclopropyl-1H-pyrazol-3-yl)amino]q...,SRC,,1380,50,No,No,1
1,3-fluoro-N-1H-indol-5-yl-5-morpholin-4-ylbenza...,MAPK14,,1508,7,No,No,1
2,Epinephrine,TNF,,128,222,No,Yes,8
3,Cevimeline,CHRM1,Yes,1130,9,Yes,Yes,2
4,Binimetinib,IL1B,,5,202,No,No,5
...,...,...,...,...,...,...,...,...
853,Minocycline,MAPK12,,653,6,No,Yes,23
854,Aluminium phosphate,APP,,1135,208,No,No,4
855,Acetylsalicylic acid,MAPK1,,983,30,No,Yes,28
856,{4-[(2S)-2-Acetamido-3-({(1S)-1-[3-carbamoyl-4...,LCK,,969,8,No,No,1


In [23]:
# Ratio of BioAssays to distinct drugs for every target protein in `summary`.
# One groupby pass replaces two boolean-mask scans of the whole frame per target.
# sort=False keeps the targets in order of first appearance, so the index of
# `ratio_df` matches what a loop over `summary.Target.unique()` would produce.
bioassays_per_drug = [
    {
        "Protein": target,
        # The BioAssay count is read from the first row of the group
        # (presumably it is constant per target — confirm).
        "BioAssays per Drug": (
            target_rows["Number of BioAssays for Target"].iloc[0]
            // target_rows["Drug"].nunique(dropna=False)
        ),
    }
    for target, target_rows in summary.groupby("Target", sort=False)
]

# Explicit columns keep the sort below valid even when no targets were found.
ratio_df = pd.DataFrame(bioassays_per_drug, columns=["Protein", "BioAssays per Drug"])
ratio_df.sort_values("BioAssays per Drug", ascending=False).head(10)

Unnamed: 0,Protein,BioAssays per Drug
70,F2,1106
64,STAT3,613
63,RPS6KB1,387
73,CSNK1D,367
42,GSK3A,365
62,MAPK11,235
40,MAPK13,207
33,HDAC6,204
54,CDK5R1,201
76,MARK1,188


In [20]:
import pandas as pd
# Spot check: BioAssay count stored on the first row of `summary` whose Target is APP
summary[summary.Target == "APP"]["Number of BioAssays for Target"].iloc[0]

1135

In [14]:
# Rank the summary rows by the number of BioAssays per known drug target.
#
# No cell shown in this notebook creates the "BioAssays per Known Drug Target" column, so
# sorting on it fails with a KeyError after Restart & Run All. If the column is missing it
# is derived here; an existing column is used unchanged.
# NOTE(review): the formula is inferred from the displayed output (1970 BioAssays with 1 drug
# target -> 1970, 8 with 56 -> 0, missing drug-target count -> 0) — confirm against the
# original definition.
ratio_col = "BioAssays per Known Drug Target"

if ratio_col in summary.columns:
    summary_ranked = summary
else:
    drug_target_counts = pd.to_numeric(summary["Number of Drug Targets"], errors="coerce")
    bioassay_counts = pd.to_numeric(summary["Number of BioAssays for Target"], errors="coerce")
    summary_ranked = summary.assign(
        **{
            ratio_col: bioassay_counts
            # Missing or non-positive drug-target counts become NaN, then 0 after fillna.
            .floordiv(drug_target_counts.where(drug_target_counts > 0))
            .fillna(0)
            .astype(int)
        }
    )

summary_ranked.sort_values(by=ratio_col, ascending=False)

Unnamed: 0,Drug,Target,Synergizes,Number of BioAssays for Target,Number of Causal Edges for Target,Drug Patent Ongoing,Generic Version of Drug Available,Number of Drug Targets,BioAssays per Known Drug Target
802,"5-[3-(2-METHOXYPHENYL)-1H-PYRROLO[2,3-B]PYRIDI...",ABL1,,1970,10,No,No,1,1970
713,"2-{[(6-OXO-1,6-DIHYDROPYRIDIN-3-YL)METHYL]AMIN...",ABL1,,1970,10,No,No,1,1970
758,Radotinib,ABL1,,1970,10,No,No,1,1970
242,2-amino-5-[3-(1-ethyl-1H-pyrazol-5-yl)-1H-pyrr...,ABL1,,1970,10,No,No,1,1970
612,PD-166326,ABL1,,1970,10,No,No,1,1970
...,...,...,...,...,...,...,...,...,...
296,quercetin,CAMK2B,,181,10,No,No,,0
289,Pyridoxal phosphate,DDC,No,8,2,No,Yes,56,0
777,Polaprezinc,IL6,,7,99,No,No,8,0
279,Foreskin fibroblast (neonatal),IL6,No,7,99,No,No,11,0


# PPI Analysis
Next, we perform an analysis of the identified proteins using information gathered by [e(BE:L)](https://github.com/e-bel/ebel).
The following commands will download data from 4 major PPI databases: BioGRID, Pathway Commons, StringDB, and IntAct,
and check which pathways/interactions are known for every identified secondary target.

In [3]:
# Uncomment the following line if you need to install e(BE:L)
#!pip install ebel git+https://github.com/orientechnologies/pyorient

import pandas as pd
from ebel import Bel
# Creating the Bel object may prompt for the OrientDB root password (see the cell output)
bel = Bel()

Please insert OrientDB root password: ········


## Download PPI Information
The following cell downloads information from the PPI databases and inserts it into an RDBMS (SQLite [default] or MySQL).  
**WARNING** This step may take a while.

In [None]:
# Download and import each PPI source in turn: BioGRID, IntAct, StringDB, Pathway Commons.
# Kept as four explicit calls so a single source can be re-run on its own.
bel.biogrid.update()
bel.intact.update()
bel.stringdb.update()
bel.pathway_commons.update()

## Gather Hits
Now we check each PPI database for associated information on each secondary target.

In [None]:
# Distinct secondary targets from the ranking summary, in order of first appearance
proteins = summary["Target"].unique().tolist()

### Pathway Commons

In [None]:
# Render the target symbols as the body of a SQL IN-list. Interpolating the Python list
# directly would emit "['A', 'B']" (square brackets), which is not a valid SQL list.
# Single quotes are doubled so an apostrophe in a symbol cannot break the string literal;
# an empty selection falls back to NULL so "in (NULL)" stays valid and matches nothing.
protein_list_sql = ", ".join(
    "'" + str(symbol).replace("'", "''") + "'" for symbol in proteins if pd.notna(symbol)
) or "NULL"

# Pathway Commons interactions between MAPT and any of the targets (either direction),
# with pathway names, sources and PMIDs collapsed per (a, interaction type, b).
sql = f"""Select
    pc.participant_a a,
    pc.interaction_type int_type,
    pc.participant_b b,
    group_concat(distinct pn.name) pathway_names,
    group_concat(distinct s.source) sources,
    group_concat(distinct p.pmid) pmids
from
    pathway_commons pc left join
    pathway_commons__pathway_name pc_pn on (pc.id=pc_pn.pathway_commons_id) left join
    pathway_commons_pathway_name pn on (pc_pn.pathway_commons_pathway_name_id = pn.id) left join
    pathway_commons__source pc_s on (pc.id=pc_s.pathway_commons_id) left join
    pathway_commons_source s on (pc_s.pathway_commons_source_id=s.id) left join
    pathway_commons_pmid p on (p.pathway_commons_id=pc.id)
where
    (pc.participant_a in ({protein_list_sql}) and pc.participant_b = 'MAPT') or
    (pc.participant_b in ({protein_list_sql}) and pc.participant_a = 'MAPT')
group by
    pc.participant_a, pc.interaction_type, pc.participant_b"""

# NOTE(review): `engine` is not defined in any cell visible here — presumably the SQLAlchemy
# engine of the e(BE:L) RDBMS; confirm it is created before this cell runs.
pc_hits = pd.read_sql(sql, engine)

### BioGRID

In [None]:
# Render the target symbols as the body of a SQL IN-list. Interpolating the Python list
# directly would emit "['A', 'B']" (square brackets), which is not a valid SQL list.
# Single quotes are doubled so an apostrophe in a symbol cannot break the string literal;
# an empty selection falls back to NULL so "in (NULL)" stays valid and matches nothing.
protein_list_sql = ", ".join(
    "'" + str(symbol).replace("'", "''") + "'" for symbol in proteins if pd.notna(symbol)
) or "NULL"

# BioGRID interactions between MAPT and any of the targets (either direction),
# together with the experimental system that reported them.
sql = f"""Select
    ia.symbol a,
    ib.symbol b,
    bes.experimental_system,
    bes.experimental_system_type
from
    biogrid b inner join
    biogrid_interactor ia on (b.biogrid_a_id=ia.biogrid_id) inner join
    biogrid_interactor ib on (b.biogrid_b_id=ib.biogrid_id) inner join
    biogrid_experimental_system bes on (b.experimental_system_id=bes.id)
where
    (ia.symbol = 'MAPT' and ib.symbol in ({protein_list_sql})) or
    (ib.symbol = 'MAPT' and ia.symbol in ({protein_list_sql}))"""

# NOTE(review): `engine` is not defined in any cell visible here — presumably the SQLAlchemy
# engine of the e(BE:L) RDBMS; confirm it is created before this cell runs.
biogrid_hits = pd.read_sql(sql, engine)

### IntAct

In [None]:
# Render the target symbols as the body of a SQL IN-list. Interpolating the Python list
# directly would emit "['A', 'B']" (square brackets), which is not a valid SQL list.
# Single quotes are doubled so an apostrophe in a symbol cannot break the string literal;
# an empty selection falls back to NULL so "in (NULL)" stays valid and matches nothing.
protein_list_sql = ", ".join(
    "'" + str(symbol).replace("'", "''") + "'" for symbol in proteins if pd.notna(symbol)
) or "NULL"

# IntAct interactions between MAPT and any of the targets (either direction); UniProt
# accessions are mapped to HGNC symbols and rows are ordered by descending confidence.
sql = f"""Select
    ha.symbol as symbol_a,
    hb.symbol as symbol_b,
    i.confidence_value, 
    i.detection_method, 
    i.interaction_type, 
    i.pmid
from 
    intact i inner join 
    hgnc_uniprot hua on (i.int_a_uniprot_id=hua.accession) inner join 
    hgnc ha on (hua.hgnc_id=ha.id) inner join 
    hgnc_uniprot hub on (i.int_b_uniprot_id=hub.accession) inner join 
    hgnc hb on (hub.hgnc_id=hb.id)
where 
    (ha.symbol='MAPT' and hb.symbol in ({protein_list_sql})) or
    (hb.symbol='MAPT' and ha.symbol in ({protein_list_sql}))
order by confidence_value desc
"""
# NOTE(review): `engine` is not defined in any cell visible here — presumably the SQLAlchemy
# engine of the e(BE:L) RDBMS; confirm it is created before this cell runs.
intact_hits = pd.read_sql(sql, engine)

### StringDB

In [None]:
# Render the target symbols as the body of a SQL IN-list. Interpolating the Python list
# directly would emit "['A', 'B']" (square brackets), which is not a valid SQL list.
# Single quotes are doubled so an apostrophe in a symbol cannot break the string literal;
# an empty selection falls back to NULL so "in (NULL)" stays valid and matches nothing.
protein_list_sql = ", ".join(
    "'" + str(symbol).replace("'", "''") + "'" for symbol in proteins if pd.notna(symbol)
) or "NULL"

# StringDB rows linking MAPT with any of the targets (either direction),
# ordered by descending combined score.
sql = f"""Select * 
from 
    stringdb 
where 
    (symbol1='MAPT' and symbol2 in ({protein_list_sql})) or
    (symbol2='MAPT' and symbol1 in ({protein_list_sql}))
order by combined_score desc
"""
# NOTE(review): `engine` is not defined in any cell visible here — presumably the SQLAlchemy
# engine of the e(BE:L) RDBMS; confirm it is created before this cell runs.
stringdb_hits = pd.read_sql(sql, engine)

# Connecting to a Different Knowledge Graph
By default, this package connects to the Alzheimer's Disease based Knowledge Graph (KG) developed under the MAVO project, available at https://graphstore.scai.fraunhofer.de. There are other KGs available, however, and here you can choose to connect to a different one if desired.

The code below shows how to connect to the COVID KG instead.

In [None]:
from ebel_rest import connect
# Point ebel_rest at the "covid" database on the graphstore server.
# NOTE(review): the credentials are hardcoded — presumably public read-only access; confirm,
# otherwise load them from the environment instead.
connect(user="covid_user", password="covid", db_name="covid", server="https://graphstore.scai.fraunhofer.de")