In [127]:
import os
import gilda
import pickle
import pandas as pd
from indra.sources import tas
from collections import defaultdict
import indra.tools.assemble_corpus as ac
from indra.ontology.bio import bio_ontology
from indra.sources import indra_db_rest as idr
from indra.assemblers.html import HtmlAssembler

In [63]:
def html_assembler(indra_stmts, fname):
    """Assemble INDRA statements into a HTML report"""
    html_assembler = HtmlAssembler(indra_stmts,
                                   db_rest_url='https://db.indra.bio')
    assembled_html_report = html_assembler.make_model(no_redundancy=True)
    html_assembler.save_model(fname)
    return assembled_html_report

In [83]:
HERE = os.path.join(os.getcwd())
top_hits_df = pd.read_csv(os.path.join(HERE, 'input', 'okl_top_hits.csv'))

if not os.path.isfile(os.path.join(HERE, 'input', 'tas.pkl')):
    tp = tas.process_from_web()
    with open(os.path.join(HERE, 'input', 'tas.pkl'), 'wb') as fh:
        pickle.dump(tp, fh)
else:
    with open(os.path.join(HERE, 'input', 'tas.pkl'), 'rb') as fh:
        tp = pickle.load(fh)

In [109]:
compound_names = [compounds.split()[0] for compounds in top_hits_df['Compound Name']]
compound_targets = defaultdict(set)
tas_stmts = []

chembl_ids = {
    'CHEMBL214253': 'PD-407824',
    'CHEMBL254760': 'MGCD265',
    'CHEMBL363648': 'TAK-715',
    'CHEMBL373751': 'KIN001-135',
    'CHEMBL603469': 'Lestaurtinib',
    'CHEMBL377300': 'Brivanib',
    'CHEMBL565612': 'Sotrastaurin',
    'CHEMBL571948': 'Y39983',
    'CHEMBL2028663': 'Dabrafenib',
    'CHEMBL1078178': 'Momelotinib'   
}

# Get target statements
for stmt in tp.statements:
    subj = stmt.subj.name
    obj = stmt.obj.name
    if subj in compound_names:
        compound_targets[(subj)].add(obj)
        tas_stmts.append(stmt)
    elif stmt.subj.db_refs.get('CHEMBL') in chembl_ids:
        compound_targets[(chembl_ids[stmt.subj.db_refs.get('CHEMBL')])].add(obj)


# Create a new column for tas targets
top_hits_df['tas_targets'] = None

# Update tas_targets with hits
for r,c in top_hits_df.iterrows():
    cname = c[3].split()[0]
    if cname in compound_targets:
        top_hits_df['tas_targets'][r] = ", ".join(compound_targets[cname])

# Write the dataframe to a new csv
top_hits_df.to_csv(os.path.join(HERE, 'output', 'tas_hits.csv'))

indra_stmts = ac.run_preassembly(tas_stmts,
                                 run_refinement=False)

# Assemble the statements into HTML formatted report and save into a file
indra_op_html_report = \
    html_assembler(
        indra_stmts,
        fname=os.path.join(HERE, 'output', 'tas_statements.html'))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_hits_df['tas_targets'][r] = ", ".join(compound_targets[cname])
INFO: [2021-08-23 20:40:41] indra.tools.assemble_corpus - Combining duplicates on 358 statements...
INFO: [2021-08-23 20:40:41] indra.tools.assemble_corpus - 358 unique statements


In [226]:
## INDRA DB SEARCH

indra_stmts = []
indra_hits = defaultdict(set)


for r, c in top_hits_df.iterrows():
    # Grounding subject using Gilda
    subj = c[3].split()[0]

    gilda_subj = gilda.ground(subj)
    gilda_subj = gilda_subj[0].term.entry_name if gilda_subj else 'NA'
    if gilda_subj == 'KW 2449':
        gilda_subj = 'KW2449'
    # Downloading statements using INDRA REST API
    if gilda_subj != 'NA':
        idrp = idr.get_statements(subject=gilda_subj)
        indra_stmts = indra_stmts + idrp.statements
        for stmts in idrp.statements:
            if type(stmts).__name__ == 'Inhibition':
                subj = stmts.subj.name
                obj = stmts.obj.name
                indra_hits[(c[3].split()[0])].add(obj)
    elif gilda_subj == 'NA':
        indra_hits[(c[3].split()[0])].add('NA')
        
# Create a new column for indra_db targets
top_hits_df['indra_db_targets'] = None

# Update indra_db_targets with hits
for r,c in top_hits_df.iterrows():
    cname = c[3].split()[0]
    if cname in indra_hits:
        top_hits_df['indra_db_targets'][r] = ", ".join(indra_hits[cname])

# Write the dataframe to a new csv
top_hits_df.to_csv(os.path.join(HERE, 'output', 'tas_indra_db_hits.csv'))


indra_stmts = ac.run_preassembly(indra_stmts,
                                 run_refinement=False)

# Assemble the statements into HTML formatted report and save into a file
indra_op_html_report = \
    html_assembler(
        indra_stmts,
        fname=os.path.join(HERE, 'output', 'indra_db_statements.html'))

INFO: [2021-08-23 22:22:24] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=PD 407824 with role=SUBJECT.
INFO: [2021-08-23 22:22:24] indra_db_rest.request_logs - Running 0th request for statements
INFO: [2021-08-23 22:22:24] indra_db_rest.request_logs -   LIMIT: None
INFO: [2021-08-23 22:22:24] indra_db_rest.request_logs -   OFFSET: 0
INFO: [2021-08-23 22:22:25] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=LY-2157299 with role=SUBJECT.
INFO: [2021-08-23 22:22:25] indra_db_rest.request_logs - Running 0th request for statements
INFO: [2021-08-23 22:22:25] indra_db_rest.request_logs -   LIMIT: None
INFO: [2021-08-23 22:22:25] indra_db_rest.request_logs -   OFFSET: 0
INFO: [2021-08-23 22:22:25] indra_db_rest.query_processor - Retrieving statements that have an agent where NAME=A-674563 with role=SUBJECT.
INFO: [2021-08-23 22:22:25] indra_db_rest.request_logs - Running 0th request for statements
INFO: [2021-08-23 22: