# Viewing Demo Results

## Setup 

In [1]:
## CX: display the value of every top-level expression in a cell, not only the last one
## (cells below rely on this to show several len(...) results from one cell)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pathlib

import json
import pprint

import copy

## A2

### Setting up

In [27]:
## saved response file for the A2 section (RHOBTB2, two-hop)
json_path = pathlib.Path.home().joinpath("Desktop", "SavedDemoRuns",
                                                "A2_RHOBTB2_twohop.json")

## JSON text is UTF-8; pass the encoding explicitly so the read does not
## depend on the operating system's locale default
with open(json_path, encoding="utf-8") as file:
    demo_response = json.load(file)

In [28]:
## top-level keys of the response: logs, message, workflow
logs = demo_response["logs"]

## keys inside "message": query_graph, knowledge_graph, results
message = demo_response["message"]
knowledge_graph = message["knowledge_graph"]
results = message["results"]

In [29]:
## knowledge_graph has nodes and edges
## displays, in order: number of nodes, number of edges, number of results
len(knowledge_graph['nodes'])
len(knowledge_graph['edges'])
len(results)

1728

4634

4344

In [30]:
## peek at the first 11 node IDs (the keys of knowledge_graph['nodes'])
for position, node_id in enumerate(knowledge_graph['nodes']):
    if position > 10:
        break
    print(node_id)

NCBIGene:23221
NCBIGene:6794
NCBIGene:595
NCBIGene:894
NCBIGene:388
NCBIGene:896
NCBIGene:25855
NCBIGene:10765
NCBIGene:22836
NCBIGene:8065
NCBIGene:8452


In [31]:
## inspect the full record of one node (the first ID printed above)
pprint.pprint(knowledge_graph['nodes']['NCBIGene:23221'])

{'attributes': [{'attribute_type_id': 'biolink:xref',
                 'value': ['NCBIGene:23221',
                           'ENSEMBL:ENSG00000008853',
                           'HGNC:18756',
                           'OMIM:607352',
                           'UMLS:C1425762',
                           'UniProtKB:Q9BYZ6',
                           'PR:Q9BYZ6']},
                {'attribute_type_id': 'biolink:synonym',
                 'value': ['RHOBTB2',
                           'RHOBTB2 gene',
                           'RHBT2_HUMAN Rho-related BTB domain-containing '
                           'protein 2 (sprot)',
                           'Rho-related BTB domain-containing protein 2 '
                           '(human)']},
                {'attribute_type_id': 'num_source_nodes', 'value': 16},
                {'attribute_type_id': 'num_target_nodes', 'value': 0},
                {'attribute_type_id': 'source_qg_nodes', 'value': ['n2']},
                {'attribute_type_id':

In [32]:
genes = list()
chemicals = list()

for k,v in knowledge_graph["nodes"].items():
    ## the orientation of the query was RHOBTB2 <- Gene <- Chemical.
    ## so intermediates / RHOBTB2 should have a nonzero source node count.
    ## look the count up by its attribute_type_id instead of assuming it is
    ## the third attribute, so a different attribute order cannot misclassify
    ## a node; a node without the attribute is treated as having no sources
    num_source_nodes = next(
        (attr["value"] for attr in v["attributes"]
         if attr["attribute_type_id"] == "num_source_nodes"),
        0,
    )
    if num_source_nodes != 0:
        genes.append(v)
    else:
        chemicals.append(v)

In [33]:
## displays the number of gene nodes, then the number of chemical nodes
len(genes)
len(chemicals)

21

1707

### Genes

In [34]:
## inspect the first entry of the genes list
pprint.pprint(genes[0])

{'attributes': [{'attribute_type_id': 'biolink:xref',
                 'value': ['NCBIGene:23221',
                           'ENSEMBL:ENSG00000008853',
                           'HGNC:18756',
                           'OMIM:607352',
                           'UMLS:C1425762',
                           'UniProtKB:Q9BYZ6',
                           'PR:Q9BYZ6']},
                {'attribute_type_id': 'biolink:synonym',
                 'value': ['RHOBTB2',
                           'RHOBTB2 gene',
                           'RHBT2_HUMAN Rho-related BTB domain-containing '
                           'protein 2 (sprot)',
                           'Rho-related BTB domain-containing protein 2 '
                           '(human)']},
                {'attribute_type_id': 'num_source_nodes', 'value': 16},
                {'attribute_type_id': 'num_target_nodes', 'value': 0},
                {'attribute_type_id': 'source_qg_nodes', 'value': ['n2']},
                {'attribute_type_id':

In [35]:
## for each gene: the first value of its first attribute, then its name
## together with its num_source_nodes value
for gene in genes:
    gene_attrs = gene['attributes']
    print(gene_attrs[0]['value'][0])
    for source_count in (a['value'] for a in gene_attrs
                         if a['attribute_type_id'] == 'num_source_nodes'):
        print(gene['name'], source_count)
        print("\n")  ## leaves two blank lines between genes

NCBIGene:23221
RHOBTB2 16


NCBIGene:6794
STK11 283


NCBIGene:595
CCND1 942


NCBIGene:894
CCND2 907


NCBIGene:388
RHOB 131


NCBIGene:896
CCND3 897


NCBIGene:25855
BRMS1 48


NCBIGene:10765
KDM5B 139


NCBIGene:22836
RHOBTB3 20


NCBIGene:8065
CUL5 43


NCBIGene:8452
CUL3 60


NCBIGene:23145
SSPOP 10


NCBIGene:284217
LAMA1 30


NCBIGene:114784
CSMD2 11


NCBIGene:3645
INSRR 27


NCBIGene:1462
VCAN 95


NCBIGene:29919
RMC1 66


NCBIGene:672
BRCA1 584


NCBIGene:65267
WNK3 25


NCBIGene:26960
NBEA 15


NCBIGene:23379
ICE1 11




### Exploring RHOBTB2

In [44]:
## edges whose object is RHOBTB2 (NCBIGene:23221) and whose subject ID contains 'NCBI'
rhobtb2_as_n0 = list()
for edge_id, edge in knowledge_graph['edges'].items():
    if edge['object'] != 'NCBIGene:23221':
        continue
    if 'NCBI' not in edge['subject']:
        continue
    print(edge['subject'])
    ## store the edge's key next to its contents
    rhobtb2_as_n0.append({"id": edge_id, **edge})

NCBIGene:6794
NCBIGene:595
NCBIGene:894
NCBIGene:388
NCBIGene:896
NCBIGene:25855
NCBIGene:10765
NCBIGene:22836
NCBIGene:8065
NCBIGene:8452
NCBIGene:23145
NCBIGene:284217
NCBIGene:114784
NCBIGene:3645
NCBIGene:1462
NCBIGene:29919
NCBIGene:672
NCBIGene:65267
NCBIGene:26960
NCBIGene:23379


In [45]:
## NOTE(review): the original note read "so go to idx 36"; presumably these edges plus
## the ones collected in the next cell total 37 (idx 0-36) -- confirm
len(rhobtb2_as_n0)  ## number of edges into RHOBTB2 whose subject ID contains 'NCBI'

20

In [46]:
## edges whose object is RHOBTB2 (NCBIGene:23221) and whose subject ID does not contain 'NCBI'
rhobtb2_as_n1 = list()
for edge_id, edge in knowledge_graph['edges'].items():
    if edge['object'] != 'NCBIGene:23221':
        continue
    if 'NCBI' in edge['subject']:
        continue
    print(edge['subject'])
    ## store the edge's key next to its contents
    rhobtb2_as_n1.append({"id": edge_id, **edge})

PUBCHEM.COMPOUND:387447
PUBCHEM.COMPOUND:24621
PUBCHEM.COMPOUND:5288382
PUBCHEM.COMPOUND:5394
PUBCHEM.COMPOUND:36314
PUBCHEM.COMPOUND:60750
PUBCHEM.COMPOUND:5311497
PUBCHEM.COMPOUND:60838
PUBCHEM.COMPOUND:176870
PUBCHEM.COMPOUND:148123
CHEMBL.COMPOUND:CHEMBL1201583
PUBCHEM.COMPOUND:135410875
CHEMBL.COMPOUND:CHEMBL2107909
PUBCHEM.COMPOUND:34755
UMLS:C0032136
PUBCHEM.COMPOUND:387447
PUBCHEM.COMPOUND:3121


In [47]:
## NOTE(review): the original note read "so go to idx 36"; presumably these edges plus
## the ones collected in rhobtb2_as_n0 total 37 (idx 0-36) -- confirm
len(rhobtb2_as_n1)  ## number of edges into RHOBTB2 whose subject ID does not contain 'NCBI'

17

In [None]:
## inspect one of the non-'NCBI' subjects printed above
pprint.pprint(knowledge_graph['nodes']['PUBCHEM.COMPOUND:5394'])

### Chemicals

In [None]:
## map each chemical's name to its num_target_nodes value
chem_connections = {
    chem['name']: chem_attr['value']
    for chem in chemicals
    for chem_attr in chem['attributes']
    if chem_attr['attribute_type_id'] == 'num_target_nodes'
}

In [None]:
## list the chemicals that have more than 10 target nodes
for chem_name, n_targets in chem_connections.items():
    if n_targets > 10:
        print(chem_name, n_targets)

The chemicals printed above (those with more than 10 target nodes) are all general cancer drugs.

### Checking size of parts w/ dumps

In [None]:
## work on a deep copy so demo_response itself keeps its edges
A2_without_edges = copy.deepcopy(demo_response)
del A2_without_edges["message"]['knowledge_graph']["edges"]
## display the keys that remain in the copy's knowledge_graph
A2_without_edges["message"]['knowledge_graph'].keys()

In [None]:
## write the edge-free copy next to the saved demo runs so its file size can be checked
A2_without_edges_path = pathlib.Path.home().joinpath("Desktop", "SavedDemoRuns", "A2_without_edges.json")

with open(A2_without_edges_path, "w") as write_file:
    json.dump(A2_without_edges, write_file) # encode dict into JSON

In [None]:
## size in bytes of the file written above
A2_without_edges_path.stat().st_size
## 2.6 MB...so EDGES was the majority of the size...

## A2a_expanded

In [16]:
## saved response file for the A2a_expanded section (RHOBTB2, two-hop)
json_path = pathlib.Path.home().joinpath("Desktop", "SavedDemoRuns",
                                                "A2a_expanded_RHOBTB2_twohop.json")

## JSON text is UTF-8; pass the encoding explicitly so the read does not
## depend on the operating system's locale default
with open(json_path, encoding="utf-8") as file:
    demo_response = json.load(file)

In [17]:
## top-level keys of the response: logs, message, workflow
logs = demo_response["logs"]

## keys inside "message": query_graph, knowledge_graph, results
message = demo_response["message"]
knowledge_graph = message["knowledge_graph"]
results = message["results"]

In [18]:
## knowledge_graph has nodes and edges
## displays, in order: number of nodes, number of edges, number of results
len(knowledge_graph['nodes'])
len(knowledge_graph['edges'])
len(results)

48

55

49

In [19]:
genes = list()
chemicals = list()

for k,v in knowledge_graph["nodes"].items():
    ## the orientation of the query was RHOBTB2 <- Gene <- Chemical.
    ## so intermediates / RHOBTB2 should have a nonzero source node count.
    ## look the count up by its attribute_type_id instead of assuming it is
    ## the third attribute, so a different attribute order cannot misclassify
    ## a node; a node without the attribute is treated as having no sources
    num_source_nodes = next(
        (attr["value"] for attr in v["attributes"]
         if attr["attribute_type_id"] == "num_source_nodes"),
        0,
    )
    if num_source_nodes != 0:
        genes.append(v)
    else:
        chemicals.append(v)

In [20]:
## displays the number of gene nodes, then the number of chemical nodes
len(genes)
len(chemicals)

6

42

## B2a-B2f analysis

In [253]:
## NOTE(review): this section is titled "B2a-B2f analysis" but loads C1-new.json,
## the same file the "C1-new analysis" section loads -- confirm the intended file
json_path = pathlib.Path.home().joinpath("Desktop", "SavedDemoRuns",
                                                "C1-new.json")

## JSON text is UTF-8; pass the encoding explicitly so the read does not
## depend on the operating system's locale default
with open(json_path, encoding="utf-8") as file:
    demo_response = json.load(file)

In [254]:
## top-level keys of the response: logs, message, workflow
## (logs are not extracted in this section)
# logs = demo_response["logs"]

## keys inside "message": query_graph, knowledge_graph, results
message = demo_response["message"]
knowledge_graph = message["knowledge_graph"]
results = message["results"]

In [255]:
## knowledge_graph has nodes and edges
## displays, in order: number of nodes, number of edges, number of results
len(knowledge_graph['nodes'])
len(knowledge_graph['edges'])
len(results)

378

989

907

In [256]:
## (name, first value of the node's first attribute) for every node in the graph
names = [
    (node['name'], node['attributes'][0]['value'][0])
    for node in knowledge_graph['nodes'].values()
]

In [257]:
## display the first 50 (name, ID) pairs
names[0:50]

# ## interesting nodes:
# ## Drug, drug, Liver, liver, Hep, hep, Cirr, cirr, 

# ## (disabled) substring search over the node names, here for 'cirr'
# for i in names:
#     if 'cirr' in i[0]:
#         print(i)

[('2-Methyspiro(1,3-oxathiolane-5,3)quinuclidine',
  'PUBCHEM.COMPOUND:25137844'),
 ('Sjogren syndrome', 'MONDO:0010030'),
 ('Mercaptopurine', 'PUBCHEM.COMPOUND:667490'),
 ('Crohn disease', 'MONDO:0005011'),
 ('Treprostinil', 'PUBCHEM.COMPOUND:6918140'),
 ('systemic sclerosis', 'MONDO:0005100'),
 ('Apremilast', 'PUBCHEM.COMPOUND:11561674'),
 ('psoriatic arthritis', 'MONDO:0011849'),
 ('Ambrisentan', 'PUBCHEM.COMPOUND:6918493'),
 ('Mesalamine', 'PUBCHEM.COMPOUND:4075'),
 ('Hydroxychloroquine', 'PUBCHEM.COMPOUND:3652'),
 ('systemic lupus erythematosus (disease)', 'MONDO:0007915'),
 ('ulcerative colitis (disease)', 'MONDO:0005101'),
 ('BALSALAZIDE', 'CHEMBL.COMPOUND:CHEMBL1201346'),
 ('Macitentan', 'PUBCHEM.COMPOUND:16004692'),
 ('Methotrexate', 'PUBCHEM.COMPOUND:126941'),
 ('Calcipotriol', 'PUBCHEM.COMPOUND:5288783'),
 ('psoriasis', 'MONDO:0005083'),
 ('SULFASALAZINE', 'CHEMBL.COMPOUND:CHEMBL421'),
 ('ankylosing spondylitis', 'MONDO:0005306'),
 ('Bosentan', 'PUBCHEM.COMPOUND:104865'),
 (

In [185]:
## pretty-print every edge whose object is the node of interest
object_of_interest = 'MESH:D008106'
for edge in knowledge_graph['edges'].values():
    if edge['object'] == object_of_interest:
        pprint.pprint(edge)

{'attributes': [{'attribute_type_id': 'biolink:aggregator_knowledge_source',
                 'value': ['infores:translator-biothings-explorer'],
                 'value_type_id': 'biolink:InformationResource'},
                {'attribute_source': None,
                 'attribute_type_id': 'biolink:aggregator_knowledge_source',
                 'attributes': None,
                 'description': None,
                 'original_attribute_name': 'biolink:aggregator_knowledge_source',
                 'value': ['infores:sri-reference-kg'],
                 'value_type_id': 'biolink:InformationResource',
                 'value_url': None},
                {'attribute_source': None,
                 'attribute_type_id': 'biolink:relation',
                 'attributes': None,
                 'description': None,
                 'original_attribute_name': 'relation',
                 'value': 'RO:0002606',
                 'value_type_id': 'EDAM:data_0006',
                 'value_url'

## B3a-B3b analysis

In [242]:
## saved response file for the B3a-B3b section (B3A, two-hop, chemicals)
json_path = pathlib.Path.home().joinpath("Desktop", "SavedDemoRuns", "UnclearIsSet",
                                                "B3A_two-hop_chem.json")

## JSON text is UTF-8; pass the encoding explicitly so the read does not
## depend on the operating system's locale default
with open(json_path, encoding="utf-8") as file:
    demo_response = json.load(file)

In [243]:
## top-level keys of the response: logs, message, workflow
logs = demo_response["logs"]

## keys inside "message": query_graph, knowledge_graph, results
message = demo_response["message"]
knowledge_graph = message["knowledge_graph"]
results = message["results"]

In [244]:
## knowledge_graph has nodes and edges
## displays, in order: number of nodes, number of edges, number of results
len(knowledge_graph['nodes'])
len(knowledge_graph['edges'])
len(results)

16313

58669

69336

In [245]:
DoP = list()
chemicals = list()

for k,v in knowledge_graph["nodes"].items():
    ## the orientation of the query was DILI -> DoP <- Chemical.
    ## so the DoP nodes should have a nonzero source node count; everything
    ## else (chemicals and the DILI starting nodes) lands in `chemicals`.
    ## look the count up by its attribute_type_id instead of assuming it is
    ## the third attribute, so a different attribute order cannot misclassify
    ## a node; a node without the attribute is treated as having no sources
    num_source_nodes = next(
        (attr["value"] for attr in v["attributes"]
         if attr["attribute_type_id"] == "num_source_nodes"),
        0,
    )
    if num_source_nodes != 0:
        DoP.append(v)
    else:
        chemicals.append(v)

In [246]:
## displays the number of DoP nodes, then the number of remaining nodes
len(DoP)
len(chemicals)

174

16139

In [247]:
## the first 2 entries of chemicals are the DILI starting nodes:
## split them off into their own list and keep the rest as chemicals
## NOTE: this rebinds chemicals, so running the cell again drops 2 more entries
starting_dili_nodes, chemicals = chemicals[:2], chemicals[2:]

In [248]:
## (name, first value of the first attribute) for every chemical whose
## name does not contain 'UMLS'
names = [
    (node['name'], node['attributes'][0]['value'][0])
    for node in chemicals
    if 'UMLS' not in node['name']
]

In [249]:
## number of chemicals kept after the name filter
len(names)

7496

In [251]:
## display entries 50-99 of the (name, ID) pairs
names[50:100]

[('tocilizumab', 'UMLS:C1609165'),
 ('progabide', 'UMLS:C0072076'),
 ('alteplase', 'UMLS:C0032143'),
 ('IL6', 'NCBIGene:3569'),
 ('copeptins', 'UMLS:C0056279'),
 ('Arginine', 'UMLS:C0003765'),
 ('Tryptophan', 'UMLS:C0041249'),
 ('N,N-dimethylarginine', 'UMLS:C0067385'),
 ('Carnitine', 'UMLS:C0007258'),
 ('anakinra', 'UMLS:C0245109'),
 ('DEFB1', 'NCBIGene:1672'),
 ('LY 303366', 'UMLS:C0532508'),
 ('Zenapax', 'UMLS:C0281549'),
 ('Caspofungin', 'UMLS:C0537894'),
 ('argatroban', 'UMLS:C0048470'),
 ('Glutamic Acid', 'UMLS:C0061472'),
 ('prothrombin complex concentrates', 'UMLS:C0072434'),
 ('angiopeptin', 'UMLS:C0051846'),
 ('Immune Sera', 'UMLS:C0020960'),
 ('Acetylmuramyl-Alanyl-Isoglutamine', 'UMLS:C0001060'),
 ('ADM', 'NCBIGene:133'),
 ('oritavancin', 'UMLS:C1144403'),
 ('HA-1A', 'UMLS:C0120659'),
 ('Recombinant human granulocyte colony stimulating factor', 'UMLS:C1271481'),
 ('bactericidal permeability increasing protein', 'UMLS:C0052897'),
 ('Urokinase', 'UMLS:C0042071'),
 ('eculizuma

## C1-new analysis

In [258]:
## saved response file for the C1-new section
json_path = pathlib.Path.home().joinpath("Desktop", "SavedDemoRuns",
                                                "C1-new.json")

## JSON text is UTF-8; pass the encoding explicitly so the read does not
## depend on the operating system's locale default
with open(json_path, encoding="utf-8") as file:
    demo_response = json.load(file)

In [259]:
## top-level keys of the response: logs, message, workflow
## (logs are not extracted in this section)
# logs = demo_response["logs"]

## keys inside "message": query_graph, knowledge_graph, results
message = demo_response["message"]
knowledge_graph = message["knowledge_graph"]
results = message["results"]

In [260]:
## knowledge_graph has nodes and edges
## displays, in order: number of nodes, number of edges, number of results
len(knowledge_graph['nodes'])
len(knowledge_graph['edges'])
len(results)

378

989

907

In [264]:
starting_nodes = list()
chemicals = list()

for k,v in knowledge_graph["nodes"].items():
    ## the orientation of the query was Diseases IDs -> Chemical.
    ## so chemicals should have a nonzero source node count.
    ## look the count up by its attribute_type_id instead of assuming it is
    ## the third attribute, so a different attribute order cannot misclassify
    ## a node; a node without the attribute is treated as having no sources
    num_source_nodes = next(
        (attr["value"] for attr in v["attributes"]
         if attr["attribute_type_id"] == "num_source_nodes"),
        0,
    )
    if num_source_nodes != 0:
        chemicals.append(v)
    else:
        starting_nodes.append(v)

In [265]:
## displays the number of starting (disease) nodes, then the number of chemical nodes
len(starting_nodes)
len(chemicals)

11

367

In [269]:
## (name, first value of the first attribute) for every chemical whose
## name does not contain 'UMLS'
names = [
    (node['name'], node['attributes'][0]['value'][0])
    for node in chemicals
    if 'UMLS' not in node['name']
]

In [270]:
## number of chemicals kept after the name filter
len(names)

367

In [271]:
## display the first 50 (name, ID) pairs
names[0:50]

[('2-Methyspiro(1,3-oxathiolane-5,3)quinuclidine',
  'PUBCHEM.COMPOUND:25137844'),
 ('Mercaptopurine', 'PUBCHEM.COMPOUND:667490'),
 ('Treprostinil', 'PUBCHEM.COMPOUND:6918140'),
 ('Apremilast', 'PUBCHEM.COMPOUND:11561674'),
 ('Ambrisentan', 'PUBCHEM.COMPOUND:6918493'),
 ('Mesalamine', 'PUBCHEM.COMPOUND:4075'),
 ('Hydroxychloroquine', 'PUBCHEM.COMPOUND:3652'),
 ('BALSALAZIDE', 'CHEMBL.COMPOUND:CHEMBL1201346'),
 ('Macitentan', 'PUBCHEM.COMPOUND:16004692'),
 ('Methotrexate', 'PUBCHEM.COMPOUND:126941'),
 ('Calcipotriol', 'PUBCHEM.COMPOUND:5288783'),
 ('SULFASALAZINE', 'CHEMBL.COMPOUND:CHEMBL421'),
 ('Bosentan', 'PUBCHEM.COMPOUND:104865'),
 ('Acitretin', 'PUBCHEM.COMPOUND:5284513'),
 ('Mycophenolate mofetil', 'PUBCHEM.COMPOUND:5281078'),
 ('Azathioprine', 'PUBCHEM.COMPOUND:2265'),
 ('Leflunomide', 'PUBCHEM.COMPOUND:3899'),
 ('Pilocarpine', 'PUBCHEM.COMPOUND:5910'),
 ('Mycophenolic acid', 'PUBCHEM.COMPOUND:446541'),
 ('Chloroquine', 'PUBCHEM.COMPOUND:2719'),
 ('Iloprost', 'PUBCHEM.COMPOUND:5

## Checking size of parts w/ dumps

In [6]:
## saved response file used for the size check in this section
json_path = pathlib.Path.home().joinpath("Desktop", "SavedDemoRuns", "C3-new.json")

## JSON text is UTF-8; pass the encoding explicitly so the read does not
## depend on the operating system's locale default
with open(json_path, encoding="utf-8") as file:
    demo_response = json.load(file)

In [7]:
## top-level keys of the response: logs, message, workflow
## (logs are not extracted in this section)
# logs = demo_response["logs"]

## keys inside "message": query_graph, knowledge_graph, results
message = demo_response["message"]
knowledge_graph = message["knowledge_graph"]
results = message["results"]

In [8]:
## knowledge_graph has nodes and edges
## displays, in order: number of nodes, number of edges, number of results
len(knowledge_graph['nodes'])
len(knowledge_graph['edges'])
len(results)

7505

38946

45469

In [None]:
## NOTE(review): the variable is named B2a_* but demo_response was loaded from
## C3-new.json in this section -- confirm which run this size check is for
## work on a deep copy so demo_response itself keeps its edges
B2a_without_edges = copy.deepcopy(demo_response)
del B2a_without_edges["message"]['knowledge_graph']["edges"]
## display the keys that remain in the copy's knowledge_graph
B2a_without_edges["message"]['knowledge_graph'].keys()

In [None]:
## write the edge-free copy to disk so its file size can be checked
## NOTE(review): the output file is named D6_without_edges.json although the variable
## says B2a and the data was loaded from C3-new.json -- confirm the intended name
B2a_without_edges_path = pathlib.Path.home().joinpath("Desktop", "SavedDemoRuns", "D6_without_edges.json")

with open(B2a_without_edges_path, "w") as write_file:
    json.dump(B2a_without_edges, write_file) # encode dict into JSON

In [None]:
## size in bytes of the file written above
B2a_without_edges_path.stat().st_size
## 1.4 MB...so EDGES was the majority of the size...