In [None]:
from indra_cogex.sources.odinson.grammars import Rule
from indra_cogex.sources.odinson.client import process_rules
import gilda
import pandas as pd
from collections import defaultdict
from gilda.process import normalize
from tqdm.auto import tqdm
from pyobo.gilda_utils import get_gilda_terms
import numpy as np
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
import textwrap
import random
import difflib

import indra_cogex

In [None]:
# Reuse the grounder object exposed by the local spine_ner module.
# It is tried alongside gilda when captured terms are grounded in later cells.
# NOTE(review): presumably this grounder is loaded with spine-specific terms - confirm in spine_ner.
import spine_ner
grounder = spine_ner.grounder

In [None]:
# Build the Odinson rule texts for every ordered pairing of terms
from itertools import product
import rule_gen 

brain_regions = rule_gen.brain_regions
phenotypes = rule_gen.phenotypes

# One rule set per ordered (brain region, brain region) pair, self-pairs included
br_br_rules = [
    rule_gen.create_br_br_rules(br_1, br_2)
    for br_1, br_2 in product(brain_regions, brain_regions)
]

# One rule set per (brain region, phenotype) pair
br_ph_rules = [
    rule_gen.create_br_ph_rules(br, ph)
    for br, ph in product(brain_regions, phenotypes)
]

# Flatten the per-pair rule sets into single lists of individual rules
br_rules = [single_rule for pair_rules in br_br_rules for single_rule in pair_rules]
ph_rules = [single_rule for pair_rules in br_ph_rules for single_rule in pair_rules]

In [None]:
# Load the rule lists from rule_gen.permutations().
# These assignments replace any br_rules / ph_rules built earlier in the session.
import rule_gen

# Call permutations() once and reuse the result, so both lists come from the
# same invocation and the generation work is not repeated.
rule_permutations = rule_gen.permutations()
br_rules = rule_permutations['br']
ph_rules = rule_permutations['ph']
print(len(br_rules))
print(len(ph_rules))

In [None]:
# Assemble the word lists used to filter captured terms.
# Alternative source kept for reference (currently disabled):
#   import stop_words; stop_words.false_phrases / stop_words.exclude_words

# English stop words shipped with NLTK
sw_nltk = stopwords.words('english')

# Each text file holds one entry per line: surrounding whitespace is trimmed
# and lines that are empty after trimming are dropped.
with open('exclude_words.txt', 'r') as file:
    exclude_words = [entry for entry in map(str.strip, file) if entry]

with open('false_phrases.txt', 'r') as file:
    false_phrases = [entry for entry in map(str.strip, file) if entry]

In [None]:
# Create the set of brain region-brain region relations.
# Each relation is a tuple of (curie, text) pairs, one pair per captured term
# that survives filtering and can be grounded. Grounding prefers the spine
# grounder and falls back to gilda.
relations = []
seen_relations = set()            # mirrors `relations` for O(1) duplicate checks
stop_word_set = set(sw_nltk)      # set lookups instead of scanning the list per token
excluded_terms = set(exclude_words)

# Go through each rule and make it a rule object
for rule_text in tqdm(br_rules):
    rule = Rule("anatomical connection", "Exp", "basic", rule_text)
    # Make sure it is a functional Odinson rule
    try:
        rule_output = process_rules([rule], "http://localhost:9000")
    except Exception as e:
        print('failed', rule)
        print(e)
        # Skip this rule: without this, rule_output would be undefined on a
        # first failure or still hold the previous rule's mentions.
        continue

    # Get the start and end token offsets for each term pulled out by the rule.
    # NOTE(review): captures from every match of a sentence accumulate into one
    # relation tuple, so a relation can hold more than two terms - confirm this
    # is intended.
    for sentence in rule_output['mentions']:
        relation = ()
        words = sentence['words']
        for element in sentence['match']:
            for entity in element['namedCaptures']:
                start = entity['capturedMatch']['start']
                end = entity['capturedMatch']['end']
                # Remove stop words
                processed_term = [
                    token for token in words[start:end]
                    if token.lower() not in stop_word_set
                ]
                word = ' '.join(processed_term)
                # Nothing to ground when only stop words were captured or the
                # term is explicitly excluded.
                if not word or word.lower() in excluded_terms:
                    continue

                # gilda is only consulted when the spine grounder finds nothing
                scored_matches = grounder.ground(word) or gilda.ground(word)
                best_curie = scored_matches[0].term.get_curie() if scored_matches else None
                if best_curie is not None:
                    relation += ((best_curie, word),)

        # Keep relations with at least two grounded terms whose first two texts
        # differ, and record each distinct relation only once.
        if (
            len(relation) > 1
            and relation[0][1] != relation[1][1]
            and relation not in seen_relations
        ):
            seen_relations.add(relation)
            relations.append(relation)

In [None]:
# Number of distinct brain region-brain region relations collected by the extraction loop
len(relations)

In [None]:
# Create the set of brain region-phenotype relations.
# Each relation is a tuple of (curie, text) pairs, one pair per captured term
# that survives filtering and can be grounded. Grounding prefers gilda here and
# falls back to the spine grounder.
ph_relations = []
seen_ph_relations = set()         # mirrors `ph_relations` for O(1) duplicate checks
stop_word_set = set(sw_nltk)      # set lookups instead of scanning the list per token
excluded_terms = set(exclude_words)

# Go through each rule and make it a rule object
for rule_text in tqdm(ph_rules):
    rule = Rule("phenotype", "Exp", "basic", rule_text)
    # Make sure it is a functional Odinson rule
    try:
        rule_output = process_rules([rule], "http://localhost:9000")
    except Exception as e:
        print('failed', rule)
        print(e)
        # Skip this rule: without this, rule_output would be undefined on a
        # first failure or still hold the previous rule's mentions.
        continue

    # Get the start and end token offsets for each term pulled out by the rule.
    # NOTE(review): captures from every match of a sentence accumulate into one
    # relation tuple, so a relation can hold more than two terms - confirm this
    # is intended.
    for sentence in rule_output['mentions']:
        relation = ()
        words = sentence['words']
        for element in sentence['match']:
            for entity in element['namedCaptures']:
                start = entity['capturedMatch']['start']
                end = entity['capturedMatch']['end']
                # Remove stop words
                processed_term = [
                    token for token in words[start:end]
                    if token.lower() not in stop_word_set
                ]
                word = ' '.join(processed_term)
                # Nothing to ground when only stop words were captured or the
                # term is explicitly excluded.
                if not word or word.lower() in excluded_terms:
                    continue

                # The spine grounder is only consulted when gilda finds nothing
                scored_matches = gilda.ground(word) or grounder.ground(word)
                best_curie = scored_matches[0].term.get_curie() if scored_matches else None
                if best_curie is not None:
                    relation += ((best_curie, word),)

        # Keep relations with at least two grounded terms whose first two texts
        # differ, and record each distinct relation only once.
        if (
            len(relation) > 1
            and relation[0][1] != relation[1][1]
            and relation not in seen_ph_relations
        ):
            seen_ph_relations.add(relation)
            ph_relations.append(relation)

In [None]:
# Number of distinct brain region-phenotype relations collected by the extraction loop
len(ph_relations)

In [None]:
# Create an interaction map of relationships between terms
import networkx as nx
import pygraphviz as pgv
import matplotlib.pyplot as plt
G = nx.Graph()
plt.figure(figsize=(50,50))
G.add_edges_from(relations, len=4,color='red')
G.add_edges_from(ph_relations, len=4,color='blue')

edge_colors = [G.edges[edge]['color'] for edge in G.edges]

pos = nx.nx_agraph.graphviz_layout(G, prog='neato')
labels = {}
for k in pos.keys():
    labels[k] = k[1]

nx.draw_networkx_nodes(G, pos, node_size=100, node_color='white', node_shape='o')
nx.draw_networkx_edges(G, pos, width=1.0, edge_color=edge_colors, style='solid')
labels = nx.draw_networkx_labels(G, pos, labels = labels, font_size=8, font_color='k', font_family='sans-serif', font_weight='normal')

print()
plt.savefig("X.pdf")