In [1]:
import spacy
from spacy import displacy
from collections import Counter
from py2neo import *
import matplotlib.pyplot as plt
import en_core_web_sm
nlp = en_core_web_sm.load()
from spacy.lang.en.stop_words import STOP_WORDS

###### 1. Who are the entities and officers in US?
###### 3. Which countries have the most addresses?
###### 4. Which jurisdictions is BOSHEN connected to?
###### 5. How are X and Y connected/related?
###### 6. Which jurisdictions have the least/most entities? / Most popular jurisdictions
###### 7. Which locations are in/come under X jurisdiction?
###### 8. Under which jurisdiction is location X?
###### 9. Most influential/important entities in the graph?
###### 10. Which officers have the same addresses?
###### 11. Which officers in X region have the same addresses?

In [2]:
def connect():
    """Open the Neo4j connection and store it in the module-level ``graph``.

    Connection settings are read from the environment variables
    ``NEO4J_URI``, ``NEO4J_USER`` and ``NEO4J_PASSWORD``; when a variable is
    not set, the value this notebook has always used is kept as the default,
    so existing setups continue to work unchanged.

    The connection is verified with a trivial auto-commit query instead of
    ``graph.begin()``: the original opened an explicit transaction that was
    never committed or rolled back, leaving it dangling on the server.
    """
    global graph
    import os  # local import so this cell does not depend on editing the import cell

    uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
    user = os.environ.get("NEO4J_USER", "neo4j")
    # NOTE(review): the fallback password is kept only for backward
    # compatibility; prefer setting NEO4J_PASSWORD and dropping the default.
    password = os.environ.get("NEO4J_PASSWORD", "soham")

    graph = Graph(uri, auth=(user, password))
    # Round-trip once so "Connected..." is only printed when the server answers.
    graph.run("RETURN 1").evaluate()
    print('Connected...')

In [3]:
connect()

Connected...


In [4]:
def ask_question():
    """Read one question from the user and hand it to the tagging step.

    The typed text is stored in the module-level ``question``; a blank
    separator is printed and ``tag_question()`` is then invoked.
    """
    global question
    prompt_text = "INPUT: "
    question = input(prompt_text)
    # Visual gap between the prompt and the analysis output that follows.
    print("\n")
    tag_question()
    

In [5]:
def tag_question():
    """Run the spaCy pipeline over the global ``question`` and publish the results.

    Sets the module-level ``tokens`` (token texts) and ``ner`` (list of
    ``(text, label)`` pairs from ``doc.ents``), prints the tokens, the
    (token, part-of-speech) pairs and the entities, renders the dependency and
    entity visualisations with displaCy, and finally calls ``parms_builder()``.
    """
    global tokens, ner
    parsed = nlp(question)

    # One pass over the document yields both the text and its coarse POS tag.
    tags = [(word.text, word.pos_) for word in parsed]
    tokens = [text for text, _ in tags]
    ner = [(entity.text, entity.label_) for entity in parsed.ents]

    print("Tokens: ", tokens)
    print("Tags are: ", tags, '\n', "Number of tags: ", len(tags), '\n', "NER: ", ner)

    for view in ('dep', 'ent'):
        displacy.render(parsed, style=view, jupyter=True)

    parms_builder()
    

In [9]:
def parms_builder():
    """Turn the recognised entities in the global ``ner`` into Cypher parameters.

    Results are published through two module-level dicts:

    * ``parms``   - for a single entity: ``{"country": ...}`` for GPE/LOC,
      ``{"org": ...}`` for ORG, ``{"person": ...}`` for PERSON.
    * ``parms_2`` - for two or more entities: ``{"name1": ..., "name2": ...}``
      built from the first two.

    Both dicts are reset to ``{}`` on every call so parameters from a previous
    question can never leak into the current one. The abbreviations US/USA/UK
    are expanded to the full country name; previously those branches computed
    the name but never stored it, leaving ``parms`` unset or stale.

    Always finishes by calling ``query_picker()``.
    """
    global parms, parms_2
    parms = {}
    parms_2 = {}

    # Abbreviations spaCy commonly returns, mapped to the spelled-out name.
    country_aliases = {
        "US": "United States",
        "USA": "United States",
        "UK": "United Kingdom",
    }

    if len(ner) == 1:
        text, label = ner[0]
        if label in ('GPE', 'LOC'):
            parms["country"] = country_aliases.get(text, text)
        elif label == 'ORG':
            parms["org"] = text
        elif label == 'PERSON':
            parms["person"] = text
        # Entity types other than the above produce no parameters.
        if parms:
            print(parms)
    elif len(ner) > 1:
        parms_2 = {"name1": ner[0][0], "name2": ner[1][0]}
        print(parms_2)
    query_picker()

In [14]:
def _show_query(query, parameters=None):
    """Print a Cypher query, run it on the global ``graph`` and print the result table.

    Returns the table so the caller can check whether any rows came back.
    """
    print(query, '\n')
    cursor = graph.run(query, parameters) if parameters else graph.run(query)
    table = cursor.to_table()
    print(table)
    return table


def query_picker():
    """Pick the Cypher template that matches the current question and run it once.

    Reads the module-level ``tokens`` and ``ner`` (set by ``tag_question``) and
    the parameter dicts ``parms`` / ``parms_2`` (set by ``parms_builder``).
    The question is matched as a whole, case-insensitively, against the
    templates below; the first that applies is executed and printed:

    1. connected/related + jurisdiction(s) + an ORG entity -> jurisdictions of
       entities linked to officers whose name contains ``$org``
    2. connected/related + two named entities -> shortest path between them
       (tried on ``Officer`` nodes first, then on ``Entity`` nodes)
    3. officer(s) + entity/entities -> officers and the entities they serve
    4. country/countries + address(es) -> address counts per country
    5. influential/important -> entities ranked by ``pagerank_g``
    6. same + address(es) + officer(s) -> officers grouped by registered address
    7. question starts with "Under" + jurisdiction(s) + a country -> jurisdiction
       of entities located in ``$country``
    8. jurisdiction(s) + a country -> locations under the ``$country`` jurisdiction

    Earlier versions tested each token separately with conditions such as
    ``token in [...] and [...]`` (the second operand is always truthy) or
    ``token in A and token in B`` (never true), built syntactically invalid
    Cypher for template 3, and referenced undefined names for template 2.
    """
    if not tokens:
        print("No question tokens to match against a query template.")
        return

    words = {token.lower() for token in tokens}
    # parms / parms_2 may not exist yet if parms_builder never assigned them.
    single = globals().get("parms", {})
    pair = globals().get("parms_2", {})

    def mentions(*candidates):
        """True when the question contains at least one of the given words."""
        return not words.isdisjoint(candidates)

    linked = mentions("connected", "related")
    about_jurisdiction = mentions("jurisdiction", "jurisdictions")
    # Only trust a stored parameter when this question actually had an entity.
    country_known = bool(ner) and "country" in single
    org_known = bool(ner) and ner[0][1] == 'ORG' and "org" in single
    pair_known = len(ner) > 1 and "name1" in pair and "name2" in pair

    if linked and about_jurisdiction and org_known:
        _show_query(
            "MATCH (officer:Officer)-[]->(entity:Entity)"
            " WHERE officer.name contains $org"
            " RETURN entity.jurisdiction_description as Juris,"
            " count(*) as Number_of_entities ORDER BY Number_of_entities DESC",
            single,
        )
    elif linked and pair_known:
        query_sp_officer = "MATCH (o1:Officer{ name: $name1 }),(o2:Officer{ name: $name2 }), o = shortestPath((o1)-[*..15]-(o2)) RETURN o1.name, o2.name, o"
        query_sp_entity = "MATCH (e1:Entity{ name: $name1 }),(e2:Entity{ name: $name2 }), e = shortestPath((e1)-[*..15]-(e2)) RETURN e1.name, e2.name, e"
        # The names may belong to officers or to entities; fall back to the
        # entity template when the officer template returns no rows.
        if len(_show_query(query_sp_officer, pair)) == 0:
            _show_query(query_sp_entity, pair)
    elif mentions("officer", "officers") and mentions("entity", "entities"):
        _show_query(
            "MATCH (officer:Officer)-[r:OFFICER_OF]->(entity:Entity)"
            " RETURN collect(officer.name), officer.countries, entity.name LIMIT 10"
        )
    elif mentions("country", "countries") and mentions("address", "addresses"):
        _show_query(
            "MATCH (address:Address)"
            " RETURN address.countries as Country, address.country_codes as Codes,"
            " count(*) AS Count ORDER BY Count DESC LIMIT 15"
        )
    elif mentions("influential", "important"):
        _show_query(
            "MATCH (entity:Entity) WHERE exists(entity.pagerank_g)"
            " RETURN entity.name AS entity,"
            " entity.jurisdiction_description AS jurisdiction,"
            " entity.pagerank_g AS pagerank ORDER BY pagerank DESC LIMIT 15"
        )
    elif (mentions("same") and mentions("address", "addresses")
          and mentions("officer", "officers")):
        _show_query(
            "MATCH (officer:Officer)-[r:REGISTERED_ADDRESS]->(address:Address)"
            " RETURN collect(officer.name), address.address"
        )
    elif tokens[0].lower() == "under" and about_jurisdiction and country_known:
        # "Under which jurisdiction is location X?" - X is the location.
        _show_query(
            "MATCH (entity:Entity) WHERE entity.countries CONTAINS $country"
            " RETURN distinct entity.countries as Location,"
            " entity.jurisdiction_description as Jurisdiction,"
            " collect(entity.name) limit 5",
            single,
        )
    elif about_jurisdiction and country_known:
        # "Which locations come under X jurisdiction?" - X is the jurisdiction.
        _show_query(
            "MATCH (entity:Entity)"
            " WHERE entity.jurisdiction_description CONTAINS $country"
            " RETURN collect(distinct entity.countries) as Locations,"
            " entity.jurisdiction_description as Jurisdiction limit 10",
            single,
        )
    else:
        print("No query template matches this question.")
            

In [15]:
ask_question()

INPUT: Which locations come under Panama jurisdiction?


Tokens:  ['Which', 'locations', 'come', 'under', 'Panama', 'jurisdiction', '?']
Tags are:  [('Which', 'DET'), ('locations', 'NOUN'), ('come', 'VERB'), ('under', 'ADP'), ('Panama', 'PROPN'), ('jurisdiction', 'NOUN'), ('?', 'PUNCT')] 
 Number of tags:  7 
 NER:  [('Panama', 'GPE')]


{'country': 'Panama'}
MATCH (entity:Entity)WHERE entity.jurisdiction_description CONTAINS $country RETURN collect(distinct entity.countries) as Locations, entity.jurisdiction_description as Jurisdiction limit 10 

 Locations                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        