In [13]:
# Sample text used by both parser demos below.
# NOTE(review): the pieces are joined with no separating space, so the text
# contains the fused tokens "Universityof", "Reathand" and "bestknown".
# Later cells look up the fused token ('universityof-13', 'Universityof'),
# so the wording cannot be corrected here without updating them as well.
Sentence = (
    'Born in Philadelphia, he served in World War II and attended the University'
    'of Pennsylvania Law School. After graduating, he joined the law firm of Drinker Biddle & Reath'
    'and remained with them for his entire career. Sawyer worked as a corporate lawyer but is best'
    'known for his advocacy of civil liberties, especially in First Amendment cases.'
)

# Working with the spaCy dependency parser

In [14]:
import networkx as nx
import spacy
# Load the English pipeline by its full package name: the 'en' shortcut link
# only exists in spaCy 2.x installs and was removed in spaCy 3.
# NOTE(review): assumes 'en' pointed at en_core_web_sm (the default download);
# substitute the md/lg package name if a larger model was linked.
nlp = spacy.load('en_core_web_sm')

In [15]:
# Parse the text, then turn every head -> child dependency arc into an edge
# between "<lowercased token>-<token position>" labels. The position suffix
# keeps repeated words apart as separate nodes.
doc = nlp(Sentence)
edges = [
    ('{0}-{1}'.format(head.lower_, head.i),
     '{0}-{1}'.format(dependent.lower_, dependent.i))
    for head in doc
    for dependent in head.children
]

# Undirected graph, so paths may run both up and down the dependency tree.
graph = nx.Graph(edges)

In [23]:
# graph.edges
# graph.nodes

In [20]:
# Number of dependency arcs on the shortest route between the two tokens.
hop_count = nx.shortest_path_length(graph, source='born-0', target='universityof-13')
print(hop_count)

4


In [21]:
# The node labels visited along that shortest route, endpoints included.
node_route = nx.shortest_path(graph, source='born-0', target='universityof-13')
print(node_route)

['born-0', 'served-5', 'attended-11', 'school-16', 'universityof-13']


# Working with the Stanford CoreNLP dependency parser

*Install the Python client first:* `!sudo pip install pycorenlp`

In [27]:
import networkx as nx
from pycorenlp import StanfordCoreNLP
from pprint import pprint

*After downloading Stanford CoreNLP, change into its folder and start the server with the following command:*

`java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 50000`

In [34]:
# Client for the CoreNLP server on local port 9000.
# NOTE: this rebinds `nlp`, which held the spaCy pipeline in the earlier cells.
corenlp_url = 'http://localhost:{0}'.format(9000)
nlp = StanfordCoreNLP(corenlp_url)

In [35]:
def get_stanford_annotations(text, port=9000,
                             annotators='tokenize,ssplit,pos,lemma,depparse,parse'):
    """Annotate ``text`` with a Stanford CoreNLP server running on localhost.

    Args:
        text: Raw text to send to the server.
        port: Port of the CoreNLP server on localhost.
        annotators: Comma-separated list of CoreNLP annotators to run.

    Returns:
        The value returned by ``StanfordCoreNLP.annotate`` for the request
        (JSON output is requested from the server).
    """
    # Build the client from ``port`` so the argument is actually honoured;
    # previously it was ignored in favour of the module-level ``nlp`` client.
    client = StanfordCoreNLP('http://localhost:{0}'.format(port))
    output = client.annotate(text, properties={
        "timeout": "10000",
        "ssplit.newlineIsSentenceBreak": "two",
        'annotators': annotators,
        'outputFormat': 'json'
    })
    return output
# Echo the input text so the parse results below can be checked against it.
document_line = 'document: {0}'.format(Sentence)
print(document_line)


document: Born in Philadelphia, he served in World War II and attended the Universityof Pennsylvania Law School. After graduating, he joined the law firm of Drinker Biddle & Reathand remained with them for his entire career. Sawyer worked as a corporate lawyer but is bestknown for his advocacy of civil liberties, especially in First Amendment cases.


In [36]:
# Run the dependency-parsing pipeline (no constituency 'parse' annotator) and
# keep the token list of the first sentence only.
annotations = get_stanford_annotations(
    Sentence,
    port=9000,
    annotators='tokenize,ssplit,pos,lemma,depparse',
)
first_sentence = annotations['sentences'][0]
tokens = first_sentence['tokens']

In [52]:
#  annotations['sentences'][0]

In [50]:
# Load Stanford CoreNLP's dependency tree into a networkx graph
edges = []
dependencies = {}
for edge in annotations['sentences'][0]['basicDependencies']:
    edges.append((edge['governor'], edge['dependent']))
    dependencies[(min(edge['governor'], edge['dependent']),
                  max(edge['governor'], edge['dependent']))] = edge
graph = nx.Graph(edges)
#pprint(dependencies)
#print('edges: {0}'.format(edges))

# Find the shortest path
token1 = 'Born'
token2 = 'Universityof'
for token in tokens:
    
    if token1 == token['originalText']:
        token1_index = token['index']
    if token2 == token['originalText']:
        token2_index = token['index']
path = nx.shortest_path(graph, source=token1_index, target=token2_index)
print('path: {0}'.format(path))


path: [1, 6, 12, 17, 14]


In [51]:
# Translate each node on the path back into its surface text. Node ids are
# offset by one relative to positions in `tokens`, hence the `- 1`.
for node_id in path:
    node_text = tokens[node_id - 1]['originalText']
    print('Node {0}\ttoken_text: {1}'.format(node_id, node_text))

Node 1	token_text: Born
Node 6	token_text: served
Node 12	token_text: attended
Node 17	token_text: School
Node 14	token_text: Universityof
