In [1]:
import spacy


In [2]:
nlp = spacy.load("en")

In [3]:
sentence = 'Smith will join the board as an executive director September 12'
#doc2 = nlp(sentence)

In [4]:
from spacy import displacy
doc = nlp(sentence)
displacy.render(doc, style="dep",  jupyter=True, options={'distance': 100})

#Extract a path of dependency relations from the ROOT to a token

In [5]:
def extractRootPath(t, doc):
    path = []
    existing = t
    root = 'ROOT'
    existing_dependency = existing.dep_
    # jump from a token to its head until the root is reached, saving the path
    if not existing_dependency == root:
      # add the token at the start of the list
        path.append([(0, existing_dependency)])
        existing = existing.head
    # add the root
    path.append([(0, root)])
    return path

def sentroot(sentence):
    doc = nlp(sentence)
        # for each token in doc get its path using token_path_to_root
    paths = {t.text: extractRootPath(t, doc) for t in doc}
    return paths


#Extract subtree of a dependents given a token

In [6]:
def extractSubTreeDep(sentence):
    # for each token in the sentence get its subtree and convert it to a list
    doc = nlp(sentence)
    chtree = {}
    for sentence in doc.sents:
        for token in sentence:
            chtree[token] = [tksb for tksb in token.subtree]
    return chtree
    

#Check if a given list of tokens (segment of a sentence) forms a subtree

In [7]:
def checkSegofSenSubtree(sentence, words):
    doc = nlp(sentence)
    
    ## if string in given just converting input sentence to list format tokens
    sentList = []
    for t in doc:
        for word in words:
            if t.text == word:
                sentList.append(t)
                
    tokens = sorted(sentList)

    for token in tokens:
        # check if the token list and the subtree list are equal
        if sorted(token.subtree) == tokens:
            # if equal Return True
            return True
    # else False
    return False


#Identify head of a span, given its tokens

In [8]:
def headofspan(words):
#  next() is used so that its take the input as first sentence
    return next(nlp(' '.join(words)).sents).root


#Extract sentence subject, direct object and indirect object spans

In [9]:
def sub_dobj_iobj(sentence):
  doc = nlp(sentence)

  nsubj, dobj, iobj = [], [], []
 
  for token in doc:
    #if token is subject append into list
    if(token.dep_ == 'nsubj'):
      nsubj.append(token.text)
    #if token is direct object append into list
    elif(token.dep_ == 'dobj'):
      dobj.append(token.text)
    #if token is  ndirect object append into list
    elif(token.dep_ == 'iobj'):
      iobj.append(token.text)

  print("nsubj, Stand for subject = : ",nsubj)
  print("dobj, Stand for direct object = : ",dobj)
  print("iobj, Stand for indirect object = : ",iobj)

##Executing Function

In [10]:
#Calling first function to execute
sentroot(sentence)

{'12': [[(0, 'nummod')], [(0, 'ROOT')]],
 'September': [[(0, 'npadvmod')], [(0, 'ROOT')]],
 'Smith': [[(0, 'nsubj')], [(0, 'ROOT')]],
 'an': [[(0, 'det')], [(0, 'ROOT')]],
 'as': [[(0, 'prep')], [(0, 'ROOT')]],
 'board': [[(0, 'dobj')], [(0, 'ROOT')]],
 'director': [[(0, 'pobj')], [(0, 'ROOT')]],
 'executive': [[(0, 'amod')], [(0, 'ROOT')]],
 'join': [[(0, 'ROOT')]],
 'the': [[(0, 'det')], [(0, 'ROOT')]],
 'will': [[(0, 'aux')], [(0, 'ROOT')]]}

In [11]:
#Calling second function to execute
extractSubTreeDep(sentence)

{Smith: [Smith],
 will: [will],
 join: [Smith,
  will,
  join,
  the,
  board,
  as,
  an,
  executive,
  director,
  September,
  12],
 the: [the],
 board: [the, board],
 as: [as, an, executive, director],
 an: [an],
 executive: [executive],
 director: [an, executive, director],
 September: [September, 12],
 12: [12]}

In [12]:
#Calling third function to execute
checklist1 = ['an', 'executive', 'director']
print(checkSegofSenSubtree(sentence,checklist1))
checklist2 = ['Smith', 'director','September']
print(checkSegofSenSubtree(sentence, checklist2))

True
False


In [13]:
#Calling forth function to execute
print(sentence)
headofspan(sentence.split())

Smith will join the board as an executive director September 12


join

In [14]:
#Calling fifth function to execute
sub_dobj_iobj(sentence)

nsubj, Stand for subject = :  ['Smith']
dobj, Stand for direct object = :  ['board']
iobj, Stand for indirect object = :  []
