# Please note: Using spacy version 2.0

In [4]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
%autoreload

from xml.etree.ElementTree import tostring
import re
import nltk
nltk.download('wordnet')
from nltk import wordnet as wn
import spacy
import numpy as np

import sys
sys.path.append('..')
import utils

[nltk_data] Downloading package wordnet to
[nltk_data]     /afs/csail.mit.edu/u/g/geeticka/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Assume in this case that all sentences have been tokenized

## Tokenization methods

In [6]:
# Set up spaCy
# from spacy.en import English
# from spacy.symbols import ORTH, LEMMA, POS
#parser = English()
parser = spacy.load('en')
def word_tokenize_features(parsedData):
    words = []
    lemmas = []
    pos = []
    for span in parsedData.sents:
        sent = [parsedData[i] for i in range(span.start, span.end)]
        for token in sent:
            words.append(token.orth_)
            lemmas.append(token.lemma_)
            pos.append(token.pos_)
            print(token.head.text, token.dep_, token.text, token.head.i, token.i)
    return words, lemmas, pos
  
def stringify_tokenized(tokenizedSentence):
    return " ".join(tokenizedSentence)

tokenized = ['The', 'bear', 'ran', 'home']
sentence = stringify_tokenized(tokenized)
parsedData = parser(sentence)
word_tokenize_features(parsedData)

bear det The 1 0
ran nsubj bear 2 1
ran ROOT ran 2 2
ran advmod home 2 3


(['The', 'bear', 'ran', 'home'],
 ['the', 'bear', 'run', 'home'],
 ['DET', 'NOUN', 'VERB', 'ADV'])

## Different ways of getting the hypernyms

In [7]:
#input must be a tokenized sentence in the form ['There', 'is', 'a', 'dog', '.']
def get_hypernyms(tokenizedSentence):
    hypernyms = []
    for word in tokenizedSentence:
        hypernym_perword = []
        if len(wn.wordnet.synsets(word)) == 0:
            hypernyms.append("")
            continue
        foundhypernym = 0
        for synset in wn.wordnet.synsets(word):
            if len(synset.hypernyms()) > 0:
                hypernym = synset.hypernyms()[0]
                foundhypernym += 1
                hypernyms.append(hypernym.name())
                break
        if foundhypernym == 0:
            hypernyms.append("")
    return hypernyms

# given a part of speech as a string, return a wordnet part of speech
def toWordnetPOS(POS):
    if(POS == 'VERB'):
        return wn.wordnet.VERB
    elif(POS == 'NOUN'):
        return wn.wordnet.NOUN
    elif(POS == 'ADJ'):
        return wn.wordnet.ADJ
    elif(POS == 'ADV'):
        return wn.wordnet.ADV
    return None
    
# the following hypernym method takes in the words with their respective part of speech to give a hypernym that fits accordingly
def get_hypernyms_usingPOS(tokenized):
    hypernyms = []
    sentence = stringify_tokenized(tokenized)
    parsedData = parser(sentence)
    _, _, POS = word_tokenize_features(parsedData)
    for i in range(len(tokenized)):
        word = tokenized[i]
        print(POS[i])
        pos = toWordnetPOS(POS[i])
        hypernym_perword = []
        if len(wn.wordnet.synsets(word, pos=pos)) == 0:
            hypernyms.append("")
            continue
        foundhypernym = 0
        for synset in wn.wordnet.synsets(word, pos=pos):
            if len(synset.hypernyms()) > 0:
                hypernym = synset.hypernyms()[0]
                foundhypernym += 1
                hypernyms.append(hypernym.name())
                break
        if foundhypernym == 0:
            hypernyms.append("")
    return hypernyms
        

In [8]:
get_hypernyms(['The', 'bear', 'ran', 'home'])

['', 'carnivore.n.01', 'travel_rapidly.v.01', 'residence.n.01']

In [9]:
#wn.wordnet.synsets('bear', pos=wn.wordnet.VERB)
# NOUN, ADJ and ADV are the other possibilities

get_hypernyms_usingPOS(['The', 'bear', 'ran', 'home'])
# this reveals that I should keep it simple for now

bear det The 1 0
ran nsubj bear 2 1
ran ROOT ran 2 2
ran advmod home 2 3
DET
NOUN
VERB
ADV


['', 'carnivore.n.01', 'travel_rapidly.v.01', '']

In [10]:
onesentence = stringify_tokenized(['There', 'is', 'a', 'dog', 'across', 'the', 'street', '.'])
parsedData = parser(onesentence)
words, lemmas, pos = word_tokenize_features(parsedData)
print(words, lemmas, pos)

is expl There 1 0
is ROOT is 1 1
dog det a 3 2
is attr dog 1 3
dog prep across 3 4
street det the 6 5
across pobj street 4 6
is punct . 1 7
['There', 'is', 'a', 'dog', 'across', 'the', 'street', '.'] ['there', 'be', 'a', 'dog', 'across', 'the', 'street', '.'] ['ADV', 'VERB', 'DET', 'NOUN', 'ADP', 'DET', 'NOUN', 'PUNCT']


### Remove the n, 01, _ from the strings

In [11]:
st = "travel_rapidly.v.01"
word1 = " ".join(re.findall("[a-zA-Z]+", st))

In [12]:
word1

'travel rapidly v'

In [13]:
wn.wordnet.VERB

'v'

In [14]:
wn.wordnet.NOUN

'n'

In [15]:
wn.wordnet.ADJ

'a'

In [16]:
wn.wordnet.ADV

'r'

In [17]:
last_character = word1[len(word1)-1:]
if(last_character == 'v' or last_character == 'n' or last_character == 'a' or last_character == 'r'):
    word1 = word1[:len(word1)-1]
    word1 = word1.strip()

In [18]:
word1

'travel rapidly'

In [19]:
word1.split()[0]

'travel'

In [20]:
# this function generates the hypernyms of a tokenized sentence and splits out only the words
# it has a flag to indicate whether to give only the first word or all of them
def get_hypernyms_onlywords(tokenizedSentence, onlyFirstWord=False):
    hypernyms = get_hypernyms(tokenizedSentence)
    hypernyms_onlywords = []
    for hypernym in hypernyms:
        if hypernym == '':
            hypernyms_onlywords.append(hypernym)
            continue
        word = " ".join(re.findall("[a-zA-Z]+", hypernym))
        last_character = word[len(word)-1:]
        if(last_character == 'v' or last_character == 'n' or last_character == 'a' or last_character == 'r'):
            word = word[:len(word)-1]
            word = word.strip()
        if onlyFirstWord == True:
            word = word.split()[0]
        hypernyms_onlywords.append(word)
    return hypernyms_onlywords

In [21]:
get_hypernyms_onlywords(['The', 'bear', 'ran', 'home'])

['', 'carnivore', 'travel rapidly', 'residence']

In [22]:
get_hypernyms_onlywords(['The', 'bear', 'ran', 'home'], True)

['', 'carnivore', 'travel', 'residence']

Testing Di's split_data_cut_sentence method

## Implement Data Augmentation

In [23]:
# 0 and 11 are opposites
# 1 and 16 are opposites
# 2 and 8 are opposites
# 3 and 10 are opposites
# 4 and 17 are opposites
# 5 and 15 are opposites
# 6 and 12 are opposites
# 7 and 13 are opposites
# 9 and 14 are opposites
# 18 does not have an opposite
# given a relation as a number, return the number of the opposite relation
def give_reverse_relation(relation):
    reverse_dict = {0: 11, 11:0, 1:16, 16:1, 2:8, 8:2, 3:10, 10:3, 4:17, 
                    17:4, 5:15, 15:5, 6:12, 12:6, 7:13, 13:7, 9:14, 14:9, 18:18}
    return reverse_dict[relation]
give_reverse_relation(18)   

18

In [24]:
data = ['1 1 2 3 4 Hi my name is beaver', '2 1 2 4 5 My name is tomorrow blah blah']

In [25]:
splitted = utils.split_data_cut_sentence(data)

In [26]:
def augment_data(data):
    sentences = data[0]
    relations = data[1]
    e1_pos = data[2]
    e2_pos = data[3]
    augmented_sentences = []
    augmented_relations = []
    augmented_e1_pos = []
    augmented_e2_pos = []
    augmented_relations = []
    for idx, (sent, pos1, pos2, rel) in enumerate(zip(sentences, e1_pos, e2_pos, relations)):
        reversed_sent = list(reversed(sent))
        reversed_rel = give_reverse_relation(rel)
        reversed_pos1_first = len(sent) - (pos2[1] + 1)
        reversed_pos1_second = len(sent) - (pos2[0] + 1)
        reversed_pos2_first = len(sent) - (pos1[1] + 1)
        reversed_pos2_second = len(sent) - (pos1[0] + 1)
        reversed_pos1 = (reversed_pos1_first, reversed_pos1_second)
        reversed_pos2 = (reversed_pos2_first, reversed_pos2_second)
        augmented_sentences.append(reversed_sent)
        augmented_relations.append(reversed_rel)
        augmented_e1_pos.append(reversed_pos1)
        augmented_e2_pos.append(reversed_pos2)
    sentences = sentences + augmented_sentences
    relations = relations + augmented_relations
    e1_pos = e1_pos + augmented_e1_pos
    e2_pos = e2_pos + augmented_e2_pos
    
    return sentences, relations, e1_pos, e2_pos

In [27]:
augment_data(splitted)

([['hi', 'my', 'name', 'is', 'beaver'],
  ['my', 'name', 'is', 'tomorrow', 'blah', 'blah'],
  ['beaver', 'is', 'name', 'my', 'hi'],
  ['blah', 'blah', 'tomorrow', 'is', 'name', 'my']],
 [1, 2, 16, 8],
 [(1, 2), (1, 2), (0, 1), (0, 1)],
 [(3, 4), (4, 5), (2, 3), (3, 4)])

# Get the shortest dependency path

First, how to get the dependency parse itself

In [28]:
# tokenized = ['The', 'bear', 'ran', 'home']
tokenized = ["Autonomous", "cars", "shift", "insurance", "liability", "toward", "manufacturers"]
sentence = stringify_tokenized(tokenized)
parsedData = parser(sentence)
word_tokenize_features(parsedData)

cars amod Autonomous 1 0
shift nsubj cars 2 1
shift ROOT shift 2 2
liability compound insurance 4 3
shift dobj liability 2 4
shift prep toward 2 5
toward pobj manufacturers 5 6


(['Autonomous',
  'cars',
  'shift',
  'insurance',
  'liability',
  'toward',
  'manufacturers'],
 ['autonomous',
  'car',
  'shift',
  'insurance',
  'liability',
  'toward',
  'manufacturer'],
 ['ADJ', 'NOUN', 'VERB', 'NOUN', 'NOUN', 'ADP', 'NOUN'])

In [29]:
for chunk in parsedData.noun_chunks:
    print(chunk.text, chunk.root.text, chunk.root.dep_,
          chunk.root.head.text)

Autonomous cars cars nsubj shift
insurance liability liability dobj shift
manufacturers manufacturers pobj toward


In [30]:
e1_pos = (1,1)
e2_pos = (3,3)

In [31]:
# https://stackoverflow.com/questions/32835291/how-to-find-the-shortest-dependency-path-between-two-words-in-python
# above is very useful in knowing how to calculate the shortest dependency path using the graphs package as well as spacy

In [32]:
import networkx as nx
import spacy
# nlp = spacy.load('en')

# # https://spacy.io/docs/usage/processing-text
# document = nlp(u'Robots in popular culture are there to remind us of the awesomeness of unbound human agency.', parse=True)

# print('document: {0}'.format(document))

# Load spacy's dependency tree into a networkx graph
edges = []
for token in parsedData:
    # FYI https://spacy.io/docs/api/token
    for child in token.children:
        edges.append(('{0}_{1}'.format(token.lower_,token.i),
                      '{0}_{1}'.format(child.lower_,child.i), 
                     {'name': token.dep_}))
#         edges.append(({'word':token.lower_, 'pos':str(token.i)}, {'word':child.lower_, 'pos':str(child.i)}))

graph = nx.Graph(edges)
entity1 = tokenized[e1_pos[0]]+ "_" + str(e1_pos[0]) #if they are of size 1
entity2 = tokenized[e2_pos[0]]+ "_" + str(e2_pos[0])
# https://networkx.github.io/documentation/networkx-1.10/reference/algorithms.shortest_paths.html
print(nx.shortest_path_length(graph, source=entity1, target=entity2))
print(nx.shortest_path(graph, source=entity1, target=entity2))
# print(nx.shortest_path(graph, source='robots-0', target='agency-15'))

3
['cars_1', 'shift_2', 'liability_4', 'insurance_3']


can get away with an embedding of size 10 or 5 because the number of possible dependencies are on the order of 50

Visualizing dependencies

In [33]:
# from spacy import displacy

# nlp = spacy.load('en_core_web_sm')
# doc = nlp(u"Autonomous cars shift insurance liability toward manufacturers")
# displacy.render(doc, style='dep', jupyter=True)

In [34]:
edge_attributes = nx.get_edge_attributes(graph, 'name')

In [35]:
edge_attributes

{('cars_1', 'autonomous_0'): 'nsubj',
 ('cars_1', 'shift_2'): 'ROOT',
 ('shift_2', 'liability_4'): 'ROOT',
 ('shift_2', 'toward_5'): 'ROOT',
 ('liability_4', 'insurance_3'): 'dobj',
 ('toward_5', 'manufacturers_6'): 'prep'}

In [36]:
edge_attributes[('cars_1', 'autonomous_0')]

'nsubj'

In [37]:
path = nx.shortest_path(graph, source=entity1, target=entity2)

In [38]:
path

['cars_1', 'shift_2', 'liability_4', 'insurance_3']

In [39]:
path_with_names = []
for i in range(0, len(path)-1):
    path_with_names.append(path[i])
    edge = edge_attributes[(path[i], path[i+1])]
    path_with_names.append(edge)
path_with_names.append(path[-1])

In [40]:
path_with_names

['cars_1', 'ROOT', 'shift_2', 'ROOT', 'liability_4', 'dobj', 'insurance_3']

### Final method

In [41]:
import networkx as nx
import spacy

def get_path_length(entity1, entity2, graph):
    if nx.has_path(graph, entity1, entity2):
        length = nx.shortest_path_length(graph, source=entity1, target=entity2)
        return length
    else: 
        return None

def get_path_with_edge_name(graph, path):
    edge_attributes = nx.get_edge_attributes(graph, 'name')
    path_with_edge_names = []
    for i in range(0, len(path)-1):
        path_with_edge_names.append(path[i])
        w1 = path[i]
        w2 = path[i+1]
        #edge_attributes give a great idea of directionality
        # 0 means direction is from left to right (where left is the head)
        # 1 means direction is from right to left (where right is the head)
        edge = edge_attributes[(w1,w2) if (w1,w2) in edge_attributes else (w2, w1)] \
        + "_" + str(0) if (w1,w2) in edge_attributes else str(1)
        path_with_edge_names.append(edge)
    path_with_edge_names.append(path[-1])
    return path_with_edge_names

In [42]:
import networkx as nx
import spacy
def get_shortest_dependency_path(tokenizedSentence, e1_pos, e2_pos):
    parser = spacy.load('en')
    sentence = stringify_tokenized(tokenizedSentence)
    parsedData = parser(sentence)
    edges = []
    print("Tokenized sentence, sentence", tokenizedSentence, sentence)
    # it is possible that spacy is giving extra tokens to the sentence than are needed
    for token in parsedData:
        # FYI https://spacy.io/docs/api/token
        for child in token.children:
            edges.append(('{0}_{1}'.format(token.lower_,token.i), #head
                          '{0}_{1}'.format(child.lower_,child.i), #child
                         {'name': token.dep_})) #name of the dependency

    graph = nx.Graph(edges)
    #TODO: handle the case with multiple words in the entity
    # basically do below, but in a loop
    # only do path_with_edge names for the shortest dependency path. 
    # length can be gotten using nx.shortest_path_length(graph, source=entity1, target=entity2)
    entity1 = [tokenizedSentence[e1_pos[i]]+ "_" + str(e1_pos[i]) for i in range(0, len(e1_pos))]
#     entity1 = tokenizedSentence[e1_pos[0]]+ "_" + str(e1_pos[0]) #if they are of size 1
    entity2 = [tokenizedSentence[e2_pos[i]]+ "_" + str(e2_pos[i]) for i in range(0, len(e2_pos))]
#     entity2 = tokenizedSentence[e2_pos[0]]+ "_" + str(e2_pos[0])
    path_length = []
    for e1 in entity1:
        for e2 in entity2:
            length = get_path_length(e1, e2, graph)
            if length is not None:
                path_length.append({'len': length, 'e1': e1, 'e2': e2})
    print("Possible path lengths are ", path_length)
    # TODO: make above more efficient; what is the maximum possible path length in dependency path in a sentence
    if not path_length: # means list is empty which means that path didnt exist at all
        return None, None
    minItem = min(path_length, key=lambda x: x['len'])
    path = nx.shortest_path(graph, source=minItem['e1'], target=minItem['e2'])
    path_with_edge_names = get_path_with_edge_name(graph, path)
#     if nx.has_path(graph,entity1,entity2):
#         path = nx.shortest_path(graph, source=entity1, target=entity2)
#     else: 
#         return None, None
    # https://networkx.github.io/documentation/networkx-1.10/reference/algorithms.shortest_paths.html
#     edge_attributes = nx.get_edge_attributes(graph, 'name')
#     path_with_edge_names = []
#     for i in range(0, len(path)-1):
#         path_with_edge_names.append(path[i])
#         w1 = path[i]
#         w2=path[i+1]
#         edge = edge_attributes[(w1,w2) if (w1,w2) in edge_attributes else (w2, w1)]
#         path_with_edge_names.append(edge)
#     path_with_edge_names.append(path[-1])
    return path, path_with_edge_names

In [43]:
get_shortest_dependency_path(["Autonomous", "cars", "shift", "insurance", "liability", "toward", "manufacturers"], (1,1), (3,3))

Tokenized sentence, sentence ['Autonomous', 'cars', 'shift', 'insurance', 'liability', 'toward', 'manufacturers'] Autonomous cars shift insurance liability toward manufacturers
Possible path lengths are  [{'len': 3, 'e1': 'cars_1', 'e2': 'insurance_3'}, {'len': 3, 'e1': 'cars_1', 'e2': 'insurance_3'}, {'len': 3, 'e1': 'cars_1', 'e2': 'insurance_3'}, {'len': 3, 'e1': 'cars_1', 'e2': 'insurance_3'}]


(['cars_1', 'shift_2', 'liability_4', 'insurance_3'],
 ['cars_1',
  'ROOT_0',
  'shift_2',
  'ROOT_0',
  'liability_4',
  'dobj_0',
  'insurance_3'])

In [44]:
def get_only_word(string):
    return " ".join(re.findall("[a-zA-Z]+", string))

def get_only_number(string):
    return " ".join(re.findall("[0-9]+", string))

def get_indiv_pos(position, words_num_dict, words_new_num_dict):
    if str(position) not in words_num_dict:
        return None
    word = words_num_dict[str(position)]
    return words_new_num_dict[word]

def get_new_entity_position(words, e1_pos, e2_pos, numbers):
    d1 = {numbers[i]: word for i, word in enumerate(words)} # words_num_dict
    d2 = {word: i for i, word in enumerate(words)} # words_new_num_dict
#     print("e1_pos, e2_pos", e1_pos, e2_pos)
    # TODO: new_position may include just one word inside of the entities
    # which means that some word between the start and end might be in the path
    #i.e. we need to check for every entity word if None is returned by get_indiv_pos
    # because exactly one word's val returned will not be None
    for i in range(e1_pos[0], e1_pos[1] + 1):
        new_pos = get_indiv_pos(i, d1, d2)
        if new_pos is not None:
            new_e1_pos = (new_pos, new_pos)
    for i in range(e2_pos[0], e2_pos[1] + 1):
        new_pos = get_indiv_pos(i, d1, d2)
        if new_pos is not None:
            new_e2_pos = (new_pos, new_pos)
#     new_e1_pos = (get_indiv_pos(e1_pos[0], d1, d2), get_indiv_pos(e1_pos[1], d1, d2))
#     new_e2_pos = (get_indiv_pos(e2_pos[0], d1, d2), get_indiv_pos(e2_pos[1], d1, d2))
    return new_e1_pos, new_e2_pos

Change the augment method to incorporate the dependency path information

In [45]:
def augment_data(data, simple=True):
    sentences = data[0]
    relations = data[1]
    e1_pos = data[2]
    e2_pos = data[3]
    augmented_sentences = []
    augmented_relations = []
    augmented_e1_pos = []
    augmented_e2_pos = []
    augmented_relations = []
    for idx, (sent, pos1, pos2, rel) in enumerate(zip(sentences, e1_pos, e2_pos, relations)):
        if simple is True: # in this case everything is reversed
            augmented_sent = list(reversed(sent))
            augmented_rel = give_reverse_relation(rel)
            augmented_pos1_first = len(sent) - (pos2[1] + 1)
            augmented_pos1_second = len(sent) - (pos2[0] + 1)
            augmented_pos2_first = len(sent) - (pos1[1] + 1)
            augmented_pos2_second = len(sent) - (pos1[0] + 1)
            augmented_pos1 = (reversed_pos1_first, reversed_pos1_second)
            augmented_pos2 = (reversed_pos2_first, reversed_pos2_second)
        else: # in this case only the shortest dependency path is considered
            print("Sentence", sent)
            path = get_shortest_dependency_path(sent, pos1, pos2)[0]
            if path is None:
                continue
            print("Path", path)
            oldnums = [get_only_number(word) for word in path]
            print("Old indexing", oldnums)
            augmented_sent = [get_only_word(word) for word in path]
            augmented_pos1, augmented_pos2 = get_new_entity_position(augmented_sent, pos1, pos2, oldnums)
            augmented_rel = rel
        augmented_sentences.append(augmented_sent)
        augmented_relations.append(augmented_rel)
        augmented_e1_pos.append(augmented_pos1)
        augmented_e2_pos.append(augmented_pos2)
    sentences = sentences + augmented_sentences
    relations = relations + augmented_relations
    e1_pos = e1_pos + augmented_e1_pos
    e2_pos = e2_pos + augmented_e2_pos
    
    return sentences, relations, e1_pos, e2_pos

Old data

In [46]:
splitted

([['hi', 'my', 'name', 'is', 'beaver'],
  ['my', 'name', 'is', 'tomorrow', 'blah', 'blah']],
 [1, 2],
 [(1, 2), (1, 2)],
 [(3, 4), (4, 5)])

Augmented data check

In [47]:
augment_data(splitted, False)

Sentence ['hi', 'my', 'name', 'is', 'beaver']
Tokenized sentence, sentence ['hi', 'my', 'name', 'is', 'beaver'] hi my name is beaver
Possible path lengths are  [{'len': 2, 'e1': 'my_1', 'e2': 'is_3'}, {'len': 3, 'e1': 'my_1', 'e2': 'beaver_4'}, {'len': 1, 'e1': 'name_2', 'e2': 'is_3'}, {'len': 2, 'e1': 'name_2', 'e2': 'beaver_4'}]
Path ['name_2', 'is_3']
Old indexing ['2', '3']
Sentence ['my', 'name', 'is', 'tomorrow', 'blah', 'blah']
Tokenized sentence, sentence ['my', 'name', 'is', 'tomorrow', 'blah', 'blah'] my name is tomorrow blah blah
Possible path lengths are  []


([['hi', 'my', 'name', 'is', 'beaver'],
  ['my', 'name', 'is', 'tomorrow', 'blah', 'blah'],
  ['name', 'is']],
 [1, 2, 1],
 [(1, 2), (1, 2), (0, 0)],
 [(3, 4), (4, 5), (1, 1)])

I think this works

How to disable the tokenizer in spacy

In [48]:
sentence = ['ms.', 'sendler', 'made', 'lists', 'of', 'these', 'children', 'and', 'placed', 'the', \
            'lists', 'in', 'a', 'jar', 'that', 'she', 'buried', 'in', 'a', 'garden', '.']
e1_pos= (10, 10)
e2_pos= (13,13)
parser = spacy.load('en')
from spacy.tokens import Doc
doc = Doc(parser.vocab, words=sentence)
parser.tagger(doc)
parser.parser(doc)
for token in doc:
    print(token.lower_, ":", token.i, ":", token.dep_)
    
# for the pipeline, look at this https://spacy.io/usage/processing-pipelines

ms. : 0 : compound
sendler : 1 : nsubj
made : 2 : ROOT
lists : 3 : dobj
of : 4 : prep
these : 5 : det
children : 6 : pobj
and : 7 : cc
placed : 8 : conj
the : 9 : det
lists : 10 : dobj
in : 11 : prep
a : 12 : det
jar : 13 : pobj
that : 14 : dobj
she : 15 : nsubj
buried : 16 : relcl
in : 17 : prep
a : 18 : det
garden : 19 : pobj
. : 20 : punct


# Converting position embeddings from path without labels to path with labels