In [1]:
# Run the repository's spaCy setup script; per the recorded output below it
# downloads and installs the en_core_web_sm 3.5.0 model.
# NOTE(review): `!` executes in a separate shell process, so `source` cannot
# change the kernel's own environment — confirm the script does not rely on that.
!source ./setup_spacy.sh

Collecting en-core-web-sm==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl (12.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.8/12.8 MB 4.7 MB/s eta 0:00:00
✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')


In [2]:
from parse_dep import *
import pandas as pd
from spacy import displacy

In [3]:
# Build the NLP pipeline once and reuse it for every parse below.
# get_spacy_pipeline is presumably provided by the `from parse_dep import *`
# above. NOTE(review): the recorded outputs show tokens such as "Ġasked", so
# the pipeline appears to emit BPE-style subword tokens — confirm in parse_dep.
nlp = get_spacy_pipeline()

In [4]:
# Load the STREUSLE train / dev / test splits (JSON files from the conllulex
# repo) into one DataFrame per split.
train_df, dev_df, test_df = map(
    pd.read_json,
    (
        'datasets/streusle.ud_train.json',
        'datasets/streusle.ud_dev.json',
        'datasets/streusle.ud_test.json',
    ),
)

In [5]:
# Example sentence from "LSTMs compose and learn bottom up".
# Parse it once; `doc` is reused by the displaCy rendering and the
# syntactic-distance cells further down.
doc = nlp("Socrates asked the student trick questions")
# Echo the parsed document so the pipeline's tokenization is visible.
print(doc)

S ocrates Ġasked Ġthe Ġstudent Ġtrick Ġquestions 


In [6]:
# Parse every training sentence and store the result in a new 'dependencies'
# column (per the recorded preview, a dict mapping each token to its
# dependency label).
# Only the 'text' column is read, so apply the parser column-wise rather than
# with DataFrame.apply(axis=1), which would materialise a full row Series for
# every sentence just to pull out one field.
train_df['dependencies'] = train_df['text'].apply(
    lambda text: parse_dependencies(nlp, text)
)

In [7]:
# Preview the first ten parses; in the recorded output each entry is a dict
# mapping a token to its dependency label (e.g. 'nsubj', 'ROOT').
train_df['dependencies'].head(10)

0    {'B': 'npadvmod', 'illing': 'amod', 'ĠIssues':...
1    {'I': 'nsubj', 'Ġhad': 'ROOT', 'Ġa': 'dobj', '...
2    {'My': 'poss', 'Ġinsurance': 'compound', 'Ġcom...
3    {'Then': 'advmod', 'ĠI': 'compound', 'Ġgot': '...
4    {'The': 'det', 'Ġdoctor': 'poss', ''s': 'case'...
5    {'Blue': 'compound', 'Ġcross': 'compound', 'Ġh...
6    {'The': 'det', 'Ġoffice': 'npadvmod', 'Ġrefuse...
7    {'They': 'nsubj', 'Ġeventually': 'advmod', 'Ġt...
8    {'I': 'nmod', 'Ġeventually': 'advmod', 'Ġdecid...
9    {'It': 'nsubj', 'Ġwas': 'ROOT', 'Ġan': 'ROOT',...
Name: dependencies, dtype: object

In [8]:
# Draw the dependency parse of the example `doc` with displaCy.
# 'compact' and 'distance' are displaCy layout options passed through as-is;
# presumably they were chosen to keep the arcs tight for this short sentence.
displacy.render(doc, style='dep', options={'compact': True, 'distance': 100})

In [9]:
# Distance between every pair of tokens in the example `doc`.
# In the recorded output the result is a dict keyed by (token, token) pairs
# with integer values. NOTE(review): presumably the path length between the
# two tokens in the dependency tree — confirm against parse_dep.
get_syntactic_distance(doc)

{(S, ocrates): 1,
 (S, Ġasked): 3,
 (S, Ġthe): 4,
 (S, Ġstudent): 4,
 (S, Ġtrick): 3,
 (S, Ġquestions): 2,
 (ocrates, Ġasked): 2,
 (ocrates, Ġthe): 3,
 (ocrates, Ġstudent): 3,
 (ocrates, Ġtrick): 2,
 (ocrates, Ġquestions): 1,
 (Ġasked, Ġthe): 3,
 (Ġasked, Ġstudent): 3,
 (Ġasked, Ġtrick): 2,
 (Ġasked, Ġquestions): 1,
 (Ġthe, Ġstudent): 2,
 (Ġthe, Ġtrick): 1,
 (Ġthe, Ġquestions): 2,
 (Ġstudent, Ġtrick): 1,
 (Ġstudent, Ġquestions): 2,
 (Ġtrick, Ġquestions): 1}