In [1]:
from pprint import pprint
import nltk
nltk.download('dependency_treebank')
from nltk.corpus import dependency_treebank

[nltk_data] Downloading package dependency_treebank to
[nltk_data]     C:\Users\farih\AppData\Roaming\nltk_data...
[nltk_data]   Package dependency_treebank is already up-to-date!


In [2]:
# Spacy version 
from nltk.parse.dependencygraph import DependencyGraph
import spacy 
from nltk.parse import DependencyEvaluator
import spacy_conll
from spacy.tokenizer import Tokenizer
import pandas

# Load the spacy model
nlp = spacy.load("en_core_web_sm")

nlp.tokenizer = Tokenizer(nlp.vocab)

#dg = dependency_treebank.parsed_sents()[:100]

# Set up the conll formatter 
config = {"ext_names": {"conll_pd": "pandas"},
          "conversion_maps": {"deprel": {"nsubj": "subj"}}}

# Add the formatter to the pipeline
nlp.add_pipe("conll_formatter", config=config, last=True)
nlp.tokenizer = Tokenizer(nlp.vocab)

sentences = dependency_treebank.sents()[-100:]
docs = []
dps = []
for sent in sentences:
    # Parse the sentence
    docs.append(nlp(' '.join(sent)))

# Convert doc to a pandas object
for doc in docs:
    df = doc._.pandas

# Select the columns accoroding to Malt-Tab format
    tmp = df[["FORM",'XPOS','HEAD','DEPREL']].to_string(header=False, index=False)

# See the outcome
    print(tmp)
    
# Get finally our the DepencecyGraph
    dp = DependencyGraph(tmp)
    print('\033[1mTree:\033[0m')
    dp.tree().pretty_print(unicodelines=True, nodedist=4)
    dps.append(dp)

de = DependencyEvaluator(dps, dependency_treebank.parsed_sents()[-100:])
las, uas = de.eval()
# no labels, thus identical
print("\033[1mLAS\033[0m")
print("{:.3}".format(las))
print("\033[1mUAS\033[0m")
print("{:.3}".format(uas))

  from .autonotebook import tqdm as notebook_tqdm


       The  DT  3      det
      Army NNP  3 compound
     Corps NNP  5    nsubj
        is VBZ  5      aux
   cutting VBG  0     ROOT
       the  DT  7      det
      flow  NN  5     dobj
        of  IN  7     prep
       the  DT 11      det
  Missouri NNP 11 compound
     River NNP  8     pobj
     about  RB 13   advmod
       two  CD 14   nummod
     weeks NNS 15 npadvmod
   earlier RBR  5   advmod
      than  IN 15     prep
    normal  JJ 16     amod
   because  IN  5     prep
        of  IN 18    pcomp
       low  JJ 22     amod
     water  NN 22 compound
    levels NNS 18     pobj
        in  IN 22     prep
       the  DT 25      det
reservoirs NNS 23     pobj
      that WDT 27    nsubj
      feed VBP 25    relcl
        it PRP 27     dobj
         .   .  5    punct
[1mTree:[0m
                                                                          cutting                                                                                     
 ┌──────┬──────────────┬────────────

                                                                        said                                                                                          
 ┌────────┬─────────┬────────────────────────────────────────────────────┴────────────────┐                                                                               
 │        │         │                                                                  dropped                                                                        
 │        │         │      ┌──────────────────┬──────────────────────────┬────────────────┴───────────────┬──────────────────────┬──────────────────────┐                 
 │        │         │      │                Profit                       │                                │                      │                      │             
 │        │         │      │        ┌─────────┴─────────┐                │                                │                      │                      │    

[1mLAS[0m
0.0
[1mUAS[0m
0.693


In [3]:
# Stanza
from nltk.parse.dependencygraph import DependencyGraph
import stanza
import spacy_stanza

# Download the stanza model if necessary
# stanza.download("en")

# Set up the conll formatter 
#tokenize_pretokenized used to tokenize by whitespace 
nlp = spacy_stanza.load_pipeline("en", verbose=False, tokenize_pretokenized=True)
config = {"ext_names": {"conll_pd": "pandas"},
          "conversion_maps": {"DEPREL": {"nsubj": "subj", "root" : "ROOT"}}}

# Add the formatter to the pipeline
nlp.add_pipe("conll_formatter", config=config, last=True)

sentences = dependency_treebank.sents()[-100:]
docs = []
dps = []
for sent in sentences:
    # Parse the sentence
    docs.append(nlp(' '.join(sent)))

# Convert doc to a pandas object
for doc in docs:
    df = doc._.pandas

# Select the columns accoroding to Malt-Tab format
    tmp = df[["FORM",'XPOS','HEAD','DEPREL']].to_string(header=False, index=False)

# See the outcome
    print(tmp)

# Get finally our the DepencecyGraph
    dp = DependencyGraph(tmp)
    print('\033[1mTree:\033[0m')
    dp.tree().pretty_print(unicodelines=True, nodedist=4)
    dps.append(dp)

de = DependencyEvaluator(dps, dependency_treebank.parsed_sents()[-100:])
las, uas = de.eval()
# no labels, thus identical
print("\033[1mLAS\033[0m")
print("{:.3}".format(las))
print("\033[1mUAS\033[0m")
print("{:.3}".format(uas))

       The  DT  3       det
      Army NNP  3  compound
     Corps NNP  5      subj
        is VBZ  5       aux
   cutting VBG  0      ROOT
       the  DT  7       det
      flow  NN  5       obj
        of  IN 11      case
       the  DT 11       det
  Missouri NNP 11  compound
     River NNP  7      nmod
     about  RB 13    advmod
       two  CD 14    nummod
     weeks NNS 15 obl:npmod
   earlier RBR  5    advmod
      than  IN 17      case
    normal  JJ 15       obl
   because  IN 22      case
        of  IN 18     fixed
       low  JJ 22      amod
     water  NN 22  compound
    levels NNS  5       obl
        in  IN 25      case
       the  DT 25       det
reservoirs NNS 22      nmod
      that WDT 27      subj
      feed VBP 25 acl:relcl
        it PRP 27       obj
         .   .  5     punct
[1mTree:[0m
                                                                                 cutting                                                                                      

                                                     rose                                                                             
  ┌────────┬──────────────────────────────────────────┴─────────┐                                                                         
  │        │                                                   news                                                                   
  │        │      ┌──────┬──────────────────────────────────────┴───────────┐                                                             
  │        │      │      │                                               damaged                                                      
  │        │      │      │      ┌───────┬──────┬────────────────┬───────────┴───────────────┬────────────────────────┐                    
  │        │      │      │      │       │      │                │                           │                    tightening           
  │        │      │      │      │       │  

                                                                                                                                        said                                                                                                                                   
 ┌───────┬───────┬────────────────────────────────────────────────────┬──────────────────────────────────────────────────────────────────┴─────────────────────────────────────────────┐                                                                                           
 │       │       │                                                  called                                                                                                           shared                                                                                    
 │       │       │      ┌───────┬─────────────────┬───────────────────┴───────────┬───────────────────────────────┐               ┌──────┬───────┬──────┬──────┬────────────────────

[1mLAS[0m
0.0
[1mUAS[0m
0.468


In [4]:
print("\033[1mAs we can see from the output, the dependency tags for Spacy and Stanza are not same\033[0m")

[1mAs we can see from the output, the dependency tags for Spacy and Stanza are not same[0m
