# Comparing LAL parser and AJ parser

### Install additional modules

In [32]:
!pip install PyStanfordDependencies
!pip install PYEVALB
!pip install apted
!pip install stanza

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


### Import

In [33]:
import StanfordDependencies
import spacy
from spacy import displacy
import statistics
from spacy import displacy
from spacy.tokens import Doc
from PYEVALB import parser
from PYEVALB.scorer import Scorer
from nltk.tree import Tree as Tree2
from apted import APTED, Config
import numpy as np
from pathlib import Path
import os


### Clone the assignment repository (data directory)

In [34]:
!git clone https://github.com/gabriele-dominici/L95_assignment.git

fatal: destination path 'L95_assignment' already exists and is not an empty directory.


### Initialisation of some variables

In [35]:
# Converter from constituency (bracketed) trees to Stanford dependency trees
sd = StanfordDependencies.get_instance(backend='subprocess', version='3.5.1')
# spaCy pipeline; its vocab is used to build Doc objects for displaCy rendering
nlp = spacy.load("en_core_web_sm")
# PYEVALB scorer used to compute the PARSEVAL metrics
scorer = Scorer()

In [36]:
#@title Tree-reading helpers adapted from stanza (used to build trees for APTED)
from collections import deque
import logging
import re

from stanza.models.common import utils
from stanza.models.constituency.parse_tree import Tree

tqdm = utils.get_tqdm()

OPEN_PAREN = "("
CLOSE_PAREN = ")"

logger = logging.getLogger('stanza.constituency')

# A few specific exception types to clarify parsing errors
# They store the line number where the error occurred

class UnclosedTreeError(ValueError):
    """
    Raised when a tree is opened but never closed, e.g. (Foo

    The line on which the tree started is kept in ``line_num``.
    """
    def __init__(self, line_num):
        message = "Found an unfinished tree (missing close brackets).  Tree started on line %d" % line_num
        ValueError.__init__(self, message)
        self.line_num = line_num

class ExtraCloseTreeError(ValueError):
    """
    Raised when a tree has more close brackets than open ones, e.g. (Foo))

    The reported line number is kept in ``line_num``.
    """
    def __init__(self, line_num):
        message = "Found a broken tree (extra close brackets).  Tree started on line %d" % line_num
        ValueError.__init__(self, message)
        self.line_num = line_num

class UnlabeledTreeError(ValueError):
    """
    Raised when an inner node carries no label, such as ((Foo) (Bar))

    An unlabeled outermost node does not trigger this: the reader
    silently labels it ROOT instead.
    """
    def __init__(self, line_num):
        message = "Found a tree with no label on a node!  Line number %d" % line_num
        ValueError.__init__(self, message)
        self.line_num = line_num

class MixedTreeError(ValueError):
    """
    Raised when one node has both plain-text and bracketed children

    Keeps the line number, the offending text and the bracketed children.
    """
    def __init__(self, line_num, child_label, children):
        message = f"Found a tree with both text children and bracketed children!  Line number {line_num}  Child label {child_label}  Children {children}"
        ValueError.__init__(self, message)
        self.line_num = line_num
        self.child_label = child_label
        self.children = children

def normalize(text):
    """Replace the bracket escapes -LRB- / -RRB- with literal parentheses."""
    for escaped, literal in (("-LRB-", "("), ("-RRB-", ")")):
        text = text.replace(escaped, literal)
    return text

def read_single_tree(token_iterator, broken_ok):
    """
    Build a tree from the tokens in the token_iterator

    The caller is expected to have consumed the opening paren already.

    token_iterator: yields "(", ")" and text tokens; must also provide
      set_mark(), get_mark() and a line_num attribute (used for errors)
    broken_ok: if true, nodes mixing text and bracketed children, and
      unlabeled inner nodes, are kept instead of raising

    Returns the finished Tree.  If the outermost node has no label,
    its children are wrapped in a node labeled ROOT.

    Raises MixedTreeError / UnlabeledTreeError (only when broken_ok is
    false) and UnclosedTreeError if the tokens run out first.
    """
    # we were called here at a open paren, so start the stack of
    # children with one empty list already on it
    children_stack = deque()
    children_stack.append([])
    text_stack = deque()
    text_stack.append([])

    token = next(token_iterator, None)
    # remember the line of the first token so an unclosed tree can be
    # reported from where it started
    token_iterator.set_mark()
    while token is not None:
        if token == OPEN_PAREN:
            # a nested node starts: give it fresh child and text lists
            children_stack.append([])
            text_stack.append([])
        elif token == CLOSE_PAREN:
            # the innermost open node is complete
            text = text_stack.pop()
            children = children_stack.pop()
            if text:
                pieces = " ".join(text).split()
                if len(pieces) == 1:
                    # only a label: a constituent over the bracketed children
                    child = Tree(pieces[0], children)
                else:
                    # the assumption here is that a language such as VI may
                    # have spaces in the words, but it still represents
                    # just one child
                    label = pieces[0]
                    child_label = " ".join(pieces[1:])
                    if children:
                        if broken_ok:
                            child = Tree(label, children + [Tree(normalize(child_label))])
                        else:
                            raise MixedTreeError(token_iterator.line_num, child_label, children)
                    else:
                        child = Tree(label, Tree(normalize(child_label)))
                # an empty stack means the outermost node was just closed
                if not children_stack:
                    return child
            else:
                if not children_stack:
                    # outermost node without a label: label it ROOT
                    return Tree("ROOT", children)
                elif broken_ok:
                    child = Tree(None, children)
                else:
                    raise UnlabeledTreeError(token_iterator.line_num)
            # attach the finished node to its parent
            children_stack[-1].append(child)
        else:
            # plain text (label and/or word) of the innermost open node
            text_stack[-1].append(token)
        token = next(token_iterator, None)
    # ran out of tokens before the outermost node was closed
    raise UnclosedTreeError(token_iterator.get_mark())

# Splits a line on parentheses while keeping the parentheses as tokens
LINE_SPLIT_RE = re.compile(r"([()])")

class TokenIterator:
    """
    Iterator over the tokens of a bracketed-tree text

    Tokens are "(", ")" and the stripped text between them.  The
    iterator tracks the index of the line being processed (line_num)
    so that errors can be reported from the correct line, and lets
    the caller remember one line via set_mark() / get_mark().
    """
    def __init__(self, text):
        self.lines = text.split("\n")
        self.num_lines = len(self.lines)
        self.line_num = -1
        # a progress bar is only shown for large inputs
        line_source = tqdm(self.lines) if self.num_lines > 1000 else self.lines
        self.line_iterator = iter(line_source)
        self.token_iterator = iter([])
        self.mark = None

    def set_mark(self):
        """Remember the current line number"""
        self.mark = self.line_num

    def get_mark(self):
        """Return the remembered line number; ValueError if none was set"""
        if self.mark is not None:
            return self.mark
        raise ValueError("No mark set!")

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            token = next(self.token_iterator, None)
            if token is not None:
                return token

            # current line is used up: move on to the next one
            self.line_num += 1
            if self.line_num >= self.num_lines:
                # one extra pull on the (already finished) line iterator
                next(self.line_iterator, "")
                raise StopIteration

            line = next(self.line_iterator, "").strip()
            if not line:
                continue

            stripped = (piece.strip() for piece in LINE_SPLIT_RE.split(line))
            self.token_iterator = iter([piece for piece in stripped if piece])

def read_trees(text, broken_ok=False):
    """
    Parse every bracketed tree in the text and return them as a list

    A stray close paren between trees raises ExtraCloseTreeError; any
    other text between trees raises ValueError.
    TODO: some of the error cases we hit can be recovered from
    """
    token_iterator = TokenIterator(text)
    trees = []
    token = next(token_iterator, None)
    while token:
        if token == CLOSE_PAREN:
            raise ExtraCloseTreeError(token_iterator.line_num)
        if token != OPEN_PAREN:
            raise ValueError("Tree document had text between trees!  Line number %d" % token_iterator.line_num)

        # the open paren is consumed; the reader handles the rest of the tree
        next_tree = read_single_tree(token_iterator, broken_ok=broken_ok)
        if next_tree is None:
            raise ValueError("Tree reader somehow created a None tree!  Line number %d" % token_iterator.line_num)
        trees.append(next_tree)
        token = next(token_iterator, None)

    return trees

def read_tree_file(filename):
    """
    Parse the whole content of the given file with read_trees
    """
    with open(filename) as fin:
        contents = fin.read()
        return read_trees(contents)

def read_treebank(filename):
    """
    Read the trees of a file, then prune and simplify each of them

    Raises ValueError if any resulting tree has more than one child
    directly under its top node.
    """
    logger.info("Reading trees from %s", filename)
    trees = []
    for raw_tree in read_tree_file(filename):
        trees.append(raw_tree.prune_none().simplify_labels())

    illegal_trees = [t for t in trees if len(t.children) > 1]
    if illegal_trees:
        raise ValueError("Found {} tree(s) which had non-unary transitions at the ROOT.  First illegal tree: {}".format(len(illegal_trees), illegal_trees[0]))

    return trees

### Load and preprocess the gold standard

In [37]:
# Read all lines of the gold-standard file. The context manager closes the
# handle as soon as the lines are read (it used to stay open).
with open("./L95_assignment/data/final_gold.txt", "r") as f:
  read = f.readlines()

In [38]:
# Preprocessing to store pos tags, constituency
# and entire dependency parsing.
# The rows are read as a cycle of blank-line-separated sections:
# count == 0 -> POS-tag row, count == 1 -> constituency row,
# count == 2 -> dependency rows (one group of rows per sentence).
# `last` is the section of the previously handled row; a dependency row
# continues the current group only if the previous row was one too.
count = 2
pos_tags = []
cp_gold = []
dp_gold = []
last = count
# Always a list: the previous '' sentinel raised TypeError on `+= [row]`
# when the file did not start with a blank line.
tmp_dep = []
for row in read:
  if row == '\n':
    # a blank line closes the current section and moves to the next one
    last = count
    count = (count + 1) % 3
  elif count == 0:
    pos_tags.append(row)
    last = count
  elif count == 1:
    cp_gold.append(row)
    last = count
  elif last == count:
    # still inside the same dependency group
    tmp_dep.append(row)
  else:
    # first dependency row of a sentence: flush the previous group
    if tmp_dep:
      dp_gold.append(tmp_dep)
    tmp_dep = [row]
    last = count
# flush the last group (nothing to add if no dependency row was seen)
if tmp_dep:
  dp_gold.append(tmp_dep)

In [39]:
# Preprocessing to store words and dependency trees
# NOTE(review): the slicing below presumes each dependency row looks like
# "(|rel| |head:idx_TAG| |child:idx_TAG|)\n", optionally with a subtype
# field after the relation - confirm against final_gold.txt.
final_dp_gold = []
final_words_w_index = []
# Pass 1: per sentence, collect the last field of every row (the child
# token, still carrying its ":index_TAG" suffix)
for s in dp_gold:
  tmp_words = []
  for el in s:
    # drop the first character and the last three, then split the fields
    tmp_el = el[1:-3].split('| ')
    k = len(tmp_el) - 1
    tmp_words += [tmp_el[k][1:]]
  final_words_w_index += [tmp_words]

# Pass 2: turn every row into [relation, head position, child index]
for s, w in zip(dp_gold, final_words_w_index):
  tmp_dp = []
  for el in s:
    tmp_el = el[1:-3].split('| ')
    # relation, head, child
    k = len(tmp_el) - 2
    h = tmp_el[k][1:]
    # The head is stored as its position in w (row order), while the child
    # is stored as the number written after ':' in the token.
    # NOTE(review): these agree only if rows are listed in token order - confirm.
    if len(tmp_el) > 3:
      # four fields: relation and subtype are joined with ':'
      tmp_el = [tmp_el[0][1:]+':'+tmp_el[1][1:],
                w.index(h),
                int(tmp_el[3][1:].split(":")[1].split('_')[0])]
    elif len(tmp_el) == 2:
      # two fields: no head field, so the head is None
      tmp_el = [tmp_el[0][1:],
                None,
                int(tmp_el[1][1:].split(":")[1].split('_')[0])]
    else:
      tmp_el = [tmp_el[0][1:],
                w.index(h),
                int(tmp_el[2][1:].split(":")[1].split('_')[0])]
    tmp_dp += [tmp_el]
  
  # order the rows by child index
  tmp_dp.sort(key = lambda x: x[2])
  final_dp_gold += [tmp_dp]

# Pass 3: strip the ":index_TAG" suffix in place, keeping the word only
for s in range(len(final_words_w_index)):
  for i in range(len(final_words_w_index[s])):
    final_words_w_index[s][i] = final_words_w_index[s][i].split(':')[0]


# Transpose each sentence into (relations, heads, child indices) tuples
dp_gold_to_use = []
for i in final_dp_gold:
  dp_gold_to_use += [list(zip(*i))]


In [40]:
# Per sentence: the list of gold POS tags, and a map from each annotated
# token (as written in the gold file) to its plain surface form
gold_pos_tag = []
words = []
for s in pos_tags:
  print(s)
  tmp = []
  tmp_word = {}
  for el in s[:-1].split(' '):
    # drop the last character, then split into token part and tag
    fields = el[:-1].split('_')
    annotated = fields[0]
    tmp.append(fields[-1])
    # key: token without its first character; value: same token without
    # the ":index" suffix and without '+' morpheme marks
    tmp_word[annotated[1:]] = annotated.split(':')[0].replace('+', '')[1:]
  gold_pos_tag.append(tmp)
  words.append(tmp_word)

|My:0_PRP$| |aunt:1_NN| |'s+:2_POS| |can:3_NN| |opener:4_NN| |can:5_MD| |open:6_VB| |a:7_DT| |drum:8_NN| |.:9_.|

|The:0_DT| |old:1_JJ| |car:2_NN| |broke:3_VBD| |down:4_RP| |in:5_IN| |the:6_DT| |car:7_NN| |park:8_NN| |.:9_.|

|At:0_RB| |least:1_RBS| |two:2_CD| |men:3_NNS| |broke:4_VBD| |in:5_RP| |and:6_CC| |stole:7_VBD| |my:8_PRP$| |TV:9_NN| |.:10_.|

|Kim:0_NNP| |and:1_CC| |Sandy:2_NNP| |both:3_RB| |broke:4_VBD| |up:5_RP| |with:6_IN| |their:7_PRP$| |partner+s:8_NNS| |.:9_.|

|The:0_DT| |horse:1_NN| |as:2_RB| |well:3_RB| |as:4_RB| |the:5_DT| |rabbit+s:6_NNS| |which:7_WDT| |we:8_PRP| |want+ed:9_VBD| |to:10_TO| |eat:11_VB| |has:12_VBZ| |escaped:13_VBN| |.:14_.|

|It:0_PRP| |was:1_VBD| |my:2_PRP$| |aunt:3_NN| |'s+:4_POS| |car:5_NN| |which:6_WDT| |we:7_PRP| |sold:8_VBD| |at:9_IN| |auction:10_NN| |last:11_RB| |year:12_NN| |in:13_IN| |February:14_NNP| |.:15_.|

|Natural:0_JJ| |disaster+s:1_NNS| |–:2_:| |storm+s:3_NNS| |,:4_,| |flood+ing:5_NN| |,:6_,| |hurricane+s:7_NNS| |–:8_:| |occur:9_VBP|

In [41]:
# Final preprocessing for constituency trees: replace the first
# occurrence of every annotated token with its plain surface form
cp_gold_final = []
for index, s in enumerate(cp_gold):
  sentence = s[:-1]
  for annotated, plain in words[index].items():
    sentence = sentence.replace(annotated, plain, 1)
  cp_gold_final.append(sentence)

### Load and preprocess the LAL parser trees

In [42]:
# Load of constituency trees (the handle is closed once the lines are read)
with open("./L95_assignment/data/LAL_constituency.txt", "r") as f:
  cd_tree_p1 = f.readlines()

In [43]:
# Load of dependency heads (the handle is closed once the lines are read)
with open("./L95_assignment/data/LAL_heads.txt", "r") as f:
  dep_heads = f.readlines()

In [44]:
# Preprocessing heads: every row is a bracketed, comma-separated list of
# integers; 0 becomes None, any other value is shifted down by one
new_dep_heads = []
for s in dep_heads:
  temp = s[1:-2].split(', ')
  dep_heads_s = [None if int(el) == 0 else int(el) - 1 for el in temp]
  new_dep_heads.append(dep_heads_s)

In [45]:
# Load of dependency relations (the handle is closed once the lines are read)
with open("./L95_assignment/data/LAL_deps.txt", "r") as f:
  dep_labels = f.readlines()

In [46]:
# Preprocessing relations: strip the surrounding quote characters of
# every label and rename 'root' to 'ROOT'
new_dep_labels = []
for s in dep_labels:
  temp = s[1:-2].split(', ')
  dep_labels_s = ['ROOT' if el[1:-1] == 'root' else el[1:-1] for el in temp]
  new_dep_labels.append(dep_labels_s)

In [47]:
# Load original sentences (the handle is closed once the lines are read)
with open("./L95_assignment/data/AJ_input.txt", "r") as f:
  sentences = f.readlines()

In [48]:
# Sentences words: drop the last character of each line and split on spaces
words_to_use = [line[:-1].split(" ") for line in sentences]

### Load and preprocess the AJ constituency parses

In [49]:
# Load of constituency parsing (the handle is closed once the lines are read)
with open("./L95_assignment/data/AJ_constituency.txt", "r") as f:
  cd_tree_p2 = f.readlines()

In [19]:
# Converting constituency predictions in dependency trees
# NOTE(review): a tree that fails to convert is only reported and skipped,
# so dp_tree_p2 gets shorter and later zip() calls pair it with the wrong
# sentences - check the output for 'ERROR' before trusting the scores.
dp_tree_p2 = []
for i in cd_tree_p2:
  print(i)
  try:
    converted = sd.convert_tree(i)
  except Exception as e:
    print('ERROR')
    print(e)
  else:
    dp_tree_p2.append(converted)

(S (NP (NP (PRP$ My) (NN aunt) (POS 's)) (MD can) (NN opener)) (VP (MD can) (VP (VB open) (NP (DT a) (NN drum)))) (. .))

(S (NP (DT The) (JJ old) (NN car)) (VP (VBD broke) (PRT (RP down)) (PP (IN in) (NP (DT the) (NN car) (NN park)))) (. .))

(S (NP (QP (RB At) (JJS least) (CD two)) (NNS men)) (VP (VP (VBD broke) (PRT (RP in))) (CC and) (VP (VBD stole) (NP (PRP$ my) (NN TV)))) (. .))

(S (NP (NNP Kim) (CC and) (NNP Sandy)) (DT both) (VP (VBD broke) (PRT (RP up)) (PP (IN with) (NP (PRP$ their) (NNS partners)))) (. .))

(S (NP (NP (DT The) (NN horse)) (CONJP (RB as) (RB well) (IN as)) (NP (NP (DT the) (NNS rabbits)) (SBAR (WHNP (WDT which)) (S (NP (PRP we)) (VP (VBD wanted) (S (VP (TO to) (VP (VB eat))))))))) (VP (VBZ has) (VP (VBN escaped))) (. .))

(S (NP (PRP It)) (VP (VBD was) (NP (NP (NP (PRP$ my) (NN aunt) (POS 's)) (NN car)) (SBAR (WHNP (WDT which)) (S (NP (PRP we)) (VP (VBD sold) (PP (IN at) (NP (NN auction))) (NP (JJ last) (NN year)) (PP (IN in) (NP (NNP February)))))))) (. .))

### Comparing constituency parsing

In [20]:
# Custom Config to use with Apted
class CustomConfig(Config):
  def rename(self, node1, node2):
      """Rename cost: 1 if the two nodes have different .label, else 0"""
      return 1 if node1.label != node2.label else 0

  def children(self, node):
      """Return the node's .children attribute, or [] if it has none"""
      return getattr(node, 'children', [])

In [21]:
# func to print tree in a format usable in Latex
def tranlsate_tree_latex(tree):
  """Return the bracketed tree string with ( ) swapped for [ ]."""
  square_brackets = str.maketrans("()", "[]")
  return tree.translate(square_brackets)

In [22]:
# Initialisation of some variables
# Per-sentence metrics; suffix 1g = LAL vs gold, 2g = AJ vs gold
ed_1g = []
ed_2g = []
p_1g = []
p_2g = []
r_1g = []
r_2g = []
f1_1g = []
f1_2g = []
cb_1g = []
cb_2g = []
pos_1g = []
pos_2g = []
# Comparing trees
for i, j, g in zip(cd_tree_p1, cd_tree_p2, cp_gold_final):

  # Create trees usable by different metrics
  print('GOLD')
  # Tree for Latex
  print(tranlsate_tree_latex(g))
  # Tree visualisation
  treeg = Tree2.fromstring(g)
  treeg.pretty_print()
  # Tree for APTED. read_single_tree expects the opening paren to be
  # consumed already, so an extra ')' is appended to close the implicit
  # outer node (which then comes back labeled ROOT).
  token_iterator = TokenIterator(g+')')
  treeg_t = read_single_tree(token_iterator, False)

  print('LAL Parser')
  print(tranlsate_tree_latex(i))
  tree1 = Tree2.fromstring(i)
  token_iterator = TokenIterator(i+')')
  tree1_t = read_single_tree(token_iterator, False)
  tree1.pretty_print()

  print('AJ Graph Parser')
  print(tranlsate_tree_latex(j))
  tree2 = Tree2.fromstring(j)
  token_iterator = TokenIterator(j+')')
  tree2_t = read_single_tree(token_iterator, False)
  tree2.pretty_print()

  # Trees for PARSEVAL
  gold_tree = parser.create_from_bracket_string(g)
  LAL_tree = parser.create_from_bracket_string(i)
  AJ_tree = parser.create_from_bracket_string(j)

  # GOLD vs LAL
  # PARSEVAL scores, Cross bracketing, POS accuracy
  result1 = scorer.score_trees(gold_tree, LAL_tree)
  # Edit Distance (computed once, then reused for printing and storing)
  apted = APTED(tree1_t, treeg_t, CustomConfig())
  edit_distance1 = apted.compute_edit_distance()
  print('Gold vs LAL Parser')
  print(f"Edit distance :{edit_distance1}")

  # F1 is defined as 0 when precision and recall are both 0
  # (the plain formula would divide by zero)
  pr_sum1 = result1.recall + result1.prec
  F1Score = (2*result1.recall*result1.prec)/pr_sum1 if pr_sum1 else 0.0

  # Metrics
  print('Recall =' + str(result1.recall))
  print('Precision =' + str(result1.prec))
  print('F1-Score =' + str(F1Score))
  # Store metrics
  r_1g += [result1.recall]
  p_1g += [result1.prec]
  f1_1g += [F1Score]
  ed_1g += [edit_distance1]
  cb_1g += [result1.cross_brackets]
  pos_1g += [result1.tag_accracy]
  print(result1)

  # GOLD vs AJ

  result2 = scorer.score_trees(gold_tree, AJ_tree)

  apted = APTED(tree2_t, treeg_t, CustomConfig())
  edit_distance2 = apted.compute_edit_distance()
  print('Gold vs AJ Graph Parser')
  print(f"Edit distance :{edit_distance2}")

  pr_sum2 = result2.recall + result2.prec
  F1Score = (2*result2.recall*result2.prec)/pr_sum2 if pr_sum2 else 0.0

  print('Recall =' + str(result2.recall))
  print('Precision =' + str(result2.prec))
  print('F1-Score =' + str(F1Score))
  r_2g += [result2.recall]
  p_2g += [result2.prec]
  f1_2g += [F1Score]
  ed_2g += [edit_distance2]
  cb_2g += [result2.cross_brackets]
  pos_2g += [result2.tag_accracy]
  print(result2)

  # Comparison of the two parsers
  print('LAL Parser vs AJ Graph Parser')
  print(scorer.score_trees(LAL_tree, AJ_tree))
  print('-----------------')

GOLD
[S [NP [PRP$ My] [NP [NN aunt] [NP [POS 's] [NP [NN can] [NN opener]]]]] [VP [MD can] [VP [VB open] [NP [DT a] [NN drum]]]] [. .]]
                            S                                 
            ________________|_______________________________   
           NP                              |                | 
  _________|___                            |                |  
 |             NP                          VP               | 
 |     ________|___                ________|___             |  
 |    |            NP             |            VP           | 
 |    |     _______|___           |    ________|___         |  
 |    |    |           NP         |   |            NP       | 
 |    |    |        ___|____      |   |         ___|___     |  
PRP$  NN  POS      NN       NN    MD  VB       DT      NN   . 
 |    |    |       |        |     |   |        |       |    |  
 My  aunt  's     can     opener can open      a      drum  . 

LAL Parser
[S [NP [NP [PRP$ My] [NN au

In [23]:
# Overall metrics: mean cross bracketing and mean edit distance per parser
for title, cross_brackets, edit_distances in (('LAL vs Gold', cb_1g, ed_1g),
                                              ('AJ vs Gold', cb_2g, ed_2g)):
  print(title)
  print(f'Cross braketing: {np.mean(cross_brackets)}')
  print(f'Edit distance: {np.mean(edit_distances)}')

LAL vs Gold
Cross braketing: 1.6363636363636365
Edit distance: 18.545454545454547
AJ vs Gold
Cross braketing: 1.1818181818181819
Edit distance: 15.636363636363637


#### Print all predictions for LaTeX

In [24]:
# Print a block of text for Latex
# It contains a paragraph for the sentence 
# and a paragraph for each tree (gold, LAL, AJ)
def print_tree_latex(gold, parser1, parser2, counter):
  """Print one sentence's three trees as LaTeX paragraphs.

  gold, parser1, parser2: trees (anything str() can render); every '$'
    in their text is escaped as '\\$' for LaTeX
  counter: sentence number shown in the first paragraph title
  """
  def escape_dollar(tree):
    # '\\$' spells the backslash explicitly; the previous '\$' relied on
    # an invalid escape sequence
    return str(tree).replace('$', '\\$')

  sentence = ('\\paragraph {Sentence ' + str(counter) + '} \\\\ \n' +
               '\\paragraph{GOLD} \n')
  sentence += escape_dollar(gold) + '\n'
  sentence += '\\paragraph{LAL} \n'
  sentence += escape_dollar(parser1) + '\n'
  sentence += '\\paragraph{AJ} \n'
  sentence += escape_dollar(parser2) + '\n'
  sentence += '\n'
  print(sentence)

In [25]:
# Print the LaTeX block of every sentence, numbered from 1
counter = 0
for counter, (i, j, g) in enumerate(zip(cd_tree_p1, cd_tree_p2, cp_gold_final), start=1):
  gold_tree = parser.create_from_bracket_string(g)
  test_tree_1 = parser.create_from_bracket_string(i)
  test_tree_2 = parser.create_from_bracket_string(j)
  print_tree_latex(gold_tree, test_tree_1, test_tree_2, counter)
  print()

\paragraph {Sentence 1} \\ 
\paragraph{GOLD} 
(S (NP (PRP\$ My) (NP (NN aunt) (NP (POS 's) (NP (NN can) (NN opener))))) (VP (MD can) (VP (VB open) (NP (DT a) (NN drum)))) (. .))
\paragraph{LAL} 
(S (NP (NP (PRP\$ My) (NN aunt) (POS 's)) (MD can) (VB opener)) (VP (MD can) (VP (VB open) (NP (DT a) (NN drum)))) (. .))
\paragraph{AJ} 
(S (NP (NP (PRP\$ My) (NN aunt) (POS 's)) (MD can) (NN opener)) (VP (MD can) (VP (VB open) (NP (DT a) (NN drum)))) (. .))



\paragraph {Sentence 2} \\ 
\paragraph{GOLD} 
(S (NP (DT The) (NP (ADJP (JJ old)) (NN car))) (VP (V (VBD broke) (RP down)) (PP (IN in) (NP (DT the) (NP (NN car) (NN park))))) (. .))
\paragraph{LAL} 
(S (NP (DT The) (JJ old) (NN car)) (VP (VBD broke) (PRT (RB down)) (PP (IN in) (NP (DT the) (NN car) (NN park)))) (. .))
\paragraph{AJ} 
(S (NP (DT The) (JJ old) (NN car)) (VP (VBD broke) (PRT (RP down)) (PP (IN in) (NP (DT the) (NN car) (NN park)))) (. .))



\paragraph {Sentence 3} \\ 
\paragraph{GOLD} 
(S (NP (ADVP (RB At) (ADVP (RBS leas

#### Overall scores

In [26]:
# Writing prediction in right format: one tree per line in each file.
# The parser outputs still carry their line breaks, the gold trees do not.
for out_name, payload in (('cp_lal.txt', ''.join(cd_tree_p1)),
                          ('cp_aj.txt', ''.join(cd_tree_p2)),
                          ('cp_gold.txt', '\n'.join(cp_gold_final))):
  with open(out_name, 'w', encoding='utf-8') as f:
    f.write(payload)

In [27]:
# PARSEVAL for all sentences GOLD vs LAL
# Scores the whole LAL file against the gold file in one evalb run;
# the three paths are gold trees, predicted trees and the report file.
s = Scorer()
gold_path = 'cp_gold.txt'
test_path = 'cp_lal.txt'
result_path = 'result1g.txt'

s.evalb(gold_path, test_path, result_path)

In [28]:
# PARSEVAL for all sentences GOLD vs AJ
# Scores the whole AJ file against the gold file in one evalb run;
# the three paths are gold trees, predicted trees and the report file.
s = Scorer()
gold_path = 'cp_gold.txt'
test_path = 'cp_aj.txt'
result_path = 'result2g.txt'

s.evalb(gold_path, test_path, result_path)

### Comparing Dependency prediction

In [29]:
# Per-sentence (score, number of scored tokens) pairs;
# suffix 1 = LAL vs gold, suffix 2 = AJ vs gold
all_uas1 = []
all_uas2 = []
all_las1 = []
all_las2 = []
counter = 0

# Create a dir to store svgs
if (not os.path.exists("images")):
    os.mkdir("images")

for word, deps_t, heads_t, sent, gold, gold_words in zip(words_to_use, new_dep_labels,
                                   new_dep_heads, dp_tree_p2, dp_gold_to_use, final_words_w_index):
  counter += 1
  # Visualising GOLD tree and save it as SVG
  # Best effort: a gold tree that spaCy rejects is reported and skipped
  try:
    doc = Doc(nlp.vocab, words=gold_words, deps=gold[0], heads=gold[1])
    displacy.render(doc,jupyter=True)
    svg = displacy.render(doc, style="dep", jupyter=False)
    file_name = f'{counter}_gold'
    output_path = Path("./images/" + file_name)
    # write_text closes the file; the previous open().write() left it open
    output_path.write_text(svg, encoding="utf-8")
  except Exception as e:
    print(e)
  
  # Visualising LAL tree and save it as SVG
  doc = Doc(nlp.vocab, words=word, deps=deps_t, heads=heads_t)
  displacy.render(doc,jupyter=True)
  svg = displacy.render(doc, style="dep", jupyter=False)
  file_name = f'{counter}_lal'
  output_path = Path("./images/" + file_name)
  output_path.write_text(svg, encoding="utf-8")

  #Preprocess AJ prediction
  # Named words2 so that the global `words` (token maps built during the
  # gold preprocessing) is not overwritten by this loop
  words2 = []
  deps2 = []
  heads2 = []
  for i in sent:
    words2 += [i.form]
    if i.deprel == 'root':
      deps2 += ['ROOT']
      heads2 += [None]
    else:
      deps2 += [i.deprel]
      # shift the head index down by one
      heads2 += [i.head-1]

  # Visualising AJ tree and save it as SVG
  doc = Doc(nlp.vocab, words=words2, deps=deps2, heads=heads2)
  displacy.render(doc,jupyter=True) 
  svg = displacy.render(doc, style="dep", jupyter=False)
  file_name = f'{counter}_aj'
  output_path = Path("./images/" + file_name)
  output_path.write_text(svg, encoding="utf-8")

  #Comparison
  # Both parsers are scored on the same tokens: those whose GOLD relation
  # is not punct. (LAL used to be filtered on its own predicted label,
  # so the two parsers could be scored on different token sets.)
  # GOLD vs LAL
  print('LAL vs Gold')
  uas1 = []
  las1 = []
  for i,j, hi, hj in zip(deps_t, gold[0], heads_t, gold[1]):
    # Ignore punct relations
    if j != 'punct':
      uas1 += [hi==hj]
      las1 += [i==j and hi==hj]
  # UAS score
  print(f'UAS: {statistics.mean(uas1)}')
  #LAS score
  print(f'LAS: {statistics.mean(las1)}')

  # GOLD vs AJ
  print('AJ vs Gold')
  uas2 = []
  las2 = []
  for i,j, hi, hj in zip(deps2, gold[0], heads2, gold[1]):
    if j != 'punct':
      uas2 += [hi==hj]
      las2 += [i==j and hi==hj]
    # report tokens on which gold and AJ disagree about being punctuation
    if (i == 'punct' or j == 'punct') and i!=j:
      print(i,j)
      print('-------------------------------------')
  print(f'UAS: {statistics.mean(uas2)}')
  print(f'LAS: {statistics.mean(las2)}')

  #Store metrics
  all_uas1 += [(statistics.mean(uas1), len(uas1))]
  all_uas2 += [(statistics.mean(uas2), len(uas2))]
  all_las1 += [(statistics.mean(las1), len(las1))]
  all_las2 += [(statistics.mean(las2), len(las2))]
  print('----------------')  

# Token-weighted means over all sentences. Each parser is divided by its
# own token count (the AJ sums used to be divided by the LAL count).
tot_lal = sum(n_tokens for _, n_tokens in all_uas1)
tot_aj = sum(n_tokens for _, n_tokens in all_uas2)

# Overall results
print(f'MEAN UAS LAL: {sum(score*n_tokens for score, n_tokens in all_uas1)/tot_lal}')
print(f'MEAN LAS LAL: {sum(score*n_tokens for score, n_tokens in all_las1)/tot_lal}')
print(f'MEAN UAS AJ: {sum(score*n_tokens for score, n_tokens in all_uas2)/tot_aj}')
print(f'MEAN LAS AJ: {sum(score*n_tokens for score, n_tokens in all_las2)/tot_aj}')

LAL vs Gold
UAS: 0.6666666666666666
LAS: 0.6666666666666666
AJ vs Gold
UAS: 1
LAS: 0.8888888888888888
----------------


LAL vs Gold
UAS: 1
LAS: 1
AJ vs Gold
UAS: 1
LAS: 1
----------------


LAL vs Gold
UAS: 0.8
LAS: 0.8
AJ vs Gold
UAS: 1
LAS: 1
----------------


LAL vs Gold
UAS: 0.8888888888888888
LAS: 0.8888888888888888
AJ vs Gold
UAS: 0.8888888888888888
LAS: 0.8888888888888888
----------------


LAL vs Gold
UAS: 0.9285714285714286
LAS: 0.8571428571428571
AJ vs Gold
UAS: 0.9285714285714286
LAS: 0.8571428571428571
----------------


LAL vs Gold
UAS: 0.8666666666666667
LAS: 0.8
AJ vs Gold
UAS: 0.8666666666666667
LAS: 0.8
----------------
[E039] Array bounds exceeded while searching for root word. This likely means the parse tree is in an invalid state. Please report this issue here: http://github.com/explosion/spaCy/issues


LAL vs Gold
UAS: 0.75
LAS: 0.75
AJ vs Gold
dep punct
-------------------------------------
dep punct
-------------------------------------
UAS: 0.75
LAS: 0.75
----------------


LAL vs Gold
UAS: 1
LAS: 0.9583333333333334
AJ vs Gold
dep punct
-------------------------------------
UAS: 0.7916666666666666
LAS: 0.7916666666666666
----------------


LAL vs Gold
UAS: 0.8285714285714286
LAS: 0.6285714285714286
AJ vs Gold
UAS: 0.7142857142857143
LAS: 0.5428571428571428
----------------


LAL vs Gold
UAS: 1
LAS: 0.9375
AJ vs Gold
dep punct
-------------------------------------
UAS: 0.9375
LAS: 0.9375
----------------


LAL vs Gold
UAS: 0.5
LAS: 0.5
AJ vs Gold
UAS: 0.6111111111111112
LAS: 0.5833333333333334
----------------
MEAN UAS LAL: 0.8082901554404145
MEAN LAS LAL: 0.7512953367875648
MEAN UAS AJ: 0.8031088082901554
MEAN LAS AJ: 0.7512953367875648


#### Printing tables for LaTeX

In [30]:
# Print a table for Latex that contains relations 
# for each parser and the gold standard
def _head_cell(head, position, words):
  """Return the 'Head' cell text for the token at `position`.

  Empty for a None head (ROOT); otherwise the head word, prefixed with
  '<' when the head index is greater than the position and '>' when it
  is smaller. A head equal to the position gets no prefix.
  """
  if head is None:
    return ""
  if head > position:
    return f"<{words[head]}"
  if head < position:
    return f">{words[head]}"
  return f"{words[head]}"


def print_table_latex(deps_g, head_g, words_g,
                      deps_1, head_1, words_1,
                      deps_2, head_2, words_2, n):
  """Print a LaTeX table comparing gold, LAL and AJ dependencies.

  deps_*, head_*, words_*: parallel per-token sequences of relation,
    head index (None for ROOT) and word, for gold (g), LAL (1) and AJ (2)
  n: sentence number, used in the caption and in the label tab:dps<n>

  One row is printed per token whose gold relation is not "punct"; the
  word shown in the first column is taken from words_1.
  """
  # Header of the table
  table = ('\\begin{table*}[h] \n' +
           '\\centering \n' +
           '\\begin{tabular}{|c||cc||cc||cc|} \n' +
           '\\hline \n' +
           '{} & \\multicolumn{2}{c||}{\\textbf{GOLD}} & \\multicolumn{2}{c||}{\\textbf{LAL parser}} & \\multicolumn{2}{c|}{\\textbf{AJ parser}} \\\\ \n' +
           '\\textbf{Word} & \\textbf{Head} & \\textbf{Rel} & \\textbf{Head} & \\textbf{Rel} & \\textbf{Head} & \\textbf{Rel} \\\\ \n' +
           '\\hline \n')
  rows = zip(deps_g, head_g, deps_1, head_1, words_1, deps_2, head_2)
  # counter is the token position; it advances for punct tokens too
  for counter, (dg, hg, d1, h1, w1, d2, h2) in enumerate(rows):
    #Ignore punct relations
    if dg == "punct":
      continue

    cell_g = _head_cell(hg, counter, words_g)
    cell_1 = _head_cell(h1, counter, words_1)
    cell_2 = _head_cell(h2, counter, words_2)

    # New row
    table += f"{w1} & {cell_g} & {dg} & {cell_1} & {d1} & {cell_2} & {d2} \\\\ \n"
    table += '\\hline \n'

  # Table footer ('\\end' spelled out: '\e' is an invalid escape sequence)
  table += ('\\end{tabular} \n \\caption{\\label{tab:dps' + 
            str(n) + '} \n' + 
           'Dependency parsing of sentence ' + str(n) +
           ' of all parsers and gold standard.} \n' +
           '\\end{table*}')
  
  print(table)

In [31]:
# Print table for Latex
counter = 1
for word, deps_t, heads_t, sent, gold, gold_words in zip(words_to_use, new_dep_labels,
                                   new_dep_heads, dp_tree_p2, dp_gold_to_use, final_words_w_index):
  # Preprocessing of AJ predictions: the root token becomes relation
  # 'ROOT' with head None, every other head is shifted down by one
  words2 = [token.form for token in sent]
  deps2 = ['ROOT' if token.deprel == 'root' else token.deprel for token in sent]
  heads2 = [None if token.deprel == 'root' else token.head-1 for token in sent]
  
  # Print table
  print(f"%Sentence no. {counter}")
  print_table_latex(gold[0], gold[1], gold_words,
                    deps_t, heads_t, word,
                    deps2, heads2, words2, counter)
  counter += 1
  print()

%Sentence no. 1
\begin{table*}[h] 
\centering 
\begin{tabular}{|c||cc||cc||cc|} 
\hline 
{} & \multicolumn{2}{c||}{\textbf{GOLD}} & \multicolumn{2}{c||}{\textbf{LAL parser}} & \multicolumn{2}{c|}{\textbf{AJ parser}} \\ 
\textbf{Word} & \textbf{Head} & \textbf{Rel} & \textbf{Head} & \textbf{Rel} & \textbf{Head} & \textbf{Rel} \\ 
\hline 
My & <aunt & poss & <aunt & poss & <aunt & poss \\ 
\hline 
aunt & <opener & poss & <can & poss & <opener & poss \\ 
\hline 
's & >aunt & possessive & >aunt & possessive & >aunt & possessive \\ 
\hline 
can & <opener & nn & <open & aux & <opener & dep \\ 
\hline 
opener & <open & nsubj & >can & dep & <open & nsubj \\ 
\hline 
can & <open & aux & <open & aux & <open & aux \\ 
\hline 
open &  & ROOT &  & ROOT &  & ROOT \\ 
\hline 
a & <drum & det & <drum & det & <drum & det \\ 
\hline 
drum & >open & dobj & >open & dobj & >open & dobj \\ 
\hline 
\end{tabular} 
 \caption{\label{tab:dps1} 
Dependency parsing of sentence 1 of all parsers and gold standard.}