# Lab 8 Parsing

Lab session by:
* Daniel Hess
* Pandelis Laurens Symeonidis

## Imports

In [1]:
from nltk.parse import BottomUpChartParser, BottomUpLeftCornerChartParser, LeftCornerChartParser
from nltk import CFG
from nltk.parse.util import Chart


## Define grammar

In [2]:
# Toy CFG for the lab. Coordination through CC is allowed both at the NP level
# and at the NNS level, as the rules below show.
grammar_rules = '''
  S   -> NP VP
  VP -> V NP | V PP
  PP -> P NP
  NP  -> NNS | JJ NNS | NP CC NP
  NNS -> "cats" | "dogs" | "mice" | NNS CC NNS 
  JJ  -> "big" | "small" | "lazy"
  CC  -> "and" | "or"
  P -> "with"
  V -> "play" | "sleep"
  '''
grammar = CFG.fromstring(grammar_rules)

# Echo the productions, one per line, as a check on how the text was read.
print("Grammar rules:\n")
for production in grammar.productions():
    print(production)

Grammar rules:

S -> NP VP
VP -> V NP
VP -> V PP
PP -> P NP
NP -> NNS
NP -> JJ NNS
NP -> NP CC NP
NNS -> 'cats'
NNS -> 'dogs'
NNS -> 'mice'
NNS -> NNS CC NNS
JJ -> 'big'
JJ -> 'small'
JJ -> 'lazy'
CC -> 'and'
CC -> 'or'
P -> 'with'
V -> 'play'
V -> 'sleep'


## Helper function to run a parser

In [3]:
def run_parser(parser_name, grammar, sentence):
    """Parse ``sentence`` with the chart parser selected by ``parser_name``.

    Args:
        parser_name: One of ``"BottomUpChartParser"``,
            ``"BottomUpLeftCornerChartParser"`` or ``"LeftCornerChartParser"``.
        grammar: Grammar passed to the selected parser class's constructor.
        sentence: Token sequence passed to the parser's ``chart_parse``.

    Returns:
        The value returned by ``chart_parse`` for ``sentence``.

    Raises:
        ValueError: If ``parser_name`` is not one of the supported names.
    """
    if parser_name == "BottomUpChartParser":
        parser_class = BottomUpChartParser
    elif parser_name == "BottomUpLeftCornerChartParser":
        parser_class = BottomUpLeftCornerChartParser
    elif parser_name == "LeftCornerChartParser":
        parser_class = LeftCornerChartParser
    else:
        # Without this branch an unknown name left ``parser_class`` unbound and
        # the call below failed with an unhelpful UnboundLocalError.
        raise ValueError(
            f"Unknown parser name: {parser_name!r}. Expected one of "
            "'BottomUpChartParser', 'BottomUpLeftCornerChartParser', "
            "'LeftCornerChartParser'."
        )

    return parser_class(grammar).chart_parse(sentence)



## Experiment

In [4]:
# Tokenised test sentence shared by all three parser runs.
sentence = ["lazy", "cats", "play", "with", "mice"]

def analyse(chart: Chart):
    """Print the complete parses and every edge stored in ``chart``.

    The start symbol is read from the notebook-level ``grammar``. Output is,
    in order: each parse tree (or a notice that there is none), the total
    number of edges, and then one line per edge.
    """
    complete_parses = list(chart.parses(grammar.start()))
    print("\nParse trees:")
    if complete_parses:
        for number, tree in enumerate(complete_parses, start=1):
            print(f"\nTree {number}:")
            print(tree)
    else:
        print("  (no complete parse found)")

    chart_edges = list(chart.edges())
    print(f"\nNumber of edges: {len(chart_edges)}")

    print("\nExplored edges:")
    for edge in chart_edges:
        print(edge)


## Bottom-Up Chart Parser

In [10]:
# Chart from the plain bottom-up parser; kept in a variable because the
# edge comparison further down reads it again.
parse_bottom_up = run_parser(
    parser_name="BottomUpChartParser",
    grammar=grammar,
    sentence=sentence,
)
analyse(parse_bottom_up)


Parse trees:

Tree 1:
(S
  (NP (JJ lazy) (NNS cats))
  (VP (V play) (PP (P with) (NP (NNS mice)))))

Number of edges: 48

Explored edges:
[0:1] 'lazy'
[1:2] 'cats'
[2:3] 'play'
[3:4] 'with'
[4:5] 'mice'
[0:0] JJ -> * 'lazy'
[0:1] JJ -> 'lazy' *
[0:0] NP -> * JJ NNS
[0:1] NP -> JJ * NNS
[1:1] NNS -> * 'cats'
[1:2] NNS -> 'cats' *
[1:1] NP -> * NNS
[1:1] NNS -> * NNS CC NNS
[0:2] NP -> JJ NNS *
[1:2] NP -> NNS *
[1:2] NNS -> NNS * CC NNS
[1:1] S  -> * NP VP
[1:1] NP -> * NP CC NP
[1:2] S  -> NP * VP
[1:2] NP -> NP * CC NP
[0:0] S  -> * NP VP
[0:0] NP -> * NP CC NP
[0:2] S  -> NP * VP
[0:2] NP -> NP * CC NP
[2:2] V  -> * 'play'
[2:3] V  -> 'play' *
[2:2] VP -> * V NP
[2:2] VP -> * V PP
[2:3] VP -> V * NP
[2:3] VP -> V * PP
[3:3] P  -> * 'with'
[3:4] P  -> 'with' *
[3:3] PP -> * P NP
[3:4] PP -> P * NP
[4:4] NNS -> * 'mice'
[4:5] NNS -> 'mice' *
[4:4] NP -> * NNS
[4:4] NNS -> * NNS CC NNS
[4:5] NP -> NNS *
[4:5] NNS -> NNS * CC NNS
[4:4] S  -> * NP VP
[4:4] NP -> * NP CC NP
[3:5] PP -> P 

## Bottom-Up Left-Corner Chart Parser

In [11]:
# Chart from the bottom-up left-corner parser; kept in a variable because the
# edge comparison further down reads it again.
parse_bottom_up_left_corner = run_parser(
    parser_name="BottomUpLeftCornerChartParser",
    grammar=grammar,
    sentence=sentence,
)
analyse(parse_bottom_up_left_corner)


Parse trees:

Tree 1:
(S
  (NP (JJ lazy) (NNS cats))
  (VP (V play) (PP (P with) (NP (NNS mice)))))

Number of edges: 29

Explored edges:
[0:1] 'lazy'
[1:2] 'cats'
[2:3] 'play'
[3:4] 'with'
[4:5] 'mice'
[0:1] JJ -> 'lazy' *
[0:1] NP -> JJ * NNS
[1:2] NNS -> 'cats' *
[1:2] NP -> NNS *
[1:2] NNS -> NNS * CC NNS
[0:2] NP -> JJ NNS *
[0:2] S  -> NP * VP
[0:2] NP -> NP * CC NP
[1:2] S  -> NP * VP
[1:2] NP -> NP * CC NP
[2:3] V  -> 'play' *
[2:3] VP -> V * NP
[2:3] VP -> V * PP
[3:4] P  -> 'with' *
[3:4] PP -> P * NP
[4:5] NNS -> 'mice' *
[4:5] NP -> NNS *
[4:5] NNS -> NNS * CC NNS
[4:5] S  -> NP * VP
[4:5] NP -> NP * CC NP
[3:5] PP -> P NP *
[2:5] VP -> V PP *
[0:5] S  -> NP VP *
[1:5] S  -> NP VP *


## Left-Corner Chart Parser

In [None]:
# Chart from the left-corner parser; kept in a variable because the
# edge comparison further down reads it again.
parse_left_corner = run_parser(
    parser_name="LeftCornerChartParser",
    grammar=grammar,
    sentence=sentence,
)
analyse(parse_left_corner)


Parse trees:

Tree 1:
(S
  (NP (JJ lazy) (NNS cats))
  (VP (V play) (PP (P with) (NP (NNS mice)))))

Number of edges: 22

Explored edges:
[0:1] 'lazy'
[1:2] 'cats'
[2:3] 'play'
[3:4] 'with'
[4:5] 'mice'
[0:1] JJ -> 'lazy' *
[0:1] NP -> JJ * NNS
[1:2] NNS -> 'cats' *
[1:2] NP -> NNS *
[0:2] NP -> JJ NNS *
[0:2] S  -> NP * VP
[1:2] S  -> NP * VP
[2:3] V  -> 'play' *
[2:3] VP -> V * PP
[3:4] P  -> 'with' *
[3:4] PP -> P * NP
[4:5] NNS -> 'mice' *
[4:5] NP -> NNS *
[3:5] PP -> P NP *
[2:5] VP -> V PP *
[0:5] S  -> NP VP *
[1:5] S  -> NP VP *


In [20]:
# Edge set of each chart, keyed by a short display name for the parser.
parsers_edges = {
    label: set(chart.edges())
    for label, chart in (
        ("BottomUp", parse_bottom_up),
        ("BottomUpLeftCorner", parse_bottom_up_left_corner),
        ("LeftCorner", parse_left_corner),
    )
}
names = list(parsers_edges)

# Visit every unordered pair of parsers once and, for both directions, list
# the edges that one chart contains and the other does not.
for position, first in enumerate(names):
    for second in names[position + 1:]:
        for kept, dropped in ((first, second), (second, first)):
            unique_edges = parsers_edges[kept] - parsers_edges[dropped]

            print(f"\nEdges only in {kept} (not in {dropped}): {len(unique_edges)}")
            # Sort by string form so the listing is stable and easy to scan.
            for edge in sorted(unique_edges, key=str):
                print("  ", edge)



Edges only in BottomUp (not in BottomUpLeftCorner): 19
   [0:0] JJ -> * 'lazy'
   [0:0] NP -> * JJ NNS
   [0:0] NP -> * NP CC NP
   [0:0] S  -> * NP VP
   [1:1] NNS -> * 'cats'
   [1:1] NNS -> * NNS CC NNS
   [1:1] NP -> * NNS
   [1:1] NP -> * NP CC NP
   [1:1] S  -> * NP VP
   [2:2] V  -> * 'play'
   [2:2] VP -> * V NP
   [2:2] VP -> * V PP
   [3:3] P  -> * 'with'
   [3:3] PP -> * P NP
   [4:4] NNS -> * 'mice'
   [4:4] NNS -> * NNS CC NNS
   [4:4] NP -> * NNS
   [4:4] NP -> * NP CC NP
   [4:4] S  -> * NP VP

Edges only in BottomUpLeftCorner (not in BottomUp): 0

Edges only in BottomUp (not in LeftCorner): 26
   [0:0] JJ -> * 'lazy'
   [0:0] NP -> * JJ NNS
   [0:0] NP -> * NP CC NP
   [0:0] S  -> * NP VP
   [0:2] NP -> NP * CC NP
   [1:1] NNS -> * 'cats'
   [1:1] NNS -> * NNS CC NNS
   [1:1] NP -> * NNS
   [1:1] NP -> * NP CC NP
   [1:1] S  -> * NP VP
   [1:2] NNS -> NNS * CC NNS
   [1:2] NP -> NP * CC NP
   [2:2] V  -> * 'play'
   [2:2] VP -> * V NP
   [2:2] VP -> * V PP
   [2:3] VP 

# Analysis

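**TODO:** write up the comparison in prose. Observations from the outputs above that the write-up should cover:

* All three parsers find the same single parse tree for "lazy cats play with mice".
* The chart sizes differ: 48 edges (bottom-up), 29 (bottom-up left-corner), 22 (left-corner).
* The 19 edges found only in the bottom-up chart are all zero-width edges with the dot at the very start of the rule (e.g. `[0:0] S -> * NP VP`). The bottom-up left-corner chart contains no edge that the bottom-up chart lacks.
* The left-corner chart additionally lacks 7 edges that the bottom-up left-corner chart has: `NNS -> NNS * CC NNS` at [1:2] and [4:5]; `NP -> NP * CC NP` at [0:2], [1:2] and [4:5]; `VP -> V * NP` at [2:3]; and `S -> NP * VP` at [4:5]. In each case the symbol after the dot cannot start with the next word (or there is no next word), so none of these edges is ever completed.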