In [None]:
"""
COMS W4705 - Natural Language Processing - Spring 2023
Homework 2 - Parsing with Context Free Grammars 
Daniel Bauer
"""

In [None]:
import sys
from collections import defaultdict
from math import fsum
import numpy as np

In [None]:
class Pcfg(object):
    """
    Represent a probabilistic context free grammar.

    Every rule is stored as a ``(lhs, rhs, prob)`` tuple, where ``lhs`` is a
    string, ``rhs`` is a tuple of strings and ``prob`` is a float. Each rule
    is indexed twice: by its right-hand side in ``rhs_to_rules`` and by its
    left-hand side in ``lhs_to_rules``.
    """

    # Largest deviation from 1.0 tolerated when summing the probabilities of
    # all rules that share a left-hand side (absorbs float rounding only).
    PROB_TOLERANCE = 1e-9

    def __init__(self, grammar_file):
        """
        Build the grammar from ``grammar_file``, an iterable of text lines
        (for example an open file object or a list of strings).
        """
        self.rhs_to_rules = defaultdict(list)
        self.lhs_to_rules = defaultdict(list)
        self.startsymbol = None
        self.read_rules(grammar_file)

    def read_rules(self, grammar_file):
        """
        Populate the rule indexes and the start symbol from ``grammar_file``.

        Blank lines and lines starting with ``#`` are skipped. A line that
        contains ``->`` is parsed as a rule; any other line is taken to name
        the start symbol, optionally followed by ``; <probability>``.
        """
        for line in grammar_file:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            if "->" in line:
                rule = self.parse_rule(line)
                lhs, rhs, _prob = rule
                self.rhs_to_rules[rhs].append(rule)
                self.lhs_to_rules[lhs].append(rule)
            else:
                # Keep everything before the last ';'. Unlike a two-name
                # unpacking of rsplit(";"), this also accepts a start-symbol
                # line that carries no probability at all.
                self.startsymbol = line.rsplit(";", 1)[0].strip()

    def parse_rule(self, rule_s):
        """
        Parse one rule of the form ``LHS -> SYM1 SYM2 ... ; PROB``.

        Returns a ``(lhs, rhs, prob)`` tuple with ``rhs`` as a tuple of
        symbols and ``prob`` as a float. Raises ValueError if the ``;``
        separator is missing or the probability is not a number.
        """
        # Split on the first arrow only, mirroring the bounded rsplit below.
        lhs, other = rule_s.split("->", 1)
        lhs = lhs.strip()
        rhs_s, prob_s = other.rsplit(";", 1)
        prob = float(prob_s)
        rhs = tuple(rhs_s.strip().split())
        return (lhs, rhs, prob)

    def verify_grammar(self):
        """
        Return True if the grammar is a valid PCFG in CNF.
        Otherwise return False.

        The checks are:
          * every rule is filed under its own right-hand side;
          * every left-hand side is an upper-case (nonterminal) symbol;
          * every right-hand side is either two symbols unchanged by
            ``upper()`` (nonterminals) or one symbol unchanged by
            ``lower()`` (a terminal);
          * for every left-hand side, the rule probabilities sum to 1.0
            within ``PROB_TOLERANCE``.
        """
        for rhs_key, rules in self.rhs_to_rules.items():
            for lhs, rhs, _prob in rules:
                if rhs_key != rhs:
                    print("key error of rhs")
                    return False

                # The left-hand side must be a nonterminal for both binary
                # and lexical rules.
                if not lhs.isupper():
                    return False

                if len(rhs) == 2:
                    if any(symbol.upper() != symbol for symbol in rhs):
                        print("Non-terminal error", rhs)
                        return False
                elif len(rhs) == 1:
                    if rhs[0].lower() != rhs[0]:
                        print("Terminal error", rhs)
                        return False
                else:
                    # CNF allows only A -> B C and A -> terminal.
                    return False

        # Check every left-hand side; stop only at the first failure.
        for rules in self.lhs_to_rules.values():
            total = fsum(prob for _lhs, _rhs, prob in rules)
            # Written as "not (<=)" so that a NaN total is rejected too.
            if not abs(total - 1.0) <= self.PROB_TOLERANCE:
                return False

        return True

In [None]:
if __name__ == "__main__":
    # Script entry point: load the ATIS grammar from the working directory
    # and print the result of verify_grammar() for it.
    with open('atis3.pcfg', 'r') as pcfg_source:
        is_valid = Pcfg(pcfg_source).verify_grammar()
        print(is_valid)