In [13]:
class Grammar2:
    """Word-to-definition grammar loaded from a brace-delimited text source.

    The source is read line by line.  A line consisting of ``{`` opens a
    group, the line right after it names the group's definition, every
    following line is recorded as a word carrying that definition, and a
    line consisting of ``}`` closes the group.  Lines outside any group are
    ignored.
    """

    def __init__(self, source):
        """Build ``self.grammar`` (word -> list of definitions) from *source*.

        *source* is a string (or any iterable of string pieces).  A final
        line that is not terminated by a newline is processed like any
        other line instead of being dropped.
        """
        self.grammar = {}  # word -> definitions, in the order encountered

        lines = "".join(source).split("\n")
        if lines[-1] == "":
            # The fragment after a trailing newline (or an empty source) is
            # not a line of its own.
            lines.pop()

        awaitingDefinition = False
        definition = None
        for line in lines:
            if awaitingDefinition:
                definition = line
                awaitingDefinition = False
            elif line == "{":
                awaitingDefinition = True
            elif line == "}":
                definition = None
            elif definition is not None:
                self.grammar.setdefault(line, []).append(definition)

    def parseWord(self, word):
        """Return one ``Constituent2`` per definition recorded for *word*.

        Words that are not in the grammar yield an empty list.
        """
        # NOTE(review): Constituent2 is not defined in the visible part of
        # this file; it has to be provided elsewhere.
        return [Constituent2(word=word, pos=parse) for parse in self.grammar.get(word, ())]

    def parseSentence(self, sentence):
        """Chart-parse *sentence* and return the cell spanning all of its words.

        The sentence is lower-cased, stripped of punctuation and split on
        whitespace.  The filled chart is printed row by row (cells separated
        by tabs).  A sentence without any words returns an empty set and
        prints nothing.
        """
        import re  # local import: the visible file does not import re at the top

        # '[' and ']' are escaped so that the whole list stays one character class.
        sentence = re.sub(r'[-;,:"\'\\()\[\]{}%$/?!]', "", sentence.lower())
        words = sentence.split()
        count = len(words)
        if count == 0:
            return set()

        matrix = [[set() for _ in range(count)] for _ in range(count)]

        # Fill the chart one diagonal at a time: span 0 holds the single
        # words, span k holds constituents covering k + 1 words.
        for span in range(count):
            for row in range(count - span):
                col = row + span
                cell = matrix[row][col]
                if span == 0:
                    cell.update(self.parseWord(words[col]))
                    # TODO: re-parse the constituents added above (via a
                    # queue) so that rules applying to a single constituent
                    # are taken into account as well.
                for split in range(span):
                    # NOTE(review): `grammar` is looked up as a global here and
                    # this class defines no parse(); confirm which rule set is
                    # meant to combine the two sub-spans.
                    cell.update(grammar.parse(matrix[row][row + split], matrix[row + split + 1][col]))

        for chartRow in matrix:
            for cell in chartRow:
                print(cell, end="\t")
            print("")
        return matrix[0][count - 1]

In [None]:
import enum

class POS(enum.Enum):
    """
    Syntactic tags for words and phrases, used as the building blocks of grammar rules.

    The members fall into three groups: word categories, levels (bar and
    phrase) and finer-grained subcategories of verbs and nouns.  The numeric
    values are only identifiers; note that they do not follow declaration
    order (Ditransitive is 13).
    """
    # Categories
    T = 0 # Tense
    N = 1 # Noun
    V = 2 # Verb
    Adj = 3 # Adjective
    Adv = 4 # Adverb
    P = 5 # Preposition
    Det = 6 # Determiner
    # Levels
    Bar = 7 # Bar level (X')
    Phrase = 8 # Phrase level (XP)
    # Subcategories carrying more specific information
    Transitive = 9 # Verb subcategories
    Intransitive = 10
    Ditransitive = 13 # Not sure yet how to represent this in a binary tree
    Proper = 11 # Noun subcategories
    Pro = 12
    Mass = 14
    Count = 15
    Plural = 16


In [None]:
class Constituent:
    """Node of a binary parse tree: a single word or a phrase built from children.

    Attributes:
        word:  the surface string of a leaf, otherwise None.
        left:  left child, or None for a leaf.
        right: right child; a node with a single child stores that child in
               both ``left`` and ``right``.
        pos:   tuple of POS tags describing the node.
    """

    def __init__(self, word = None, left = None, right = None, pos = None):
        """Store the given fields; a non-tuple *pos* is wrapped in a 1-tuple."""
        self.word = word
        self.left = left
        self.right = right
        self.pos = pos if isinstance(pos, tuple) else (pos,)

    def __str__(self):
        """Return the bracketed form, e.g. ``[VP [V is] [NP syntax]]``."""
        label = self.formatPos()
        if self.word is not None:
            return "[" + label + " " + self.word + "]"
        if self.right == self.left:
            # Single child: print it once.
            return "[" + label + " " + str(self.left) + "]"
        return "[" + label + " " + str(self.left) + " " + str(self.right) + "]"

    def __repr__(self):
        """Same as ``str()`` so that containers of constituents print readably."""
        return self.__str__()

    def formatPos(self):
        """Return the label built from ``self.pos``, e.g. ``NP``, ``V'`` or ``AdjP``.

        Every category has its own label (including Adj and Adv, so that an
        adjective phrase is not rendered as a bare ``P``).  Tags without an
        entry in the table, such as the verb and noun subcategories,
        contribute nothing.
        """
        # Built per call so that POS only needs to exist once a label is requested.
        labels = {
            POS.T: "T",
            POS.N: "N",
            POS.V: "V",
            POS.Adj: "Adj",
            POS.Adv: "Adv",
            POS.P: "P",
            POS.Det: "Det",
            POS.Bar: "'",
            POS.Phrase: "P",
        }
        return "".join(labels.get(tag, "") for tag in self.pos)

In [None]:
class Grammar:
    """Rule store for the chart parser.

    Attributes:
        lexicon:    lower-cased word -> set of tags the word may carry.
        uniGrammar: tag tuple -> resulting tag tuple (one result per key).
        binGrammar: (left tags, right tags) -> resulting tag tuple (one
                    result per key).
    """

    def __init__(self):
        self.lexicon = {}
        self.uniGrammar = {}
        self.binGrammar = {}

    def add(self, i, o):
        """Register the mapping ``i -> o`` in the store that matches ``i``.

        A string goes to the lexicon, a tuple made of exactly two tuples is
        a binary rule, and any other tuple is a unary rule.  Inputs of any
        other type are ignored.
        """
        if isinstance(i, tuple):
            if tuple(type(part) for part in i) == (tuple, tuple):
                self.addBin(i, o)
            else:
                self.addUni(i, o)
        elif isinstance(i, str):
            # A set is taken as several tags, anything else as a single tag.
            self.addLexicon(i, o if isinstance(o, set) else [o])

    def addBin(self, constituents, phrase):
        """Set the result of the binary rule for the pair *constituents*."""
        self.binGrammar[constituents] = phrase

    def addUni(self, constituent, phrase):
        """Set the result of the unary rule for *constituent*."""
        self.uniGrammar[constituent] = phrase

    def addLexicon(self, word, pos):
        """Add every tag in the iterable *pos* to the entry of *word* (case-insensitive)."""
        entry = self.lexicon.setdefault(word.lower(), set())
        entry.update(pos)

    def parseWord(self, word):
        """Return one leaf ``Constituent`` per tag of *word*; unknown words give ``[]``."""
        tags = self.lexicon.get(word.lower())
        if tags is None:
            return []
        return [Constituent(pos=tag, word=word) for tag in tags]

    def parse(self, lefts, rights):
        """Return the constituents obtainable from the two collections.

        When *lefts* and *rights* compare equal, the unary rules are tried
        on each element of *lefts*; otherwise the binary rules are tried on
        every (left, right) pair.
        """
        if lefts == rights:
            attempts = (
                (self.uniGrammar.get(node.pos), node, node)
                for node in lefts
            )
        else:
            attempts = (
                (self.binGrammar.get((first.pos, second.pos)), first, second)
                for first in lefts
                for second in rights
            )
        return [
            Constituent(pos=result, left=first, right=second)
            for result, first, second in attempts
            if result is not None
        ]
       

In [None]:
class Grammars:
    """Factory for ready-made ``Grammar`` instances."""

    @staticmethod
    def simple():
        """Return a minimal grammar that covers the sentence "this is syntax"."""
        g = Grammar()
        g.add("this", (POS.N, POS.Phrase))
        g.add("is", (POS.V,))
        g.add("syntax", (POS.N, POS.Phrase))
        g.add(((POS.V,), (POS.N, POS.Phrase)), (POS.V, POS.Phrase))
        g.add(((POS.N, POS.Phrase), (POS.V, POS.Phrase)), (POS.T, POS.Phrase))
        return g

    @staticmethod
    def demo():
        """Return a larger demonstration grammar: a small lexicon plus unary and binary rules."""
        g = Grammar()

        # Lexicon: each tag tuple with the words that carry it.
        lexicon = [
            ((POS.N, POS.Pro),
             ["I", "me", "we", "us", "you", "he", "him", "she", "her", "they", "them"]),  # Pronouns
            ((POS.N, POS.Count), ["saw", "dog", "sky", "car", "cat", "person"]),  # Count nouns
            ((POS.N, POS.Plural), ["saws", "dogs", "cars", "cats", "people"]),  # Plural nouns
            ((POS.N, POS.Mass), ["water"]),  # Mass nouns
            ((POS.V, POS.Intransitive), ["go"]),  # Intransitive verbs
            ((POS.V, POS.Transitive), ["saw", "have"]),  # Transitive verbs
            ((POS.P,), ["to", "through", "from", "with"]),  # Prepositions
            ((POS.Det,), ["the", "an", "a"]),  # Determiners
        ]
        for tags, words in lexicon:
            for word in words:
                g.add(word, tags)

        # Syntax - unary rules, registered in this order.
        # NOTE(review): Grammar.addUni stores a single result per input, so
        # for the inputs listed twice below only the later entry is kept.
        unary = [
            ((POS.N, POS.Pro), (POS.Det, POS.Phrase)),  # Nouns
            ((POS.N, POS.Pro), (POS.N, POS.Phrase)),
            ((POS.N, POS.Proper), (POS.Det, POS.Phrase)),
            ((POS.N, POS.Proper), (POS.N, POS.Phrase)),
            ((POS.N, POS.Mass), (POS.N, POS.Bar)),
            ((POS.N, POS.Plural), (POS.N, POS.Bar)),
            ((POS.N, POS.Phrase), (POS.Det, POS.Phrase)),
            ((POS.N, POS.Count), (POS.N, POS.Count, POS.Bar)),
            ((POS.N, POS.Bar), (POS.N, POS.Phrase)),
            ((POS.V, POS.Intransitive), (POS.V, POS.Bar)),  # Verbs
            ((POS.V, POS.Bar), (POS.V, POS.Phrase)),
            ((POS.Adj,), (POS.Adj, POS.Bar)),  # Adjectives
            ((POS.Adj, POS.Bar), (POS.Adj, POS.Phrase)),
            ((POS.Adv,), (POS.Adv, POS.Bar)),  # Adverbs
            ((POS.Adv, POS.Bar), (POS.Adv, POS.Phrase)),
            ((POS.Det, POS.Bar), (POS.Det, POS.Phrase)),  # Determiners
            ((POS.P, POS.Bar), (POS.P, POS.Phrase)),  # Prepositions
        ]
        for source, result in unary:
            g.add(source, result)

        # Syntax - binary rules.  Both halves of the input must be tuples
        # (note the trailing comma in one-element tuples); otherwise
        # Grammar.add files the rule as a unary rule.
        binary = [
            (((POS.Det, POS.Phrase), (POS.V, POS.Phrase)), (POS.T, POS.Phrase)),  # Sentence
            (((POS.Adv, POS.Phrase), (POS.Adj,)), (POS.Adj, POS.Bar)),  # AdvP + Adj -> Adj'
            (((POS.Adv, POS.Phrase), (POS.Adv,)), (POS.Adv, POS.Bar)),  # AdvP + Adv -> Adv'
            (((POS.Adj, POS.Phrase), (POS.N, POS.Count, POS.Bar)), (POS.N, POS.Count, POS.Bar)),  # Nouns
            (((POS.N, POS.Count, POS.Bar), (POS.P, POS.Phrase)), (POS.N, POS.Count, POS.Bar)),
            (((POS.Adj, POS.Phrase), (POS.N, POS.Bar)), (POS.N, POS.Bar)),
            (((POS.N, POS.Bar), (POS.P, POS.Phrase)), (POS.N, POS.Bar)),
            (((POS.V, POS.Transitive), (POS.Det, POS.Phrase)), (POS.V, POS.Bar)),  # Verbs
            (((POS.V, POS.Bar), (POS.Adv, POS.Phrase)), (POS.V, POS.Bar)),
            (((POS.V, POS.Bar), (POS.P, POS.Phrase)), (POS.V, POS.Bar)),
            (((POS.Det,), (POS.N, POS.Phrase)), (POS.Det, POS.Bar)),  # Determiners
            (((POS.P,), (POS.Det, POS.Phrase)), (POS.P, POS.Bar)),  # Prepositions
        ]
        for pair, result in binary:
            g.add(pair, result)
        return g

In [10]:
def parse(sentence, grammar):
    """Chart-parse *sentence* with *grammar* and return the cell spanning every word.

    The sentence is split on whitespace.  ``grammar.parseWord(word)``
    supplies the constituents of each single word and
    ``grammar.parse(lefts, rights)`` those obtained from two adjacent spans.
    The filled chart is printed row by row, cells separated by tabs.

    Returns the set of constituents that cover the whole sentence; a
    sentence without any words returns an empty set and prints nothing.
    """
    words = sentence.split()
    size = len(words)
    if size == 0:
        # There is no chart to build (and no top-right cell to index).
        return set()

    matrix = [[set() for _ in range(size)] for _ in range(size)]

    # Fill the chart one diagonal at a time: span 0 holds the single words,
    # span k holds constituents covering k + 1 words.
    for span in range(size):
        for start in range(size - span):
            end = start + span
            cell = matrix[start][end]
            if span == 0:
                cell.update(grammar.parseWord(words[end]))
                # TODO: re-parse the constituents added above (via a queue)
                # so that rules applying to a single constituent are taken
                # into account as well.
            for split in range(span):
                # Left part covers start..start+split, right part the rest up to end.
                cell.update(grammar.parse(matrix[start][start + split], matrix[start + split + 1][end]))

    for row in matrix:
        for cell in row:
            print(cell, end="\t")
        print("")
    return matrix[0][size - 1]



 



In [15]:
# Demo: parse a three-word sentence with the minimal grammar; parse() prints
# the chart and returns the cell that spans the whole sentence.
parse("This is syntax", Grammars.simple())

{[NP This]}	set()	{[TP [NP This] [VP [V is] [NP syntax]]]}	
set()	{[V is]}	{[VP [V is] [NP syntax]]}	
set()	set()	{[NP syntax]}	


{[TP [NP This] [VP [V is] [NP syntax]]]}