<a href="https://colab.research.google.com/github/AjiteshMahalingam/100-Days-of-Code/blob/main/GoGrammar_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install pyspellchecker



In [2]:
pip install gingerit



In [3]:
from gingerit.gingerit import GingerIt
from spellchecker import SpellChecker 

## Construction of English Grammar

Input : English Grammar specifications file

Output : Grammar object

-> English Grammar string processing

-> Construction of Grammar class

In [4]:
# READING GRAMMAR SPECIFICATIONS

inputFile = open('grammar.txt', 'r')
inputText = inputFile.read()
grammarRules = [ rule.strip() for rule in inputText.split("\n")]
grammarRules

['S -> NP_SG PAST S1 | NP_SG PAST_PERFECT S1 | NP_SG PAST_CONT_SG S1 | NP_SG PAST_PERFECT_CONT S1 | NP_SG PRES_SG S1 | NP_SG PRES_PERFECT_SG S1 | NP_SG PRES_CONT_SG S1 | NP_SG PRES_PERFECT_CONT_SG S1 | NP_SG FUTURE S1 | NP_SG FUTURE_PERFECT S1 | NP_SG FUTURE_CONT S1 | NP_SG FUTURE_PERFECT_CONT S1 | NP_PL PAST S1 | NP_PL PAST_PERFECT S1 | NP_PL PAST_CONT_PL S1 | NP_PL PAST_PERFECT_CONT S1 | NP_PL PRES_PL S1  | NP_PL PRES_PERFECT_PL S1 | NP_PL PRES_CONT_PL S1 | NP_PL PRES_PERFECT_CONT_PL S1 | NP_PL FUTURE S1 | NP_PL FUTURE_PERFECT S1 | NP_PL FUTURE_CONT S1 | NP_PL FUTURE_PERFECT_CONT S1 | S ADV',
 'S1 -> NP | ADV',
 'NP -> NP_SG | NP_PL | PP | NP PP',
 'NP_SG ->  PRONOUN_SG | PRONOUN_SG NOM_SG| PROPERNOUN | NOM_SG | DET_SG NOM_SG | DET_BOTH NOM_SG | ADV NP_SG | NP_SG CONJ NP',
 'NOM_SG -> NOUN_SG | ADJ NOUN_SG | NOM_SG NOUN_SG',
 'NP_PL -> PRONOUN_PL | PRONOUN_PL NOM_PL| NOM_PL | DET_PL NOM_PL | DET_BOTH NOM_PL | ADV NP_PL | NP_PL CONJ NP',
 'NOM_PL -> NOUN_PL | ADJ NOUN_PL | NOM_PL NOUN

In [5]:
class Grammar :
  def __init__(self, grammarStr) :
    self.input = grammarStr
    self.symbols = set()
    self.terminals = set()
    self.nonterminals = set()
    self.startSymbol = None
    self.grammar = {}

    for production in grammarStr :
      # The partition() method searches for a specified string, and splits the string into a tuple containing three elements.
      head, _, bodies = production.partition(' -> ')
      
      # The nonterminal of the first production is considered to be the start symbol
      if self.startSymbol is None :
        self.startSymbol = head

      self.grammar.setdefault(head, set())
      self.nonterminals.add(head)

      bodies = {tuple(body.strip().split(" ")) for body in ' '.join(bodies.split(" ")).split('|') }

      for body in bodies :
        self.grammar[head].add(body)
        for symbol in body :
          if symbol.isupper() :
            self.nonterminals.add(symbol)
          else :
            self.terminals.add(symbol)  
    
    self.symbols = self.nonterminals | self.terminals

  def __str__ (self) :
    print("Start Symbol : " , self.startSymbol)
    print("Non-terminals : " , self.nonterminals)
    print("Terminals : " , self.terminals)
    print("Productions : ")
    for head in self.grammar :
      for body in self.grammar[head] :
        print(head, "->", body)
    return ""

In [6]:
G = Grammar(grammarRules)
print(G)
properNouns = set()
for pn in G.grammar["PROPERNOUN"]:
  for noun in pn :
    properNouns.add(noun)

Start Symbol :  S
Non-terminals :  {'RELPR_PL', 'NP_PL', 'PRES_PERFECT_CONT_PL', 'FUTURE', 'FUTURE_PERFECT_CONT', 'RELPR_SG', 'NOM_PL', 'DET_PL', 'CONJ', 'NP_SG', 'PP', 'DET_BOTH', 'NP', 'NOUN_PL', 'V3', 'GERUNDV', 'ADJ', 'S', 'ADV', 'PRES_PERFECT_SG', 'V2', 'PAST_PERFECT', 'PROPERNOUN', 'PREPOS', 'PRES_PERFECT_PL', 'V1_PL', 'NOM_SG', 'PAST_PERFECT_CONT', 'PRONOUN_PL', 'PAST_CONT_SG', 'V1_SG', 'NOUN_SG', 'PAST', 'PRES_CONT_SG', 'S1', 'PRES_SG', 'PRES_CONT_PL', 'FUTURE_CONT', 'FUTURE_PERFECT', 'DET_SG', 'PRONOUN_SG', 'PAST_CONT_PL', 'PRES_PERFECT_CONT_SG', 'PRES_PL'}
Terminals :  {'he', 'canada', 'an', 'patient', 'room', 'twice', 'turkey', 'made', 'night', 'jacksons', 'by', 'ritheesh', 'may', 'roses', 'received', 'fire', 'sweet', 'bed', 'album', 'likes', 'wednesday', 'baby', 'done', 'understood', 'two', 'am', 'my', 'emails', 'times', 'mom', 'in', 'tomorrow', 'every', 'brother', 'it', 'red', 'phone', 'outlet', 'november', 'accountant', 'boys', 'garden', 'specialists', 'many', 'sister', '

## Construction of SLR(1) Parser Table

Input : English Grammar Object

Output : SLR(1) Parsing table


-> First and Follow

-> Canonical collection of LR(0) items

-> Building Parser table


In [7]:
# Generating first and follow sets

def first_follow(G) :
  def union(set1, set2):
    set1len = len(set1)
    set1 |= set2
    return set1len != len(set1)

  # initialising the data structures to store and first and follow values
  first = { symbol: set() for symbol in G.symbols }
  follow = { symbol: set() for symbol in G.symbols }

  for symbol in G.symbols :
    if symbol in G.terminals :
      first[symbol].add(symbol)
  
  follow[G.startSymbol].add('$')

  while True :
    updated = False

    for head, bodies in G.grammar.items() :
      for body in bodies :
        for symbol in body :
          if symbol != '^':
            updated |= union(first[head], first[symbol] - set('^'))
            if '^' not in first[symbol] :
              break
            else :
              updated |= union(first[head], set('^'))
        aux = follow[head]
        for symbol in reversed(body) :
          if symbol == '^':
            continue
          if symbol in follow:
            updated |= union(follow[symbol], aux - set('^'))
          if '^' in first[symbol] :
            aux = aux | first[symbol]
          else :
            aux = first[symbol]
    if not updated:
      return first, follow


In [8]:
# Construction of LR(0) items

# Computation of set of items constructed from I
# I - set of items for G
def CLOSURE(aug_G, I):
  J = I
  
  while True :
    item_len = len(J)
    for head, bodies in J.copy().items() :
      for body in bodies.copy():
        if '.' in body[:-1]:
          dot_pos = body.index('.')
          symbol_after_dot = body[dot_pos + 1]
          if symbol_after_dot in aug_G.nonterminals :
            for G_body in aug_G.grammar[symbol_after_dot]:
              if G_body == tuple('^'):
                J.setdefault(symbol_after_dot, set()).add(set('.'))
              else :
                J.setdefault(symbol_after_dot, set()).add(('.',) + G_body)
    if item_len == len(J):
      return J

# GOTO(I, X) defined to be the closure of the set of all items [A -> aX.b] such that [A -> a.Xb] is in I
# I - set of items
# X - grammar symbol
def GOTO(aug_G, I, X):
  goto = {}

  for head, bodies in I.items() :
    for body in bodies :
      if '.' in body[:-1]:
        dot_pos = body.index('.')
        if body[dot_pos + 1] == X :
          replaced_dot_body = body[ : dot_pos] + (X, '.') + body[dot_pos + 2: ]
          for C_head, C_bodies in CLOSURE(aug_G, {head: {replaced_dot_body}}).items():
            goto.setdefault(C_head, set()).update(C_bodies)
  
  return goto

# Construction of canonical collection of sets of LR(0) items for G'
def items(aug_G):
  C = [CLOSURE(aug_G, {aug_G.startSymbol : {('.', aug_G.startSymbol[:-1])}})]

  while True:
    item_len = len(C)
    for I in C.copy() :
      for X in aug_G.symbols:
        goto = GOTO(aug_G, I, X)
        if (len(goto) > 0) and goto not in C:
          C.append(goto)
    if item_len == len(C) :
      return C


In [9]:
# Construction of SLR(1) Parsing table

class SLRParser:
  def __init__(self, G):
    self.aug_G = Grammar([f"{G.startSymbol}' -> {G.startSymbol}"] + G.input)
    self.max_aug_G_len = len(max(self.aug_G.grammar, key=len))

    self.G_indexed = []
    for head, bodies in self.aug_G.grammar.items():
      for body in bodies:
        self.G_indexed.append([head, body])
    
    self.first, self.follow = first_follow(self.aug_G)

    self.C = items(self.aug_G)
    
    self.action = list(self.aug_G.terminals) + ['$']
    self.goto = []
    for nonterminal in self.aug_G.nonterminals :
      if nonterminal != self.aug_G.startSymbol :
        self.goto.append(nonterminal)
    
    self.parse_table_symbols = self.action + self.goto
    self.parse_table = self.construct_table()
  
  def construct_table(self):
    # Step 1 - Construct C
    # Step 2 - parsing actions for state i are determined
    parse_table = {r: {c: '' for c in self.parse_table_symbols} for r in range(len(self.C))}

    for i, I in enumerate(self.C) :
      for head, bodies in I.items() :
        for body in bodies :
          if '.' in body[:-1] : # Case 2a
            dot_pos = body.index('.')
            symbol_after_dot = body[dot_pos + 1]

            if symbol_after_dot in self.aug_G.terminals :
              s = f's{self.C.index(GOTO(self.aug_G, I, symbol_after_dot))}'
              if s not in parse_table[i][symbol_after_dot]:
                if 'r' in parse_table[i][symbol_after_dot] :
                  parse_table[i][symbol_after_dot] += '/'
                parse_table[i][symbol_after_dot] += s
          
          elif body[-1] == '.' and head != self.aug_G.startSymbol : # Case 2b
            for j, (G_head, G_body) in enumerate(self.G_indexed) :
              if G_head == head and (G_body == body[:-1] or G_body == ('^',) and body == ('.',)):
                for f in self.follow[head] :
                  if parse_table[i][f] :
                    parse_table[i][f] += '/'
                  parse_table[i][f] += f'r{j}'

                break

          else : # Case 2c
            parse_table[i]['$'] = 'acc'
      
      # Step 3 - Goto transistions for all nonterminals are constructed
      for A in self.aug_G.nonterminals :
        j = GOTO(self.aug_G, I, A)
        if j in self.C :
          parse_table[i][A] = self.C.index(j)

    return parse_table

  def __str__ (self) :
    def fprint(text, variable) :
      print(f'{text:>12}: {", ".join(variable)}')
    def print_line() :
      print(f'+{("-" * width + "+") * (len(list(self.aug_G.symbols) + ["$"]))}')
    def symbols_width(symbols) :
      return (width + 1) * len(symbols) - 1

    print("Augmented Grammar : ")
    for i, (head, body) in enumerate(self.G_indexed):
      print(f'{i:>{len(str(len(self.G_indexed) - 1))}}: {head:>{self.max_aug_G_len}} -> {" ".join(body)}')
    print()
    fprint('TERMINALS', self.aug_G.terminals)
    fprint('NONTERMINALS', self.aug_G.nonterminals)
    fprint('SYMBOLS', self.aug_G.symbols)

    print('\nFIRST:')
    for head in self.aug_G.grammar:
      print(f'{head:>{self.max_aug_G_len}} = {{ {", ".join(self.first[head])} }}')

    print('\nFOLLOW:')
    for head in self.aug_G.grammar:
      print(f'{head:>{self.max_aug_G_len}} = {{ {", ".join(self.follow[head])} }}')

    width = max(len(c) for c in {'ACTION'} | self.aug_G.symbols) + 2
    for r in range(len(self.C)):
      max_len = max(len(str(c)) for c in self.parse_table[r].values())

      if width < max_len + 2:
        width = max_len + 2

    print('\nPARSING TABLE:')
    print(f'+{"-" * width}+{"-" * symbols_width(self.action)}+{"-" * symbols_width(self.goto)}+')
    print(f'|{"":{width}}|{"ACTION":^{symbols_width(self.action)}}|{"GOTO":^{symbols_width(self.goto)}}|')
    print(f'|{"STATE":^{width}}+{("-" * width + "+") * len(self.parse_table_symbols)}')
    print(f'|{"":^{width}}|', end=' ')

    for symbol in self.parse_table_symbols:
      print(f'{symbol:^{width - 1}}|', end=' ')

    print()
    print_line()

    for r in range(len(self.C)):
      print(f'|{r:^{width}}|', end=' ')

      for c in self.parse_table_symbols:
        print(f'{self.parse_table[r][c]:^{width - 1}}|', end=' ')

      print()

    print_line()
    print()
    return ""


In [10]:
slr = SLRParser(G)
print(slr)

Augmented Grammar : 
  0:                   S' -> S
  1:                    S -> NP_PL FUTURE S1
  2:                    S -> NP_PL FUTURE_PERFECT_CONT S1
  3:                    S -> NP_SG PAST_CONT_SG S1
  4:                    S -> NP_PL FUTURE_PERFECT S1
  5:                    S -> NP_PL PRES_PERFECT_CONT_PL S1
  6:                    S -> NP_SG PAST S1
  7:                    S -> NP_SG PRES_SG S1
  8:                    S -> NP_PL PRES_PL S1
  9:                    S -> NP_SG PAST_PERFECT S1
 10:                    S -> NP_PL FUTURE_CONT S1
 11:                    S -> NP_PL PAST_PERFECT_CONT S1
 12:                    S -> S ADV
 13:                    S -> NP_SG PRES_CONT_SG S1
 14:                    S -> NP_PL PAST_CONT_PL S1
 15:                    S -> NP_PL PAST S1
 16:                    S -> NP_SG PAST_PERFECT_CONT S1
 17:                    S -> NP_SG FUTURE_CONT S1
 18:                    S -> NP_SG PRES_PERFECT_SG S1
 19:                    S -> NP_PL PRES_PERFECT_PL

## Text Pre-processing

In [11]:
# READING INPUT FROM FILE

inputFile = open('input.txt', 'r')
inputText = inputFile.read()
text = inputText.replace('\n', ' ')
inputFile.close()
text

'She were writing a letter to Ishwarya. My teacher teaches well. She went to school. Raj is climbs a mountian. Raj plays fotball well. Mom cook delisious food. Navvya is a topper. Ishwarya is a kabbadi player. Ishwarya will sings a song in the show. These people is from Canada.'

In [12]:
# OUTPUT REPORT

outputLines = []
outputLines.append("ANALYSIS REPORT : \n\n")
outputLines.append("---------------------------------------------\n\n")

In [13]:
# SENTENCE TOKENIZATION

stop_Char=["!",".","?",";"]

sentences=[]

sentence=""
for ch in text :
  if ch not in stop_Char:
    sentence += ch
  else :
    sentence = sentence.strip()
    sentences.append(sentence)
    sentence=""  

sentenceCount = len(sentences) 
print("Sentence count : " + str(sentenceCount))

for sentence in sentences :
  print(sentence)
  

Sentence count : 10
She were writing a letter to Ishwarya
My teacher teaches well
She went to school
Raj is climbs a mountian
Raj plays fotball well
Mom cook delisious food
Navvya is a topper
Ishwarya is a kabbadi player
Ishwarya will sings a song in the show
These people is from Canada


In [14]:
# REMOVAL OF STOP CHARACTERS

reqSplChars = ["@", "&", "(", ")", "-"]
splChars = ["#", "%", "&", "^", "$", "*", "`", "+"]
punctuation = [",", ".", "!"]

exclude = splChars + punctuation
cleanSentences=[]
for sentence in sentences :
  cleantext=""
  for ch in sentence :
    if ch not in exclude:
      cleantext += ch
  cleanSentences.append(cleantext)

for cleanSentence in cleanSentences :
  print(cleanSentence)


She were writing a letter to Ishwarya
My teacher teaches well
She went to school
Raj is climbs a mountian
Raj plays fotball well
Mom cook delisious food
Navvya is a topper
Ishwarya is a kabbadi player
Ishwarya will sings a song in the show
These people is from Canada


In [15]:
# WORD TOKENIZATION

wordsArray = []
wordsCount = []
for cleanText in cleanSentences :
  words = []
  word = ""
  i = 0
  while(i < len(cleanText)):
    if(cleanText[i] == " "):
      i += 1
    else:
      word = ""
      while(i < len(cleanText) and cleanText[i] != " "):
        word += cleanText[i]
        i += 1
      if(len(word) > 0):
        words.append(word)
  wordsArray.append(words)
  wordsCount.append(len(words))

totalWordCount = sum(wordsCount)

for i in range(sentenceCount) :
  print(wordsArray[i])
  print("Word count : " + str(wordsCount[i]))
  print()

print("Total word count : " + str(totalWordCount))

['She', 'were', 'writing', 'a', 'letter', 'to', 'Ishwarya']
Word count : 7

['My', 'teacher', 'teaches', 'well']
Word count : 4

['She', 'went', 'to', 'school']
Word count : 4

['Raj', 'is', 'climbs', 'a', 'mountian']
Word count : 5

['Raj', 'plays', 'fotball', 'well']
Word count : 4

['Mom', 'cook', 'delisious', 'food']
Word count : 4

['Navvya', 'is', 'a', 'topper']
Word count : 4

['Ishwarya', 'is', 'a', 'kabbadi', 'player']
Word count : 5

['Ishwarya', 'will', 'sings', 'a', 'song', 'in', 'the', 'show']
Word count : 8

['These', 'people', 'is', 'from', 'Canada']
Word count : 5

Total word count : 50


In [16]:
# Spell Check

spell = SpellChecker()

correctedWordsArray = []
misspelledCount = []
misspelledResult = []
for words in wordsArray : 
  count = 0
  misspelled = spell.unknown(words)
  
  if(len(misspelled) > 0):
    print(words)
    for word in misspelled:
        if(word not in properNouns) :
          count += 1
          print("Word : " + word)
          print("Best match : " + str(spell.correction(word)))
          print("Possible matches : " + str(spell.candidates(word)))
          misspelledResult.append("Misspelled word : {}\nBest match : {}\nPossible matches : {}\n\n".format(word, str(spell.correction(word)), str(spell.candidates(word))))
    print("Misspelled word count : " + str(count))
    misspelledCount.append(count)
    print()
    
    # Correcting the misspelled words
    correctedWords = []
    for word in words :
      if word in misspelled :
        correctedWords.append((spell.correction(word)).lower())
      else :
        correctedWords.append(word.lower())
    correctedWordsArray.append(correctedWords)
  else :
    correctedWords = [word.lower() for word in words]
    correctedWordsArray.append(correctedWords)
    print(words)
    print("Misspelled word count : " + str(len(misspelled)))
    print()


['She', 'were', 'writing', 'a', 'letter', 'to', 'Ishwarya']
Misspelled word count : 0

['My', 'teacher', 'teaches', 'well']
Misspelled word count : 0

['She', 'went', 'to', 'school']
Misspelled word count : 0

['Raj', 'is', 'climbs', 'a', 'mountian']
Word : mountian
Best match : mountain
Possible matches : {'mountain'}
Misspelled word count : 1

['Raj', 'plays', 'fotball', 'well']
Word : fotball
Best match : football
Possible matches : {'football', 'fooball', 'ootball'}
Misspelled word count : 1

['Mom', 'cook', 'delisious', 'food']
Word : delisious
Best match : delicious
Possible matches : {'delicious', 'delirious'}
Misspelled word count : 1

['Navvya', 'is', 'a', 'topper']
Misspelled word count : 0

['Ishwarya', 'is', 'a', 'kabbadi', 'player']
Word : kabbadi
Best match : kabaddi
Possible matches : {'kabaddi'}
Misspelled word count : 1

['Ishwarya', 'will', 'sings', 'a', 'song', 'in', 'the', 'show']
Misspelled word count : 0

['These', 'people', 'is', 'from', 'Canada']
Misspelled word

In [17]:
totalMispelledCount = sum(misspelledCount)
print("Total misspelled words : " + str(totalMispelledCount))

print("Array of words with correct spelling : ")
for words in correctedWordsArray :
  print(words)
  

Total misspelled words : 4
Array of words with correct spelling : 
['she', 'were', 'writing', 'a', 'letter', 'to', 'ishwarya']
['my', 'teacher', 'teaches', 'well']
['she', 'went', 'to', 'school']
['raj', 'is', 'climbs', 'a', 'mountain']
['raj', 'plays', 'football', 'well']
['mom', 'cook', 'delicious', 'food']
['navvya', 'is', 'a', 'topper']
['ishwarya', 'is', 'a', 'kabaddi', 'player']
['ishwarya', 'will', 'sings', 'a', 'song', 'in', 'the', 'show']
['these', 'people', 'is', 'from', 'canada']


In [18]:
outputLines.append("Sentence Count : {}\n".format(sentenceCount))
outputLines.append("Word Count : {}\n".format(totalWordCount))
outputLines.append("Misspelled Count : {}\n\n".format(totalMispelledCount))
outputLines.append("---------------------------------------------\n\n")
for result in misspelledResult :
  outputLines.append(result)
outputLines.append("---------------------------------------------\n\n")

## Evaluation of Grammar Correctness

Input : SLR(1) Parsing table, input sentences

Output : Analysis report of the input


-> Stack immplementation of input sentence

-> Semantic analysis using NLP

-> Genration of report


In [19]:
# Stack implementaion of input strings

def LR_parser(slr, w):
  isAccepted = False
  buffer = [x for x in w]
  buffer.append('$')
  pointer = 0
  a = buffer[pointer]
  stack = ['0']
  symbols = ['']
  results = {'step': [''], 'stack': ['STACK'] + stack, 'symbols': ['SYMBOLS'] + symbols, 'input': ['INPUT'], 'action': ['ACTION']}

  step = 0
  while True:
    s = int(stack[-1])
    step += 1
    results['step'].append(f'({step})')
    results['input'].append(' '.join(buffer[pointer:]))

    if a not in slr.parse_table[s]:
      results['action'].append(f'ERROR: unrecognized symbol {a}')
      isAccepted = False
      break

    elif not slr.parse_table[s][a]:
      results['action'].append('ERROR: input cannot be parsed by given grammar')
      isAccepted = False
      break

    elif '/' in slr.parse_table[s][a]:
      action = 'reduce' if slr.parse_table[s][a].count('r') > 1 else 'shift'
      results['action'].append(f'ERROR: {action}-reduce conflict at state {s}, symbol {a}')
      isAccepted = False
      break

    elif slr.parse_table[s][a].startswith('s'):
      results['action'].append('shift')
      stack.append(slr.parse_table[s][a][1:])
      symbols.append(a)
      results['stack'].append(' '.join(stack))
      results['symbols'].append(' '.join(symbols))
      pointer += 1
      a = buffer[pointer]

    elif slr.parse_table[s][a].startswith('r'):
      head, body = slr.G_indexed[int(slr.parse_table[s][a][1:])]
      results['action'].append(f'reduce by {head} -> {" ".join(body)}')

      if body != ('^',):
        stack = stack[:-len(body)]
        symbols = symbols[:-len(body)]

      stack.append(str(slr.parse_table[int(stack[-1])][head]))
      symbols.append(head)
      results['stack'].append(' '.join(stack))
      results['symbols'].append(' '.join(symbols))

    elif slr.parse_table[s][a] == 'acc':
      results['action'].append('accept')
      isAccepted = True
      break

  # Printing results
  def print_line():
    print(f'{"".join(["+" + ("-" * (max_len + 2)) for max_len in max_lens.values()])}+')

  max_lens = {key: max(len(value) for value in results[key]) for key in results}
  justs = {'step': '>', 'stack': '', 'symbols': '', 'input': '>', 'action': ''}

  print_line()
  print(''.join([f'| {history[0]:^{max_len}} ' for history, max_len in zip(results.values(), max_lens.values())]) + '|')
  print_line()
  for i, step in enumerate(results['step'][:-1], 1):
    print(''.join([f'| {history[i]:{just}{max_len}} ' for history, just, max_len in zip(results.values(), justs.values(), max_lens.values())]) + '|')

  print_line()

  return isAccepted


In [20]:
incorrectSentenceCount = 0
incorrectSentenceResult = []
for input_text in correctedWordsArray :
  isAccepted = LR_parser(slr, input_text)
  # Performing semantic analysis  using nlp for grammar corrections
  if(not isAccepted) :
    print("Correction : ")
    correction = GingerIt().parse(" ".join(input_text))['result']
    print(correction)
    incorrectSentenceCount += 1
    incorrectSentenceResult.append("Incorrect sentence : {}\nCorrection : {}\n\n".format(" ".join(input_text), correction))

+-----+---------+-------------+-----------------------------------------+------------------------------------------------+
|     |  STACK  |   SYMBOLS   |                  INPUT                  |                     ACTION                     |
+-----+---------+-------------+-----------------------------------------+------------------------------------------------+
| (1) | 0       |             | she were writing a letter to ishwarya $ | shift                                          |
| (2) | 0 67    |  she        |     were writing a letter to ishwarya $ | reduce by PRONOUN_SG -> she                    |
| (3) | 0 48    |  PRONOUN_SG |     were writing a letter to ishwarya $ | reduce by NP_SG -> PRONOUN_SG                  |
| (4) | 0 9     |  NP_SG      |     were writing a letter to ishwarya $ | shift                                          |
| (5) | 0 9 214 |  NP_SG were |          writing a letter to ishwarya $ | ERROR: input cannot be parsed by given grammar |
+-----+---------

In [21]:
outputLines.append("Incorrect sentences count : {}\n\n".format(incorrectSentenceCount))
outputLines.append("---------------------------------------------\n\n")
for result in incorrectSentenceResult :
  outputLines.append(result)
outputLines.append("---------------------------------------------\n\n")

In [22]:
# WRITING RESULT TO THE OUTPUT FILE

outputFile = open("output.txt", "w")
for line in outputLines:
  outputFile.write(line)
outputFile.close()