In [1]:
import stanfordnlp
from pattern.en import suggest
from pattern.en import referenced
import codecs
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from pattern.en import conjugate
from pattern.en import pluralize, singularize
from nltk import download
from os import path

In [2]:
def CapitalizeError(text,nlp,correctFlag=False):
    '''
    Purpose: To check if text has errors due to capitalization. 
             Additionally, it returns corrected sentence.
    
    Parameters: text: string
                    A string of text-single or a paragraph.
                    
                correctFlag:boolean 
                   True or False
                    
    Returns: count: integer  
             text: Corrected sentence. (If correctFlag is True)
    '''
    doc = nlp(text)
    count=0
    text=""
    for sen in doc.sentences:
        if sen.words[0].text.islower():
            count+=1
            text+=sen.words[0].text.capitalize()
            text+=" "
        else:
            text+=sen.words[0].text
            text+=" "
        for i in range(1,len(sen.words)):
            if sen.words[i].upos=="PROPN" or sen.words[i].upos=="NNS":
                if sen.words[i].text.islower():
                    count+=1
                    text+=sen.words[i].text.capitalize()
                    text+=" "
                else:
                    text+=sen.words[i].text
                    text+=" "
            elif sen.words[i].text=="i":
                count+=1
                text+=sen.words[i].text.capitalize()
                text+=" "
            else:
                text+=sen.words[i].text.lower()
                text+=" "
    if correctFlag==True:
        return count,text
    else:
        return count

In [3]:
def checkSpellingError(text,nlp,correctFlag=False):
    '''
    Purpose: To check if text has errors due to wrong spellings.
             Additionally, it returns corrected sentence.
    
    Parameters: text: string
                    A string of text-single or a paragraph.
                
                correctFlag:boolean 
                   True or False
                    
    Returns: count: integer  
             text: Corrected sentence. (If correctFlag is True)
    '''
    doc = nlp(text)
    count=0
    text=""
    for sen in doc.sentences:
        for word in sen.words:
            #print(word.text.lower())
            l=["'s","n't","'ll"]
            try:
                sugList=suggest(word.text.lower())
            except:
                sugList = []
                l.append(word.text.lower())
            for k in sugList:
                l.append(k[0])
            if (word.text.lower() in l) or (word.lemma in l):
                text+=word.text
                text+=" "
                continue
            else:
                count+=1
                text+=sugList[0][0]
                text+=" "
    if correctFlag==True:
        return count,text
    else:
        return count

In [4]:
def read_file(file):
    '''
    Purpose: Helper function: Read text files.
    
    Parameters: file: text file.
                    
    Returns: text: string format.
    '''    
    fp=codecs.open(file,"r",encoding='utf8',errors='ignore')
    text=fp.readlines()
    return text

def articleError(text,nlp,correctFlag=False):
    '''
    Purpose: To check if text has errors due to wrong article usage.
             Additionally, it returns corrected sentence.
             
    Parameters: text: string
                    A string of text-single or a paragraph.
                    
                correctFlag:boolean 
                   True or False
                    
    Returns: count: integer  
             text: Corrected sentence. (If correctFlag is True)
    '''
    path="uncNouns.txt"
    unc_text=read_file(path)
    unc_words=[]
    for i in unc_text:
        tokens=word_tokenize(i)
        unc_words.append(tokens[0].lower())
    doc = nlp(text)
    count=0
    ntext=""
    
    for s in doc.sentences:
        for t in range(len(s.words)):
            if s.words[t].text=='a' or s.words[t].text=='an':
                if ((s.words[t+1].text in unc_words) or s.words[t+1].xpos=="NNS" or s.words[t+1].xpos=="NNPS"):
                    count+=1
                elif (t<len(s.words)-2) and (s.words[t+1].xpos in ["JJ","JJR"]) and (s.words[t+2].xpos in ['NNP','NN']):
                    if (s.words[t].text=='a' and referenced(s.words[t+1].text)==('an '+s.words[t+1].text)):
                        ntext+='an'
                        count+=1
                    elif(s.words[t].text=='an' and referenced(s.words[t+1].text)==('a '+s.words[t+1].text)):
                        ntext+='a'
                        count+=1
                    else:
                        ntext+=s.words[t].text
                elif (s.words[t+1].xpos not in ["NNP","NN"] ):
                    count+=1
                elif(s.words[t].text=='a' and referenced(s.words[t+1].text)==('an '+s.words[t+1].text)):
                    ntext+='an'
                    count+=1
                elif(s.words[t].text=='an' and referenced(s.words[t+1].text)==('a '+s.words[t+1].text)):
                    ntext+='a'
                    count+=1
                else:
                    ntext+=s.words[t].text
                ntext+=" "
#             elif (t<len(s.words)-1) and (s.words[t].xpos in ["JJ","JJR"]) and (s.words[t+1].xpos in ['NNP','NN']):
#                 ntext+=referenced(s.words[t].text)+" "
            else:
                ntext+=s.words[t].text
                ntext+=" "
    if correctFlag==True:
        return count,ntext
    else:
        return count

In [5]:
def becauseError(text,nlp,correctFlag=False):
    '''
    Purpose: To check if text after using word 'because' incomplete sentence. 
             Additionally, it returns corrected sentence.
             
    Parameters: text: string
                    A string of text-single or a paragraph.
                    
                correctFlag:boolean 
                   True or False
                    
    Returns: count: integer  
             text: Corrected sentence. (If correctFlag is True)
    '''    
    doc = nlp(text)
    count=0
    text=""
    for s in doc.sentences:
        for i in range(len(s.words)):
            if s.words[i].text=='because':
                if s.words[i+1].upos=='PUNCT' or i==len(s.words)-1:
                    count+=1
                    text+='.'
                    break
                if s.words[i+1].xpos=='IN':
                    if i==len(s.words)-2:
                        count+=1
                    elif(s.words[i+2].xpos not in ['NN','NNS','NNP','NNPS','PRP','PRP$','DT']):
                        count+=1
                        text+="."
                        break
                    else:
                        text+=s.words[i].text
                        text+=" " 
                elif s.words[i+1].xpos in ['NN','NNS','NNP','NNPS','PRP','PRP$']:
                    if i==len(s.words)-2:
                        count+=1
                    flag=0
                    for j in range(i+2,len(s.words)):
                        if s.words[j].xpos in ['VB','VBP','VBZ','VBG','VBN','VBD','MD']:
                            flag+=1
                            break
                    if flag==0:
                        count+=1
                        text+="."
                        break    
                    else:
                        text+=s.words[i].text
                        text+=" "
                else:
                    text+=s.words[i].text
                    text+=" "
            else:
                text+=s.words[i].text
                text+=" "
    if correctFlag==True:
        return count,text
    else:
        return count

In [6]:
def apostropheError(text,nlp,correctFlag=False):
    '''
    Purpose: To check for apostophe errors. 
             Additionally, it returns corrected sentence.
             
    Parameters: text: string
                    A string of text-single or a paragraph.
                    
                correctFlag:boolean 
                   True or False
                    
    Returns: count: integer  
             text: Corrected sentence. (If correctFlag is True)
    '''    
    doc = nlp(text)
    count=0
    text=""
    suffixList=["m","re","s","nt","ll","ve","d"]
    for s in doc.sentences:
        for i in range(len(s.words)):
            if i<(len(s.words)-1) and s.words[i].xpos in ['NN','NNS','NNP','NNPS']:
                if s.words[i].text[-1]=='s':
                    if s.words[i+1].xpos in ['NN','NNS','NNP','NNPS']:
                        status=handleCompoundErrors(s.words[i].text,s.words[i+1].text)
                        if status==True:
                            text+=s.words[i].text+" "
                        else:
                            count+=1
                            text+=s.words[i].text
                            text+="' "
                    else:
                        text+=s.words[i].text
                        text+=" "
                else:
                    if s.words[i+1].xpos in ['NN','NNS','NNP','NNPS']:
                        status=handleCompoundErrors(s.words[i].text,s.words[i+1].text)
                        if status==True:
                            text+=s.words[i].text+" "
                        else:
                            count+=1
                            text+=s.words[i].text
                            text+="'s "
                    else:
                        text+=s.words[i].text
                        text+=" "
            elif s.words[i].text in suffixList:
                if s.words[i].text[0]!="'":
                    count+=1
                    text+="'"+s.words[i].text+" "
                else:
                    text+=s.words[i].text+" "
            else:
                text+=s.words[i].text+" "
                text+=" "
    if correctFlag==True:
        return count,text
    else:
        return count

In [7]:
def SubVerbAgreementError(text,nlp,correctFlag=False):
    '''
    Purpose: To check for errors due to subject-verb agreement error. 
             Additionally, it returns corrected sentence.
             
    Parameters: text: string
                    A string of text-single or a paragraph.
                    
                correctFlag:boolean 
                   True or False
                    
    Returns: count: integer  
             text: Corrected sentence. (If correctFlag is True)
    '''
    doc = nlp(text)
    count=0
    text=""
    for s in doc.sentences:
        for i in range(len(s.words)):
            try:
                if s.words[i].xpos=='NN' or s.words[i].xpos=='NNP':
                    if (i!=len(s.words)-1) and (s.words[i+1].xpos in ['VB','VBP','VBG']):
                        verbLemma=s.words[i+1].lemma
                        v=conjugate(verbLemma, tense = "present",person = 3, number = "singular", mood = "indicative",aspect = "imperfective",negated = False)
                        text+=s.words[i].text+" "
                        if s.words[i+1].text!=v:
                            count+=1
                            s.words[i+1].text=v
                        text+=" "
                    else:
                        text+=s.words[i].text
                        text+=" "
                elif (i!=len(s.words)-1) and (s.words[i].xpos=='NNS' or s.words[i].xpos=='NNPS'):
                    if s.words[i+1].xpos in ['VBG','VBZ']:
                        verbLemma=s.words[i+1].lemma
                        v=conjugate(verbLemma, tense = "present",person = 3, number = "plural", mood = "indicative",aspect = "imperfective",negated = False)
                        text+=s.words[i].text+" "
                        if s.words[i+1].text!=v:
                            count+=1
                            s.words[i+1].text=v
                        text+=" "
                    else:
                        text+=s.words[i].text
                        text+=" "
                elif (i!=len(s.words)-1) and s.words[i].xpos=='PRP':
                    if s.words[i].text=='I':
                        if s.words[i+1].xpos in ['VBZ','VBN','VBG']:
                            
                            verbLemma=s.words[i+1].lemma
                            v=conjugate(verbLemma, tense = "present",person = 1, number = "singular", mood = "indicative",aspect = "imperfective",negated = False)
                            text+=s.words[i].text+" "
                            if s.words[i+1].text!=v:
                                count+=1
                                s.words[i+1].text=v
                            text+=" "
                        else:
                            text+=s.words[i].text
                            text+=" "
                    elif s.words[i].text.lower() in ['he','she','it']:
                        if s.words[i+1].xpos in ['VBP','VB','VBN','VBG']:
                            
                            verbLemma=s.words[i+1].lemma
                            v=conjugate(verbLemma, tense = "present",person = 3, number = "singular", mood = "indicative",aspect = "imperfective",negated = False)
                            text+=s.words[i].text+" "
                            if s.words[i+1].text!=v:
                                count+=1
                                s.words[i+1].text=v
                            text+=" "
                        else:
                            text+=s.words[i].text
                            text+=" "
                    elif s.words[i].text.lower() in ['we','they','you']:
                        if s.words[i+1].xpos not in ['VB','VBP']:
                            
                            verbLemma=s.words[i+1].lemma
                            v=conjugate(verbLemma, tense = "present",person = 3, number = "plural", mood = "indicative",aspect = "imperfective",negated = False)
                            text+=s.words[i].text+" "
                            if s.words[i+1].text!=v:
                                count+=1
                                s.words[i+1].text=v
                            text+=" "
                        else:
                            text+=s.words[i].text
                            text+=" "
                    else:
                        text+=s.words[i].text
                        text+=" "
                else:
                    text+=s.words[i].text
                    text+=" "
            except:
                text+=s.words[i].text
                text+=" "
    if correctFlag==True:
        return count,text
    else:
        return count

In [8]:
def pluralizationError(text,nlp,correctFlag=False):
    '''
    Purpose: To check for pluralization error. 
             Additionally, it returns corrected sentence.
             
    Parameters: text: string
                    A string of text-single or a paragraph.
                    
                correctFlag:boolean 
                   True or False
                    
    Returns: count: integer  
             text: Corrected sentence. (If correctFlag is True)
    '''    
    
    doc = nlp(text)
    count=0
    text=""
    for s in doc.sentences:
        for i in range(len(s.words)):
            if (i!=len(s.words)-1) and (s.words[i].xpos=="NN" or s.words[i].xpos=="NNP"):
                if s.words[i+1].xpos in ["VB","VBP"]:
                    count+=1
                    text+=pluralize(s.words[i].text)+" "
                else:
                    text+=s.words[i].text+" "
            elif (i!=len(s.words)-1) and (s.words[i].xpos=="NNS" or s.words[i].xpos=="NNPS"):
                if s.words[i+1].xpos=="VBZ":
                    text+=singularize(s.words[i].text)+" "
                else:
                    text+=s.words[i].text+" "
            elif  (i!=len(s.words)-1) and s.words[i].xpos=="CD":
                if s.words[i].text=="1" or s.words[i].text=="one":
                    if s.words[i+1].xpos=="NNS" or s.words[i+1].xpos=="NNPS":
                        count+=1
                        s.words[i+1].text=singularize(s.words[i+1].text)
                        text+=s.words[i].text+" "
                else:
                    if s.words[i+1].xpos=="NN" or s.words[i+1].xpos=="NNP":
                        count+=1
                        s.words[i+1].text=pluralize(s.words[i+1].text)
                        text+=s.words[i].text+" "
            else:
                    text+=s.words[i].text+" "
    if correctFlag==True:
        return count,text
    else:
        return count

In [9]:
def handleCompoundErrors(w1,w2):
    '''
    Purpose: Helper function: Check if the words make a compound word together.
             
    Parameters: w1: string
                w2: string
                    
    Returns: booolean
                if True, it is a compound words, else not a compound words.
    '''    
    path="compunds.txt"
    text=read_file(path)
    compounds=[]
    for t in text:
        tokens=word_tokenize(t)
        word=tokens[0]+" "
        if tokens[1][-3:]=="was":
            word+=tokens[1][:-3]
        else:
            word+=tokens[1]
        compounds.append(word)
    w=w1+" "+w2
    if w in compounds:
        return True
    else:
        return False

In [10]:
def TenseError(text,nlp,correctFlag=False):
    '''
    Purpose: To check if text has tense errors. 
             Additionally, it returns corrected sentence.
             
    Parameters: text: string
                    A string of text-single or a paragraph.
                    
                correctFlag:boolean 
                   True or False
                    
    Returns: count: integer  
             text: Corrected sentence. (If correctFlag is True)
    '''    
    doc = nlp(text)
    count=0
    text=""
    for s in doc.sentences:
        dic={"VB":0,"VBP":0,"VBD":0,"VBZ":0,"VBN":0,"VBG":0,"MD":0}
        for i in range(len(s.words)):
            try:
                if s.words[i].xpos in ["MD"]:
                    if (i<len(s.words)-1) and (s.words[i+1].xpos in ["VBZ",'VBP','VBN','VBG','VBD']):
                        if s.words[i+1].text!=s.words[i+1].lemma:
                            s.words[i+1].text=s.words[i+1].lemma
                            count+=1
                        dic["VB"]+=1
                        dic["MD"]+=1
                    if i<len(s.words)-2 and s.words[i+1].text=="be" and (s.words[i+2].xpos in ["VB","VBP","VBZ","VBD"]):
                        v=s.words[i+2].lemma
                        vp=conjugate(v,"part")
                        if vp!=s.words[i+2].text:
                            s.words[i+2].text=vp
                            count+=1
                        dic["VBG"]+=1
                        dic["MD"]+=1
                    elif i<len(s.words)-2 and (s.words[i+1].xpos in ["VB","VBP"]) and (s.words[i+2].xpos in ["VB","VBP","VBZ","VBD","VBG"]):
                        
                        v=s.words[i+2].lemma
                        vp=conjugate(v,"ppart")
                        if vp!=s.words[i+2].text:
                            s.words[i+2].text=vp
                            count+=1
                        dic["VBN"]+=1
                        dic["MD"]+=1
                    text+=s.words[i].text+" "
                elif i<len(s.words)-1 and s.words[i].xpos=="VBD":
                    if s.words[i].text=="had" and s.words[i+1].xpos!="VBN":
                        v=s.words[i+1].lemma
                        vp=conjugate(v,"ppart")
                        if vp!=s.words[i+1].text:
                            s.words[i+1].text=vp
                            count+=1
                        dic["VBD"]+=1
                        dic["VBN"]+=1
                        text+=s.words[i].text+" "
                    elif s.words[i+1].xpos in ["VB","VBP","VBZ","VBD","VBN"]:
                        v=s.words[i+1].lemma
                        vp=conjugate(v,"part")
                        if vp!=s.words[i+1].text:
                            s.words[i+1].text=vp
                            count+=1
                        dic["VBD"]+=1
                        dic["VBG"]+=1
                        text+=s.words[i].text+" "
                    else:
                        text+=s.words[i].text+" "
                elif s.words[i].xpos in ["VB","VBP","VBZ"]:
                    if i<(len(s.words)-1) and (s.words[i].text in ["has","have"]) and (s.words[i+1].xpos in ["VBZ","VBD","VB","VBP","VBG"]):
                        
                        v=s.words[i+1].lemma
                        vp=conjugate(v,"ppart")
                        if vp!=s.words[i+1].text:
                            s.words[i+1].text=vp
                            count+=1
                        dic["VBN"]+=1
                        text+=s.words[i].text+" "
                    elif i<(len(s.words)-1) and (s.words[i].text in ["is","am","are"]) and (s.words[i+1].xpos in ["VBZ","VBD","VB","VBP"]):
                        
                        v=s.words[i+1].lemma
                        vp=conjugate(v,"part")
                        if vp!=s.words[i+1].text:
                            s.words[i+1].text=vp
                            count+=1
                        dic["VBG"]+=1
                        text+=s.words[i].text+" "
                    else:
                        text+=s.words[i].text+" "
                else:
                    text+=s.words[i].text+" "
            except:
                text+=s.words[i].text+" "
    if correctFlag==True:
        return count,text
    else:
        return count                  

In [20]:
nlp = stanfordnlp.Pipeline(processors = "tokenize,mwt,lemma,pos",use_gpu=True)

def grammar_correction(text):
    
#     if path.exists(f"{path.expanduser('~')}/stanfordnlp_resources/en_ewt_models"):
#         print("requirment already installed")
#     else:
#         stanfordnlp.download("en")

        

#     if path.exists(f"{path.expanduser('~')}/nltk_data/tokenizers/punkt/"):
#         print("requirment already installed")
#     else:
#         download('punkt')


    #nlp = stanfordnlp.Pipeline(processors = "tokenize,mwt,lemma,pos",use_gpu=True)

    count=0
    c,t=CapitalizeError(text,nlp,True)
    count+=c

    c,t=checkSpellingError(t,nlp,True)
    count+=c

    c,t=articleError(t,nlp,True)
    count+=c

    c,t=becauseError(t,nlp,True)
    count+=c

    c,t=SubVerbAgreementError(t,nlp,True)
    count+=c

    c,t=pluralizationError(t,nlp,True)
    count+=c

    c,t=TenseError(t,nlp,True)
    count+=c
    
    seq1 = word_tokenize(text)
    seq2 = word_tokenize(t)
    #dist, errors = edit_distance(seq1,seq2,text,t)

    return_json={
    "text":None,
    "errors":[{
        "offset":None,
        "length":None,
        "error_code":None,
        "error_category":None,
        "description":None,
        "correction":None
    },{
        "offset":None,
        "length":None,
        "error_code":None,
        "error_category":None,
        "description":None,
        "correction":None
    }],
    "exceptions":[],
    "correction":None
    }
    #return_json["errors"]=errors
    return_json["text"]=text
    return_json["correction"]=t

    return return_json


Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/Users/anmolsinghal/stanfordnlp_resources/en_ewt_models/en_ewt_tokenizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/Users/anmolsinghal/stanfordnlp_resources/en_ewt_models/en_ewt_lemmatizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemmatizer with edit classifier]
---
Loading: pos
With settings: 
{'model_path': '/Users/anmolsinghal/stanfordnlp_resources/en_ewt_models/en_ewt_tagger.pt', 'pretrain_path': '/Users/anmolsinghal/stanfordnlp_resources/en_ewt_models/en_ewt.pretrain.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
Done loading processors!
---


In [38]:
import numpy as np

def edit_distance(seq1, seq2, s1, s2):
    '''
    Edit Distance Assumptions:
    Cost of addition - 1
    Cost of deletion - 1
    Cost of substitution - 1
    
    Parameters:
    seq1 - word sequence of input sentence
    seq2 - word sequence of corrected sentence
    s1 - input sentence string
    s2 - correct sentence string
    
    Returns:
    1. Edit Distance
    2. List of errors ( each error in the form of a dictionary)
    
    '''
    size_x = len(seq1) + 1
    size_y = len(seq2) + 1
    matrix = np.zeros ((size_x, size_y))
    bp = np.zeros((size_x,size_y,2))
    for x in range(size_x):
        matrix [x, 0] = x
    for y in range(size_y):
        matrix [0, y] = y
    for x in range(1, size_x):
        for y in range(1, size_y):
            if seq1[x-1] == seq2[y-1]:
                matrix [x,y] = min(
                    matrix[x-1, y] + 1,
                    matrix[x-1, y-1],
                    matrix[x, y-1] + 1
                )
                if matrix[x,y] == matrix[x-1,y] + 1:
                    bp[x,y,0] = x-1
                    bp[x,y,1] = y
                elif matrix[x,y] == matrix[x,y-1] + 1:
                    bp[x,y,0] = x
                    bp[x,y,1] = y-1
                else:
                    bp[x,y,0] = x-1
                    bp[x,y,1] = y-1
            else:
                matrix [x,y] = min(
                    matrix[x-1,y] + 1,
                    matrix[x-1,y-1] + 1,
                    matrix[x,y-1] + 1
                )
                if matrix[x,y] == matrix[x-1,y] + 1:
                    bp[x,y,0] = x-1
                    bp[x,y,1] = y
                elif matrix[x,y] == matrix[x,y-1] + 1:
                    bp[x,y,0] = x
                    bp[x,y,1] = y-1
                else:
                    bp[x,y,0] = x-1
                    bp[x,y,1] = y-1
                    
    #print (matrix)
    fx = int(size_x - 1)
    fy = int(size_y - 1)
    errors = []
    while(fx != 0 or fy != 0):
        nx = int(bp[fx,fy,0])
        ny = int(bp[fx,fy,1])
        
        if (nx == fx - 1) and (ny == fy - 1):
            
            if seq1[nx] != seq2[ny]:
                d = {}
                d['error code'] = "Grammar Error"
                d['description'] = "Word statrting at index {offset} needs to be substituted"
                d['operation_required'] = "Substitution"
                d['correction'] = [seq1[nx],seq2[ny]]
                d['length'] = len(seq1[nx])
                d['offset'] = s1.index(seq1[nx])
                #print(d)
                errors.append(d)
                #print('\n')
                
        elif (nx == fx) and (ny == fy - 1):
            d = {}
            d['error code'] = "Grammar Error"
            d['description'] = "Word need to be inserted in the first white space after {offset}"
            d['operation_required'] = "Add"
            d['correction'] = seq2[ny]
            if nx < len(seq1):
                d['length'] = len(s1[s1.index(seq1[nx-1]):s1.index(seq1[nx])])+len(seq1[nx])
            else:
                d['length'] = len(s1[s1.index(seq1[nx-1]):])
            d['offset'] = s1.index(seq1[nx-1])
            #print(d)
            errors.append(d)
            #print('\n')
            
        elif (nx == fx - 1) and (ny == fy):
            d = {}
            d['error code'] = "Grammar Error"
            d['description'] = "Word starting at index {offset} needs to be deleted"
            d['operation_required'] = "Delete"
            d['correction'] = seq1[nx]
            d['length'] = len(seq1[nx])
            d['offset'] = s1.index(seq1[nx])
            #print(d)
            errors.append(d)
            #print('\n')
            
        fx = nx
        fy = ny
    return (matrix[size_x - 1, size_y - 1],errors)

'''
s1 = "this is great ."
s2 = "This is great"
seq1 = word_tokenize(s1)
seq2 = word_tokenize(s2)
dist, errors = edit_distance(seq1,seq2,s1,s2)
print("Edit Distance", dist)
print("No of errors", len(errors))
'''
#For testing- 

Edit Distance 2.0
No of errors 2


In [25]:
if __name__=="__main__":
    test_file_path="output.tok.txt"
    text=read_file(test_file_path)
    for t in text:
        rjson = grammar_correction(t)
        print(rjson['correction'])

I think it 's because if the country is good , decide if the children who live in the country is clever . 
In the future , children will make not only the country but also this world . 
It is important for us to grow future < sunk > s people who support us when we become older . 
If most of children in the future learn in the university , the country and the world will be better . 
But , some of them can not learn the university even if they are clever . 
of the reasons are that there is not enough money to study there . 
If the university lost cost to study , some of children who have no money can study there . 
For this reason , i think losing university < sunk > s cost is better . 
I agree with this idea . 
There are reasons for this . 
First , our parents use a lot of money in their life . 
So that idea is good to help them . 
Second , students who study in university will increase . 
And to study in university will gain a lot of knowledge . 
Therefore , the japanese economy will b

If the government did that , there were many sharp people in the country . 
Now there are few people who give money and gifts to the university to pass the exam without the sharp brain . 
I think they are very worse and do not warfare . 
Because of them the sharp people 's chances are taken away . 
If the sharp people became a sharp work , the country would be better . 
I agree with this idea . 
I have reasons for this . 
There are a lot of people who ca n't go to university because of a problem with money . 
In part of them , they can use a system which is able to borrow money to study in university . 
But other people ca n't use it . 
I feel sorry for them . 
If there is no money to study at a university , they can go to university which they want to go to . 
This is my first reason . 
I have a sister who passed a university last week . 
The university is n't near my house , so my parents should work harder . 
To go to a university requires a lot of money . 
To study , to borrow a li

We do n't have to go to university . 
Because university does n't need to all of the people . 
But the hospital needs all of the people in the world . 
If we do free to university , we can n't spend money to the hospital . 
It was so bad . 
Second , university is n't used more money . 
Because , children are increasing now . 
So we should decrease university . 
We can cut the cost . 
We do n't have to waste a lot of money . 
I agree with it . 
I have reasons for this . 
, poor people ca n't pay . 
They are poor , but there are people who want to study . 
If they have money , they can go to school . 
It helps them to pay a lot . 
, it is hard for students to pay money . 
When my uncle was a student , he also worked every day . 
One days , he had aed illness because of his hard work . 
He said , ' be careful . 
Working harder may be able to kill you . 
I think the pressure by paying money is not important . 
It is important for students to study . 
Because of the reasons , i agree that u

I think talking is fun . 
Because i like to talk with my friends . 
I was talking about sports . 
And sometimes i draw pictures . 
I was talking about a picture of a museum . 
I enjoyed it . 
I played volleyball , because i can watch it in me dr . My friend with a teacher plays fun . 
Volley ball is difficult but game war a Eexsaite ing . 
I think i ' m talking to a school student . 
Because i am studying technology with my friend . 
The friend plays volleyball , soccer in the outside . 
I am enjoying my school life . 
The most interesting thing is club activity . 
I joined a guitar club . 
I practice guitar every day in this club . 
Playing guitar makes me happy . 
I think school is fun . 
I have reasons : 
First . 
I have many friends . 
They make me happy . 
Second , i like soccer . 
I can play soccer everyday . 
It was fun . 
Third , i like history . 
It can learn about world history . 
It is very interesting . 
I enjoy talking with my friends at school . 
My friends are very excit

It is very interesting . 
I enjoy talking with my friends at school . 
My friends are very excited . 
I talked with my friends at lunch time . 
We talk about baseball games . 
I like watching baseball very much . 
My friend likes watching baseball , too . 
I enjoy club activities , too . 
I am at the table tennis club . 
The first time i did n't like table tennis . 
But i like table tennis now . 
I do n't like to study but i like going to school . 
I think , to practice swimming is the best of all in my school life . 
When practicing swimming , i feel very happy and excited . 
And , talk with my teacher is happier . 
Second , i will practice harder , and the result will be better . 
This experience was very good and so happy . 
This is why , to practice swimming is the best of all in my school life . 
There are countless things in my school life that i find exciting and fun . 
However above them all is seeing my friends at school . 
As i am attending boys ' school , everyone around me 

I want to read many comic books . 
I like comics and books . 
I want to try swimming in the sea during summer vacation . 
I have reasons for this . 
First , i like swimming . 
Swimming makes fun and feel relaxed . 
Second , i want to see a lot of fish . 
In the sea , there were many careful finish swimming . 
They are cut and beautiful . 
For these reasons I want to try swimming in the sea on summer vacation . 
I want to go to Hokkaoido to take many trains during summer vacation . 
There are reasons : 
First , there is too hot in Japan in summer , but Hokkaido is little colder than the other in Japan . 
And there is too much snow in Hokkaido in winter , so we can go to Hokkaido easily only in spring or summer . 
Second , railways in Hokkaido is disappearing these days . 
I want to take all the trains , so i have to take trains in Hokkaido before they will disappear . 
Third , i only have been to Hokkaido once , so i want to go to Hokkaido more . 
For these reasons , i want to go to Hok

Volunteer is no money but we can help many people . 
I think helping people do n't have enough money . 
It is enough for a volunteer spirit . 
In conclusion , we ought to join a volunteer activity . 
I think learning communication to people . 
Because i meet and speak to many people . 
I think , the participate region event is from now on useful me . 
Because , region events are cleaning , etc there are many . 
There are many cooperation scenes there . 
I think that i could learn how to communicate with many kinds of people . 
Many people , who are young and old , men and women , take part in events . 
So , you will be able to communicate with them if you participate in . 
That is why i think so . 
I think through these activities , i can learn things . 
First , i can learn to keep the environment clean is important . 
If air dirty , i do n't stay there . 
Second , i can learn about vorantires that are fun . 
These activities contact many people . 
So i can take many communication and 

This is why , to practice swimming is the best of all in my school life . 
There are countless things in my school life that i find exciting and fun . 
However above them all is seeing my friends at school . 
As i am attending boys ' school , everyone around me is boys ( obviously ) . 
Though some of you may think that , that can be boring you are mostly mistaken . 
Well , i had somed worries before attending boys ' high school , mostly because it was the first time to ever attend , and i admit i had somed worries about missing girls too . 
Anyway , i like the school atmosphere where students are mainly free to do anything . 
There are no rules . 
Mobile phones are allowed during breaktimes , and even some of us bring gazing consoles to school ! 
Therefore , broaktimes are always really enjoyable , but on the downside i clearly lost some precious time to exercise . 
In conclusion , these are mainly things i mostly enjoy at school . 
I especially enjoy playing the trombone in school 's 

So , i want to see many people . 
That is why i want to do so . 
I want to read many comic books . 
I like comics and books . 
I want to try swimming in the sea during summer vacation . 
I have reasons for this . 
First , i like swimming . 
Swimming makes fun and feel relaxed . 
Second , i want to see a lot of fish . 
In the sea , there were many careful finish swimming . 
They are cut and beautiful . 
For these reasons I want to try swimming in the sea on summer vacation . 
I want to go to Hokkaoido to take many trains during summer vacation . 
There are reasons : 
First , there is too hot in Japan in summer , but Hokkaido is little colder than the other in Japan . 
And there is too much snow in Hokkaido in winter , so we can go to Hokkaido easily only in spring or summer . 
Second , railways in Hokkaido is disappearing these days . 
I want to take all the trains , so i have to take trains in Hokkaido before they will disappear . 
Third , i only have been to Hokkaido once , so i want 

And i was so happy when the people said to us < sunk > < sunk > Thank you a lot '' Second , we can learn a lot of things . 
Volunteer is no money but we can help many people . 
I think helping people do n't have enough money . 
It is enough for a volunteer spirit . 
In conclusion , we ought to join a volunteer activity . 
I think learning communication to people . 
Because i meet and speak to many people . 
I think , the participate region event is from now on useful me . 
Because , region events are cleaning , etc there are many . 
There are many cooperation scenes there . 
I think that i could learn how to communicate with many kinds of people . 
Many people , who are young and old , men and women , take part in events . 
So , you will be able to communicate with them if you participate in . 
That is why i think so . 
I think through these activities , i can learn things . 
First , i can learn to keep the environment clean is important . 
If air dirty , i do n't stay there . 
Second 

If we injure our body , we will not be able to receive some treatment . 
We need them . 
So i do not agree with that idea . 
I agree with this idea . 
I have reasons : 
First , the idea is helpful for us such as poor people . 
Our family does n't have much money , so we are hard to go to college . 
Second , if we do n't need to pay money , we can use money for other things . 
For example , clothes , shoes and so on . 
Third , if we do n't need to pay money , more people go to college . 
Happening the thing , our country 's skill improved . 
So i agree with this idea . 
I do n't want to pay money to college . 
I agree with this idea . 
I have reasons for this . 
First , if the cost of university is free , the parents can buy what they want . 
That makes the family happy . 
Second , money is important for me to spend a better life . 
If i have a lot of money , i will use it for me in the future . 
That is why , i should do the cost of the university is free . 
I agree with this idea that

Our parents must work harder . 
Secondly , there are children who can not go to the university without enough money . 
They may be more clever or wise than those who go there . 
I think it is better to decrease such a child , and i want them never to give up going school because of no money . 
For these reasons , i agree with this idea . 
I agree with this opinion . 
I have reasons for this . 
First , i think that when we study at the university , we should not mind the cost . 
If we mind the cost , we can n't study something which we want to . 
Second , if there is no cost of university , we can use the money to the lifestyle items . 
For example , a car , clothes , a bed , a watch , and delicious food . 
These will make their lives wonderful . 
For these reasons , i should have no cost of university for the students who study at the university . 
I agree with this opinion . 
Because if we do n't need to spend the cost of school , we would study harder . 
I might not have enough money