# Text 1's sentence

In [3]:
import re
import nltk


#simple regular expression, extract tokens from sentence.
pattern = r"\w+|[.,!?;:'\"-]"
text_test = "I am very appreciated  the full support of the professor, for our Springer proceedings publication"
tokens_test = re.findall(pattern, text_test)
print(tokens_test)

#assigning roles to each token.
roles_test = {
    tokens_test[0]: "PRP",      # "I" = Personal Pronoun
    tokens_test[1]: "BVP",      # "am" = Verb, Present, (Non-3rd person singular)
    tokens_test[2]: "RB",       # "very" = Adverb
    tokens_test[3]: "VBN",      # "appreciated" = Verb, Past Participle
    tokens_test[4]: "DT",       # "the" = Determiner
    tokens_test[5]: "JJ",       # "full" = Adjective
    tokens_test[6]: "NN",       # "support" = Noun
    tokens_test[7]: "IN",       # "of" = Preposition
    tokens_test[8]: "DT",       # "the" = Determiner
    tokens_test[9]: "NN",       # "professor" = Noun
    tokens_test[10]: "PUNC",    # "," = Punctuation
    tokens_test[11]: "IN",      # "for" = Preposition
    tokens_test[12]: "PRP$",    # "our" = Possessive Pronoun
    tokens_test[13]: "NNP",     # "Springer" = Proper Noun
    tokens_test[14]: "NNS",     # "proceedings" = Noun, Plural
    tokens_test[15]: "NN"       # "publication" = Noun
}



print("ROLES")
print(roles_test)

#The CORRECT sentence
text_reference = "I am very grateful for the full support of the professor, for our Springer proceedings publication."
tokens_reference = re.findall(pattern, text_reference)
print(tokens_reference)

#assigning roles to each token.
roles_reference = {
    tokens_reference[0]: "PRP",
    tokens_reference[1]: "BVP",
    tokens_reference[2]: "RB",
    tokens_reference[3]: "JJ",
    tokens_reference[4]: "IN",
    tokens_reference[5]: "DT",
    tokens_reference[6]: "JJ",
    tokens_reference[7]: "NN",
    tokens_reference[8]: "IN",
    tokens_reference[9]: "DT",
    tokens_reference[10]: "NN",
    tokens_reference[11]: "PUNC",
    tokens_reference[12]: "IN",
    tokens_reference[13]: "PRP$",
    tokens_reference[14]: "NNP",
    tokens_reference[15]: "NNS",
    tokens_reference[16]: "NN",
    tokens_reference[17]: "PUNC"
}




#Print word-role mappings
for word, role in roles_reference.items():
    print(f"{word}: {role}")


#Get the POS of a word (a word from the wrong sentence)
def POS(word):
    return(roles_reference[word])


index1 = 0
targetPOS = roles_reference[tokens_reference[index1]]    #What POS tag this word is supposed to have.
def FSA():
    global index1, targetPOS, tokens_test, tokens_reference, roles_test, roles_reference

    #Append a placeholder at the end, so that tokens_test has a valid "tokens_test[27]" to work on, so that no errors are caused.
    if index1 == 16 and tokens_test[index1] == "publication":
        tokens_test.append("_")
        roles_test["_"] = "PLACEHOLDER"

    print(f"targetPOS: {targetPOS}")
    print(f"currentPOS: {roles_test[tokens_test[index1]]}")
    print(f"target word: {tokens_reference[index1]}")
    print(f"current word: {tokens_test[index1]}")

    #Compare targetPOS to the ACTUAL POS that this word has. Are they the same? 
    #If so, then there are no errors, so we move on to next word.
    if targetPOS == roles_test[tokens_test[index1]]:    
        #Go to next state

        print(f"Length of tokens: {len(tokens_test)}")
        if index1 < len(tokens_test) - 1:
            targetPOS = roles_reference[tokens_reference[index1 + 1]]
            print(f"New targetPOS: {targetPOS}")
        print("Proceeding...")
       

    #The POS tags are different. Use one of the rules below to fix this error.
    else:   
        print("Error found")
        if tokens_test[index1] == "appreciated":  #Rule 1

            print("Rule 1")
            tokens_test[index1] = "grateful"
            roles_test[tokens_test[index1]] = "JJ"
            index1 -= 1     #This is not required. It is only used to show the corrected version while printing. The fix still takes place regardless.

        elif tokens_test[index1 - 1] == "grateful" and tokens_test[index1] != "for":  #Rule 2

            print("Rule 2")
            tokens_test.insert(index1, "for")
            index1 -= 1    #This is not required. It is only used to show the corrected version while printing. The fix still takes place regardless.

        elif tokens_test[index1] == "_" and index1 == 17:   #Rule 3
            print("Rule 3")
            tokens_test[index1] = "."

        else:
            print("No matching rule.")
            print(tokens_test[index1])

    index1 += 1
    print("   ")
    print("   ")




def arrayToText(array):
    sentence = ""
    finalarray = []
    i = 0
    while i < len(array) - 1:
        finalarray.append(array[i])
        if POS(array[i + 1]) != "PUNC": #Add a space after a word, ONLY if the next word is NOT a punctuation. (We want "Hello, World!", and not "Hello , World !".)
            finalarray.append(" ")
        i += 1
        
    finalarray.append(array[len(array) - 1])    
    sentence = "".join(finalarray)

    return sentence


#FSA will run in a while loop. While index1 <= 17.

print("Before")
print(text_test)
print("   ")
print("   ")

while index1 <= len(tokens_reference) - 1:
    FSA()
    print(f"current index1: {index1}")

print("   ")
print("   ")
print("After")
print(arrayToText(tokens_test))


grammar = nltk.CFG.fromstring(
    """ 
    S -> NP VP PERIOD
    NP -> PRP | PRP N | Det N COMMA PP | Det JJ N PP
    VP -> VBP RB JJ PP
    PP -> IN NP
    PRP -> 'I' | 'our'
    N -> 'support' | 'professor' | 'publication' | JJ N
    Det -> 'the'
    JJ -> 'grateful' | 'full' | 'Springer' | 'proceedings'
    VBP -> 'am' 
    RB -> 'very'
    IN -> 'for' | 'of'
    COMMA -> ','
    PERIOD -> '.'

    """
)
    
parser = nltk.ChartParser(grammar)

treesfound = 0

print("TREE")
for tree in parser.parse(tokens_test):
    tree.pretty_print()


print("TREE DONE")


['I', 'am', 'very', 'appreciated', 'the', 'full', 'support', 'of', 'the', 'professor', ',', 'for', 'our', 'Springer', 'proceedings', 'publication']
ROLES
{'I': 'PRP', 'am': 'BVP', 'very': 'RB', 'appreciated': 'VBN', 'the': 'DT', 'full': 'JJ', 'support': 'NN', 'of': 'IN', 'professor': 'NN', ',': 'PUNC', 'for': 'IN', 'our': 'PRP$', 'Springer': 'NNP', 'proceedings': 'NNS', 'publication': 'NN'}
['I', 'am', 'very', 'grateful', 'for', 'the', 'full', 'support', 'of', 'the', 'professor', ',', 'for', 'our', 'Springer', 'proceedings', 'publication', '.']
I: PRP
am: BVP
very: RB
grateful: JJ
for: IN
the: DT
full: JJ
support: NN
of: IN
professor: NN
,: PUNC
our: PRP$
Springer: NNP
proceedings: NNS
publication: NN
.: PUNC
Before
I am very appreciated  the full support of the professor, for our Springer proceedings publication
   
   
targetPOS: PRP
currentPOS: PRP
target word: I
current word: I
Length of tokens: 16
New targetPOS: BVP
Proceeding...
   
   
current index1: 1
targetPOS: BVP
currentPOS

# Text 2's Sentence

In [None]:
import re
import nltk


#simple regular expression, extract tokens from sentence.
pattern2 = r"\w+|[.,!?;:'\"-]"
text_test = "Anyway, I believe the team, although bit delay and less communication at recent days, they really tried best for paper and cooperation."
tokens_test = re.findall(pattern2, text_test)
#print(tokens)

#assigning roles to each token.
roles_test = {
    tokens_test[0]: "RB",
    tokens_test[1]: "PUNC",
    tokens_test[2]: "PRP",
    tokens_test[3]: "BVP",
    tokens_test[4]: "DT",
    tokens_test[5]: "NN",
    tokens_test[6]: "PUNC",
    tokens_test[7]: "IN",
    tokens_test[8]: "NN",
    tokens_test[9]: "NN",
    tokens_test[10]: "CC",
    tokens_test[11]: "RBR",
    tokens_test[12]: "NN",
    tokens_test[13]: "IN",
    tokens_test[14]: "JJ",
    tokens_test[15]: "NNS",
    tokens_test[16]: "PUNC",
    tokens_test[17]: "PRP",
    tokens_test[18]: "RB",
    tokens_test[19]: "VBD",
    tokens_test[20]: "JJS",
    tokens_test[21]: "IN",
    tokens_test[22]: "NN",
    tokens_test[23]: "CC",
    tokens_test[24]: "NN",
    tokens_test[25]: "PUNC"

}


print("Test Tokens")
#Print word-role mappings
for word, role in roles_test.items():
    print(f"{word}: {role}")


#The CORRECT sentence

text_reference = "Anyway, I believe the team, although a bit delayed and less communicative in recent days, really tried their best to cooperate on the paper."
tokens_reference = re.findall(pattern2, text_reference)


#assigning roles to each token.
roles_reference = {
    tokens_reference[0]: "RB",      #Anyway
    tokens_reference[1]: "PUNC",    #,
    tokens_reference[2]: "PRP",     #I
    tokens_reference[3]: "BVP",     #believe
    tokens_reference[4]: "DT",      #the
    tokens_reference[5]: "NN",      #team
    tokens_reference[6]: "PUNC",    #,
    tokens_reference[7]: "IN",      #although
    tokens_reference[8]: "DT",      #a
    tokens_reference[9]: "NN",      #bit
    tokens_reference[10]: "JJ",     #delayed
    tokens_reference[11]: "CC",     #and
    tokens_reference[12]: "RBR",    #less
    tokens_reference[13]: "JJ",     #communicative
    tokens_reference[14]: "IN",     #in
    tokens_reference[15]: "JJ",     #recent
    tokens_reference[16]: "NNS",    #days
    tokens_reference[17]: "PUNC",   #,
    tokens_reference[18]: "RB",     #really
    tokens_reference[19]: "VBD",    #tried
    tokens_reference[20]: "PRP$",   #their
    tokens_reference[21]: "JJS",    #best
    tokens_reference[22]: "TO",     #to
    tokens_reference[23]: "VB",     #cooperate
    tokens_reference[24]: "IN",     #on
    tokens_reference[25]: "DT",     #the
    tokens_reference[26]: "NN",     #paper
    tokens_reference[27]: "PUNC"    #.
}


print("   ")
print("   ")
print("   ")
print("Reference Tokens")


print(tokens_reference)


#Get the POS of a word (a word from the wrong sentence)
def POS(word):
    return roles_reference[word]


index1 = 0
targetPOS = roles_reference[tokens_reference[index1]]
def FSA():
    global index1, targetPOS, tokens_test, tokens_reference, roles_test, roles_reference

    #Append a placeholder at the end, so that tokens_test has a valid "tokens_test[27]" to work on, so that no errors are caused.
    if index1 == 26 and tokens_test[index1] == "paper":
        tokens_test.append("_")
        roles_test["_"] = "PLACEHOLDER"
        

    print(f"targetPOS: {targetPOS}")
    print(f"currentPOS: {roles_test[tokens_test[index1]]}")
    print(f"target word: {tokens_reference[index1]}")
    print(f"current word: {tokens_test[index1]}")
    print(f"Length of tokens: {len(tokens_test)}")

    if targetPOS == roles_test[tokens_test[index1]]:
        
        if tokens_test[index1] == "at" and tokens_test[index1 + 1] == "recent" and tokens_test[index1 + 2] == "days":
            print("Rule 3")
            tokens_test[index1] = "in"


        #Go to next state
        
        if index1 < len(tokens_test) - 1:
            targetPOS = roles_reference[tokens_reference[index1 + 1]]
            print(f"New targetPOS: {targetPOS}")
            print("Proceeding...")
        
            
       


    else:   
        print("Error found")
        if tokens_test[index1] == "communication":  #Rule 1

            print("Rule 1")
            tokens_test[index1] = "communicative"
            roles_test[tokens_test[index1]] = "JJ"
            index1 -= 1     #This is not required. It is only used to show the corrected version while printing. The fix still takes place regardless.

        elif tokens_test[index1] == "they":  #Rule 4
            print("Rule 4")
            tokens_test.remove("they")

            index1 -= 1    #This is not required. It is only used to show the corrected version while printing. The fix still takes place regardless.

        elif tokens_test[index1] == "bit" and tokens_test[index1 - 1] != "a":   #Rule 5
            print("Rule 5")
            tokens_test.insert(index1, "a")
            roles_test["a"] = "DT"

            index1 -= 1

        elif tokens_test[index1] == "delay" and targetPOS == "JJ":   #Rule 6
            print("Rule 6")
            tokens_test[index1] = "delayed"
            roles_test["delayed"] = "JJ"

            #index1 -= 1

        elif tokens_test[index1] == "best" and tokens_test[index1 - 1] != "their":   #Rule 7
            print("Rule 7")
            tokens_test.insert(index1, "their")
            roles_test["their"] = "PRP$"

            index1 -= 1

        elif tokens_test[index1] == "for":   #Rule 8
            print("Rule 8")
            tokens_test[index1] = "to"
            roles_test["to"] = "TO"

            index1 -= 1

        elif tokens_test[index1] == "paper":   #Rule 9
            print("Rule 9")
            tokens_test[index1] = "cooperate"
            roles_test["cooperate"] = "VB"

            index1 -= 1

        elif tokens_test[index1] == "and" and tokens_test[index1 - 1] == "cooperate":   #Rule 10
            print("Rule 10")
            tokens_test[index1] = "on"
            roles_test["on"] = "IN"

            index1 -= 1

        elif tokens_test[index1] == "cooperation":   #Rule 11
            print("Rule 11")
            tokens_test[index1] = "the"
            roles_test["the"] = "DT"

            index1 -= 1

        elif tokens_test[index1] == ".":   #Rule 12
            print("Rule 12")
            tokens_test[index1] = "paper"

            #tokens_test.append(".")

            index1 -= 1

        elif tokens_test[index1] == "_" and index1 == 27:   #Rule 13
            print("Rule 13")
            tokens_test[index1] = "."

            #tokens_test.append(".")

            index1 -= 1

        else:
            print("No matching rule.")
            print(tokens_test[index1])

    index1 += 1



    print("   ")
    print("   ")


def arrayToText(array):
    sentence = ""
    finalarray = []
    i = 0
    while i < len(array) - 1:
        finalarray.append(array[i])
        if POS(array[i + 1]) != "PUNC":
            finalarray.append(" ")
        i += 1
        
    finalarray.append(array[len(array) - 1])    
    sentence = "".join(finalarray)

    return sentence




#FSA will run in a while loop. While index1 <= 15.

print("Before")
print(text_test)
print("   ")
print("   ")

while index1 <= 27:
    FSA()
    print(f"current index1: {index1}")

print("   ")
print("   ")
print("After")

print(tokens_test)
print(arrayToText(tokens_test))

#   "Anyway, I believe the team, although a bit delayed and less communicative in recent days, really tried their best to cooperate on the paper."

grammar = nltk.CFG.fromstring(
    """ 
    S -> RB COMMA NP VP | NP VP PERIOD
    NP -> PRP | DT NN | DT NN COMMA | JJ NN
    VP -> VB | VB S | RB VB PSP JJ TO_VP | SUB_CL VP
    TO_VP -> TO VP PP
    PP -> IN NP
    SUB_CL -> IN JJP_GROUP PP COMMA
    JJP_GROUP -> JJP | JJP CC JJP
    JJP -> DT NN JJ | RB JJ

    RB -> 'Anyway' | 'less' | 'really'
    PRP -> 'I'
    DT -> 'the' | 'a'
    NN -> 'team' | 'bit' | 'days' | 'paper'
    JJ -> 'delayed' | 'communicative' | 'recent' | 'best'
    VB -> 'believe' | 'tried' | 'cooperate'
    PSP -> 'their'
    TO -> 'to'
    IN -> 'on' | 'although' | 'in'
    CC -> 'and'
    COMMA -> ','
    PERIOD -> '.'
    
    """
)
    
parser = nltk.ChartParser(grammar)

treesfound = 0

print("TREE")
for tree in parser.parse(tokens_test):
    tree.pretty_print()


print("TREE DONE")

