In [None]:
import json
import csv
import string
import re

# Importing and Cleaning Lexicons and Review Datasets

In [None]:
# loading lexicon data
# Builds three word lists used by the rest of the notebook: positiveWords,
# negativeWords and negationWords. English and Tagalog entries are pooled into
# the same positive/negative lists.
positiveWords = []
negativeWords = []
negationWords = []

# english sentiment words: each line is "<word>\t<integer score>"
with open('englishLexicon.txt', encoding='utf-8-sig') as eL:
  for line in eL:
    if not line.strip(): # a blank line (e.g. trailing newline) has no score column to parse
      continue
    tempLine = line.rstrip().split('\t')
    score = int(tempLine[1])
    if score > 0: # categorizing english positive lexicons
      positiveWords.append(tempLine[0])
    elif score < 0: # categorizing english negative lexicons
      negativeWords.append(tempLine[0])
    # a score of exactly 0 goes into neither list

# tagalog sentiment words: column 1 is appended as the word, column 4 is compared against the label
# newline='' is the csv module's documented way to open files handed to csv.reader
with open('tagalogLexicon.csv', 'r', encoding='utf-8-sig', newline='') as tL:
  reader = csv.reader(tL)
  next(reader, None) # discard the first row (presumably the header); tolerate an empty file
  for row in reader:
    if not row: # csv.reader yields [] for blank lines
      continue
    if row[4] == 'positive': # categorizing tagalog positive lexicons
      positiveWords.append(row[1])
    elif row[4] == 'negative': # categorizing tagalog negative lexicons
      negativeWords.append(row[1])

# negations: one word per line
with open('negations.txt', encoding='utf-8-sig') as nL:
  for line in nL: # inputting negation lexicons
    negation = line.rstrip()
    if negation: # skip blank lines so "" never becomes a lexicon entry
      negationWords.append(negation)


In [None]:
# importing reviews
def loadReviews(path):
  """Read a review CSV into a list of [review, sentiment] pairs.

  The first row of the file is discarded. For every following non-empty row,
  column 0 is kept unchanged as the review text and column 1 is passed through
  str.capitalize() to give the sentiment label.

  Args:
    path: path of the CSV file to read (decoded as utf-8-sig).

  Returns:
    A list of two-element lists: [review text, capitalized sentiment].

  Raises:
    IndexError: if a non-empty row has fewer than two columns.
  """
  reviews = []
  # newline='' is the csv module's documented way to open files handed to csv.reader
  with open(path, 'r', encoding='utf-8-sig', newline='') as reviewFile:
    reader = csv.reader(reviewFile)
    next(reader, None) # discard the first row (presumably the header)
    for row in reader:
      if not row: # csv.reader yields [] for blank lines
        continue
      review = row[0] # kept verbatim; punctuation is stripped later, at classification time
      sentiment = row[1].capitalize()
      reviews.append([review, sentiment])
  return reviews

englishReviews = loadReviews('englishReviews.csv')
tagalogReviews = loadReviews('tagalogReviews.csv')

print(englishReviews[0])

['No issues.', 'Positive']


In [None]:
def sentenceSeparate(text):
  """Split text into sentences on '.', '!' and '?'.

  Each fragment is stripped of surrounding whitespace and empty fragments are
  discarded. This keeps a final sentence that has no closing punctuation
  (previously the last fragment was always dropped) and avoids empty entries
  for repeated terminators such as "!!" or "...".

  Args:
    text: the string to split.

  Returns:
    A list of non-empty sentence strings without their terminators.
  """
  fragments = (fragment.strip() for fragment in re.split(r"[.!?]", text))
  return [fragment for fragment in fragments if fragment]

# Initialization and Setup

In [None]:
# A single state of the automaton
class State:
    """One state of a finite-state automaton.

    Holds a transition table (input key -> value returned as the next state)
    together with two flags marking the state as a start and/or end state.
    """

    def __init__(self):
        """Create a state with no transitions that is neither start nor end."""
        self.transitionDict, self.endStateBool, self.startStateBool = {}, False, False

    def setDict(self, inputDict):
        """Replace the whole transition table with inputDict (stored by reference)."""
        self.transitionDict = inputDict

    def addDict(self, word, state):
        """Add or overwrite the transition taken on input `word`."""
        self.transitionDict[word] = state

    def setEndState(self, endState):
        """Set the end-state flag to `endState`."""
        self.endStateBool = endState

    def setStartState(self, startState):
        """Set the start-state flag to `startState`."""
        self.startStateBool = startState

    def isEndState(self):
        """Return the end-state flag."""
        return self.endStateBool

    def isStartState(self):
        """Return the start-state flag."""
        return self.startStateBool

    def getNewState(self, key):
        """Return the transition target stored for `key`.

        Raises:
            KeyError: if no transition is stored for `key`.
        """
        return self.transitionDict[key]


In [None]:
def sentiAutomata(autoList, autoString, verbose=True):
    """Classify a string by running its words through a word-level automaton.

    The string is stripped of ASCII punctuation, lowercased and split on
    whitespace. Starting from the single start state in `autoList`, every word
    that has a transition out of the current state moves the automaton; words
    without a transition are ignored. The index of the final state is then
    mapped to a label: 0 -> "Neutral", 1 -> "Positive", 2 -> "Negative",
    3 -> "Conflicting". If the run stops in state 4, 5 or 6 (a trailing
    negation with nothing after it) and that state is not an end state, the
    label of the state visited just before it is used instead.

    Whether a word is known is decided from the current state's own
    `transitionDict`, so the function does not depend on any module-level
    word list being defined before it is called.

    Args:
        autoList: list of state objects providing isStartState(),
            isEndState(), getNewState(word) and a `transitionDict` mapping;
            transition targets are indices into this list.
        autoString: the text to classify.
        verbose: when True (the default) print the input string and each
            transition taken.

    Returns:
        One of "Positive", "Negative", "Conflicting", "Neutral";
        "There are too many start states" or "Missing Start State" when
        `autoList` does not contain exactly one start state; or None when the
        final state has no label.
    """
    sentimentLabels = {0: "Neutral", 1: "Positive", 2: "Negative", 3: "Conflicting"}
    negationStates = (4, 5, 6)

    if verbose:
        print(f"\nString: {autoString}")

    # "Cleaning" the string (removing punctuations and making all the words lowercase), then splitting the string into a list of words
    autoWords = autoString.translate(str.maketrans('', '', string.punctuation)).lower().split()

    # Indices of every state flagged as a start state; exactly one is required
    startStates = [index for index, state in enumerate(autoList) if state.isStartState()]

    #If more than one state listed as start state, returns this message
    if len(startStates) > 1:
        return("There are too many start states")

    #If no start states are found, returns this message
    if not startStates:
        return("Missing Start State")

    autoCurrent = startStates[0]
    # History of visited states, seeded with the real start state
    states = [autoCurrent]

    for autoWord in autoWords:
        # Words with no transition from the current state leave it unchanged
        if autoWord not in autoList[autoCurrent].transitionDict:
            continue
        autoCurrent = autoList[autoCurrent].getNewState(autoWord)
        states.append(autoCurrent) # Add the states each word has gone through
        if verbose:
            print(f"{autoWord} (state {autoCurrent})")

    #Once there are no more words to process, checks if the state we end up with is an end state.
    if autoList[autoCurrent].isEndState():
        return sentimentLabels.get(autoCurrent)

    # Dangling negation: fall back to the state held before the negation word
    if autoCurrent in negationStates and len(states) > 1:
        return sentimentLabels.get(states[-2])

    return None



In [None]:
# List of States
Senti_FSA = []
lexicons = positiveWords + negativeWords + negationWords
no_of_states = 7

# Populating with new state objects
for i in range(no_of_states):
    Senti_FSA.append(State())

# state 0: neutral, state 1: positive, state 2: negative, state 3: conflicting, state 4: negation, state 5: negation preceded by a positive, state 6: negation preceded by a negative
# Each row lists, for one kind of word, the target state when that word is read
# in state 0, 1, 2, ..., 6 (position in the tuple == source state index).
positiveTargets = (1, 1, 3, 3, 2, 3, 2)
negativeTargets = (2, 3, 2, 3, 1, 1, 3)
negationTargets = (4, 5, 6, 3, 0, 1, 2)

# Sets make each membership test constant-time instead of a scan of the whole list
positiveSet = set(positiveWords)
negativeSet = set(negativeWords)

# Using the addDict function, insert one transition per state for every lexicon word
for word in lexicons:
    # Same precedence as before: positive wins over negative, which wins over negation
    if word in positiveSet:
        targets = positiveTargets
    elif word in negativeSet:
        targets = negativeTargets
    else:
        # lexicons is the concatenation of the three lists, so anything left is a negation word
        targets = negationTargets

    for stateIndex, target in enumerate(targets):
        Senti_FSA[stateIndex].addDict(word, target)

# State 0 is the only start state; states 0-3 are the end states
Senti_FSA[0].setStartState(True)
for i in (0, 1, 2, 3):
    Senti_FSA[i].setEndState(True)

# Results and Analysis

In [None]:
# Accuracy over (at most) the first 250 English reviews
englishSample = englishReviews[0:250]
englishCorrectCounter = sum(
  1 for i in englishSample if sentiAutomata(Senti_FSA, i[0]) == i[1]
)

# Divide by the number of reviews actually evaluated: the slice is shorter than
# 250 when the dataset is, and a fixed divisor would understate the accuracy.
if englishSample:
  print("English Accuracy: ", englishCorrectCounter/len(englishSample))
else:
  print("English Accuracy: ", "n/a (no reviews)")


String: No issues.
no (state 4)
issues (state 1)

String: I've got a couple of these in varying sizes. I've had no problems and no complaints about speed. I always end up buying more because the prices are so low for what you get.
no (state 4)
problems (state 1)
no (state 5)
complaints (state 1)

String: Use this in my Samsung Galaxy S4.  Have had it for almost a year now, no problems, really expands your memory capabilities, easy enough to install.
no (state 4)
problems (state 1)
expands (state 1)
easy (state 1)

String: I have two of these and use them for my Galazy Note 2. It's great having the flexibility to keep extra storage with you for your 'phablet'.So far they have done what they are intended to do.
great (state 1)

String: Since there are almost three thousands reviews already, I bet almost everything about reading and writing speeds have been covered. I'll just try to provide some specifics from my experience in case if somebody is searching with these keywords. I've copie

In [None]:
# Accuracy over (at most) the first 250 Tagalog reviews
tagalogSample = tagalogReviews[0:250]
tagalogCorrectCounter = sum(
  1 for i in tagalogSample if sentiAutomata(Senti_FSA, i[0]) == i[1]
)

# Divide by the number of reviews actually evaluated: the slice is shorter than
# 250 when the dataset is, and a fixed divisor would understate the accuracy.
if tagalogSample:
  print("Tagalog Accuracy: ", tagalogCorrectCounter/len(tagalogSample))
else:
  print("Tagalog Accuracy: ", "n/a (no reviews)")


String: sir okay armygreen shorts nice
okay (state 1)
nice (state 1)

String: super worth it ang ganda Sombra grabi order na kayo di kayo magsisisisis sobranh ganda order ulit ako at ang bilis dumating
super (state 1)
worth (state 1)
ganda (state 1)
di (state 5)
ganda (state 3)
ulit (state 3)

String: ganda po salamat
ganda (state 1)
salamat (state 1)

String: maayos pagkadeliver maganda den sya
maayos (state 1)
maganda (state 1)

String: ang gnda nang short nagustohan nang binigyan ko salamat din sa delivery guy ang bait Niya Thank you po kuya
salamat (state 1)
thank (state 1)

String: Thank You ang Ganda niyaUmorder ulit ako ng 50pcs sana my free na kahit 3 pcs hehehe
thank (state 1)
ganda (state 1)
ulit (state 1)
free (state 1)

String: Good quality at maayos naman ang pagkakabalot. thank you seller.
good (state 1)
maayos (state 1)
thank (state 1)

String: ganda niyatry lang oorder ulit. kala ko malapad Hindi pala. pero ang ganda Saka madikit
ganda (state 1)
ulit (state 1)
hindi (s

In [None]:
indexOfReview = 0

# Classify the review at position indexOfReview in each dataset (Tagalog first,
# then English) and print the text next to the predicted sentiment.
for reviewSet in (tagalogReviews, englishReviews):
  reviewText = reviewSet[indexOfReview][0]
  predictedSentiment = sentiAutomata(Senti_FSA, reviewText)
  print(f"String: {reviewText}\nSentiment: {predictedSentiment}")




String: sir okay armygreen shorts nice
okay (state 1)
nice (state 1)
String: sir okay armygreen shorts nice
Sentiment: Positive

String: No issues.
no (state 4)
issues (state 1)
String: No issues.
Sentiment: Positive


In [None]:
# Split the English reviews into one list per labelled sentiment (element 1 of
# each [review, sentiment] pair). Reviews with any other label land in no list.
positiveEnglishReviews = [review for review in englishReviews if review[1] == "Positive"]
negativeEnglishReviews = [review for review in englishReviews if review[1] == "Negative"]
conflictingEnglishReviews = [review for review in englishReviews if review[1] == "Conflicting"]
neutralEnglishReviews = [review for review in englishReviews if review[1] == "Neutral"]

In [None]:
# Per-class accuracy on the English reviews: for each labelled sentiment, the
# share of its reviews for which the automaton predicts that same label.
for label, classReviews in (
  ("Positive", positiveEnglishReviews),
  ("Negative", negativeEnglishReviews),
  ("Neutral", neutralEnglishReviews),
  ("Conflicting", conflictingEnglishReviews),
):
  correctCounter = 0
  for i in classReviews:
    if sentiAutomata(Senti_FSA, i[0]) == i[1]:
      correctCounter += 1

  # A class with no reviews has no defined accuracy; report that instead of
  # dividing by zero.
  if classReviews:
    print(f"{label} English Reviews Accuracy: ", correctCounter/len(classReviews))
  else:
    print(f"{label} English Reviews Accuracy: ", "n/a (no reviews with this label)")



String: No issues.
no (state 4)
issues (state 1)

String: I've got a couple of these in varying sizes. I've had no problems and no complaints about speed. I always end up buying more because the prices are so low for what you get.
no (state 4)
problems (state 1)
no (state 5)
complaints (state 1)

String: Use this in my Samsung Galaxy S4.  Have had it for almost a year now, no problems, really expands your memory capabilities, easy enough to install.
no (state 4)
problems (state 1)
expands (state 1)
easy (state 1)

String: I have two of these and use them for my Galazy Note 2. It's great having the flexibility to keep extra storage with you for your 'phablet'.So far they have done what they are intended to do.
great (state 1)

String: Since there are almost three thousands reviews already, I bet almost everything about reading and writing speeds have been covered. I'll just try to provide some specifics from my experience in case if somebody is searching with these keywords. I've copie

In [None]:
# Split the Tagalog reviews into one list per labelled sentiment (element 1 of
# each [review, sentiment] pair). Reviews with any other label land in no list.
positiveTagalogReviews = [review for review in tagalogReviews if review[1] == "Positive"]
negativeTagalogReviews = [review for review in tagalogReviews if review[1] == "Negative"]
conflictingTagalogReviews = [review for review in tagalogReviews if review[1] == "Conflicting"]
neutralTagalogReviews = [review for review in tagalogReviews if review[1] == "Neutral"]

# Per-class accuracy: for each labelled sentiment, the share of its reviews for
# which the automaton predicts that same label.
for label, classReviews in (
  ("Positive", positiveTagalogReviews),
  ("Negative", negativeTagalogReviews),
  ("Neutral", neutralTagalogReviews),
  ("Conflicting", conflictingTagalogReviews),
):
  correctCounter = 0
  for i in classReviews:
    if sentiAutomata(Senti_FSA, i[0]) == i[1]:
      correctCounter += 1

  # A class with no reviews has no defined accuracy; report that instead of
  # dividing by zero.
  if classReviews:
    print(f"{label} Tagalog Reviews Accuracy: ", correctCounter/len(classReviews))
  else:
    print(f"{label} Tagalog Reviews Accuracy: ", "n/a (no reviews with this label)")

# Class sizes, in the order positive, negative, neutral, conflicting
print(len(positiveTagalogReviews),len(negativeTagalogReviews),len(neutralTagalogReviews),len(conflictingTagalogReviews))


String: sir okay armygreen shorts nice
okay (state 1)
nice (state 1)

String: super worth it ang ganda Sombra grabi order na kayo di kayo magsisisisis sobranh ganda order ulit ako at ang bilis dumating
super (state 1)
worth (state 1)
ganda (state 1)
di (state 5)
ganda (state 3)
ulit (state 3)

String: ganda po salamat
ganda (state 1)
salamat (state 1)

String: maayos pagkadeliver maganda den sya
maayos (state 1)
maganda (state 1)

String: ang gnda nang short nagustohan nang binigyan ko salamat din sa delivery guy ang bait Niya Thank you po kuya
salamat (state 1)
thank (state 1)

String: Thank You ang Ganda niyaUmorder ulit ako ng 50pcs sana my free na kahit 3 pcs hehehe
thank (state 1)
ganda (state 1)
ulit (state 1)
free (state 1)

String: Good quality at maayos naman ang pagkakabalot. thank you seller.
good (state 1)
maayos (state 1)
thank (state 1)

String: ganda niyatry lang oorder ulit. kala ko malapad Hindi pala. pero ang ganda Saka madikit
ganda (state 1)
ulit (state 1)
hindi (s

In [None]:
# Collect every neutral-labelled review (Tagalog first, then English) that the
# automaton does not classify as its label, paired with the prediction it got.
testResults = []

for i in neutralTagalogReviews + neutralEnglishReviews:
  # Classify once and reuse the result: calling the automaton a second time for
  # the append repeated the work and printed each mismatching trace twice.
  predicted = sentiAutomata(Senti_FSA, i[0])
  if predicted != i[1]:
    testResults.append([i, predicted])


String: good morning
good (state 1)

String: good morning
good (state 1)

String: I ordered 1 black and 1 green, I received 2 greens

String: may order is black

String: what u see is what u get

String: uag nakwgaj lagahwo ahaksuwg kwjjfskao wgakwkwhwnwkw ajjwkwqkjwgekwe sjejwhwjwhjsjwkwoiw

String: one, isa nalang yung nandito sakin kasi naibigay ko na yung isa dun sa pinag bigyan ko  

String: Nysbahahhahahahahahahhahahahahhahahahahahahhshshshshahsbahahbsbsbsbsbsbsbbsbsbsbsbsbsbsbsbsbsbsbsbshshhshshshsbsbs

String: Plug&play, just wait more or less 1 minute and will automatically give you the output..

String: hindi kopa po nagagamit aantayin kopa yung usb dangle na type c sana hindi sira 30 days refund namanbilis ng deliver 2 days lang
hindi (state 4)
nagagamit (state 2)
hindi (state 6)
sira (state 3)

String: hindi kopa po nagagamit aantayin kopa yung usb dangle na type c sana hindi sira 30 days refund namanbilis ng deliver 2 days lang
hindi (state 4)
nagagamit (state 2)
hindi (s

In [None]:
# Ad-hoc check on a single mixed English/Tagalog review. The call is the last
# expression of the cell, so the notebook displays the returned label.
# Requires the cells defining sentiAutomata and Senti_FSA to have been run first.
sentiAutomata(
  Senti_FSA,
  'Ok lng for the price kasi mura sya Adjustable din Useful nman for cellphone stand. Convenient to useFast delivery and well packed Recommend for not picky person',
)

NameError: name 'sentiAutomata' is not defined