## Multinomial Naive Bayes Classifier

### Example for Inside Custom Library

In [131]:
import pandas as pd
import numpy as np
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

vectorizer = TfidfVectorizer()
naivebayes = MultinomialNB()
stemmer = StemmerFactory().create_stemmer()
remover = StopWordRemoverFactory().create_stop_word_remover()

In [132]:
class SentimentAnalysis(object):
    
    def __init__(self, train):
        self.loadStandardWords = pd.read_csv('standard_words.csv')
        self.datasets = pd.read_csv('dataset_raw.csv')
        self.train = train
        self.employeeId = [str(x) for x in list(train["pegawai_id_pegawai"])]
        self.teachId = [str(x) for x in list(train["ampu_id_ampu"])]
        self.standartWords = [str(x) for x in list(self.loadStandardWords["kata_baku"])]
        self.loadVector = None
        self.loadNB = None
        X = vectorizer.fit_transform(self.datasets.preprocessing_result.values.astype('U'))
        naivebayes.fit(X, self.datasets.sentimen)
        vectorFile = open('vectorizer.b', 'wb')
        nbFile = open('naive_bayes.b', 'wb')
        pickle.dump(vectorizer, vectorFile)
        pickle.dump(naivebayes, nbFile)
        vectorFile.close()  # close it to make sure it's all been written
        nbFile.close()  # close it to make sure it's all been written
        
    def getUniqueList(self, items):
        unique = list()
        for item in items:
            unique.append(item)
        return unique
    
    def loadPickle(self):
        self.loadVector = pickle.load(open('vectorizer.b', 'rb'), encoding='latin1')
        self.loadNB = pickle.load(open('naive_bayes.b', 'rb'), encoding='latin1')
        
    def preprocesing(self, documents):
        cleanText = list()
        for doc in documents:
            lowerText = doc.lower()
            stemmedText = stemmer.stem(lowerText)
            filteredText = remover.remove(stemmedText)
            for i, text in enumerate(self.loadStandardWords['vocabulary']):
                if filteredText == text:
                    filteredText = self.standartWords[i]
            cleanText.append(filteredText)
        return cleanText
    
    def predict(self):
        self.loadPickle() # Load pickle for vectorizer and naive bayes
        """
            Define variables
        """
        constructData = list()
        finalData = list()
        sentimentData = list()
        newSentimentData = list()
        saveIndex = list()
        total = 0
        sumPositive = 0
        sumNegative = 0
        sumNeutral = 0
        sumAll = 0
        totalPercentNeutral = 0
        totalPercentNegative = 0
        totalPercentPositive = 0
        answer = self.preprocesing(self.train.answer)
        """
            Get Prediction from Term Frequency
        """
        termFrequency = self.loadVector.transform(answer)
        for i in termFrequency:
            sentimentData.append(self.loadNB.predict(i))
            
        """
            Create custom data
        """
        for i, item in enumerate(answer):
            index = self.employeeId[i] + ',' + self.teachId[i]
            saveIndex.append(index)
            newSentimentData.append([index, [sentimentData[i]]])
        newSentimentData = self.getUniqueList(newSentimentData)
        cleanIndex = list(set(saveIndex))
        """
            Get Calculation
        """
        for i in cleanIndex:
            for j in newSentimentData:
                if str(i) == j[0]:
                    if np.int_(j[1]) == 0:
                        sumNegative += 1
                    elif np.int_(j[1]) == 2:
                        sumPositive += 1
                    elif np.int_(j[1]) == 1:
                        sumNeutral += 1
                    sumAll += 1
            percentNegative = round((float(sumNegative) / float(sumAll)) * 100, 2)
            percentNeutral = round((float(sumNeutral) / float(sumAll)) * 100, 2)
            percentPositive = round((float(sumPositive) / float(sumAll)) * 100, 2)
            totalPercentNegative += percentNegative
            totalPercentNeutral += percentNeutral
            totalPercentPositive += percentPositive
            constructData.append([i, percentPositive, percentNeutral, percentNegative])
        averagePositive = round(float(totalPercentPositive) / len(cleanIndex), 2)
        averageNeutral = round(float(totalPercentNeutral) / len(cleanIndex), 2)
        averageNegative = round(float(totalPercentPositive) / len(cleanIndex), 2)
        """
            Get final data that will ready used to dataframe
        """
        for i in constructData:
            n = i[0].split(',')
            dictionary = {
                'Id Dosen': n[0],
                'Id Mata Kuliah': n[1],
                'Sentimen Positif (%)': i[1],
                'Sentimen Netral (%)': i[2],
                'Sentimen Negatif (%)': i[3]
            }
            finalData.append(dictionary)
        return finalData

### Load Dataset

In [133]:
dataTrain = pd.read_csv('data_training.csv')
dataTrain

Unnamed: 0,answer,id_nilai,pegawai_id_pegawai,ampu_id_ampu,semester_id_semester,quest_1,quest_2,quest_3,quest_4,quest_5,quest_6,quest_7,quest_8,quest_9,quest_10,author
0,sangat on time. cara mengajarnya excellent.,15172,176,955,20141,4,4,4,4,4,4,4,4,4,4,198
1,dosen sudah mengajar dengan sangat baik,15173,176,955,20141,4,4,3,4,3,3,3,3,4,4,198
2,udah bagus cara mengajarnya,15174,176,955,20141,4,4,4,4,4,2,4,3,4,4,198
3,"cara mengajar menyenagkan, walau agak menegang...",15175,176,955,20141,4,4,4,4,4,2,4,4,4,4,198
4,saya harap dosen mampu mempertahankan cara men...,15176,176,955,20141,4,4,4,4,4,3,4,4,4,4,198
5,beri jeda di tengah-tengah pelajaran agar maha...,15177,176,955,20141,4,4,3,4,3,2,4,3,3,3,198
6,ketika memindah slide jangan terlalu cepat dan...,15178,176,955,20141,3,3,3,4,3,2,2,3,3,3,198
7,bu salamah excellent,15179,176,955,20141,4,4,4,4,4,3,4,4,4,4,198
8,"metode belajar jangan terlalu cepat, lebih ban...",15180,176,955,20141,3,4,4,4,4,3,3,3,3,4,198
9,jangan terlalu tegang dalam mengajar.,15181,176,955,20141,4,3,3,4,3,4,4,4,4,4,198


### Use Custom Library

In [134]:
sentimen = SentimentAnalysis(train=dataTrain)
result = sentimen.predict()
pd.DataFrame(result)

Unnamed: 0,Id Dosen,Id Mata Kuliah,Sentimen Negatif (%),Sentimen Netral (%),Sentimen Positif (%)
0,263,922,50.0,0.0,50.0
1,244,833,71.43,0.0,28.57
2,122,814,82.61,0.0,17.39
3,174,953,87.8,0.0,12.2
4,208,842,88.1,0.0,11.9
5,176,955,92.06,0.0,7.94
6,136,892,91.43,0.0,8.57
7,263,1007,88.31,1.3,10.39
8,225,900,88.75,1.25,10.0
9,176,961,86.0,2.0,12.0


## Example for Custom Library from File Python (.py)

In [135]:
from sentiment_analisys import SentimentAnalysis
import pandas as pd

### Load Dataset

In [136]:
dataTrain2 = pd.read_csv('data_training.csv')
dataTrain2

Unnamed: 0,answer,id_nilai,pegawai_id_pegawai,ampu_id_ampu,semester_id_semester,quest_1,quest_2,quest_3,quest_4,quest_5,quest_6,quest_7,quest_8,quest_9,quest_10,author
0,sangat on time. cara mengajarnya excellent.,15172,176,955,20141,4,4,4,4,4,4,4,4,4,4,198
1,dosen sudah mengajar dengan sangat baik,15173,176,955,20141,4,4,3,4,3,3,3,3,4,4,198
2,udah bagus cara mengajarnya,15174,176,955,20141,4,4,4,4,4,2,4,3,4,4,198
3,"cara mengajar menyenagkan, walau agak menegang...",15175,176,955,20141,4,4,4,4,4,2,4,4,4,4,198
4,saya harap dosen mampu mempertahankan cara men...,15176,176,955,20141,4,4,4,4,4,3,4,4,4,4,198
5,beri jeda di tengah-tengah pelajaran agar maha...,15177,176,955,20141,4,4,3,4,3,2,4,3,3,3,198
6,ketika memindah slide jangan terlalu cepat dan...,15178,176,955,20141,3,3,3,4,3,2,2,3,3,3,198
7,bu salamah excellent,15179,176,955,20141,4,4,4,4,4,3,4,4,4,4,198
8,"metode belajar jangan terlalu cepat, lebih ban...",15180,176,955,20141,3,4,4,4,4,3,3,3,3,4,198
9,jangan terlalu tegang dalam mengajar.,15181,176,955,20141,4,3,3,4,3,4,4,4,4,4,198


### Use Custom Library

In [137]:
sentimen2 = SentimentAnalysis(train=dataTrain)
result2 = sentimen2.predict()
pd.DataFrame(result2)

Unnamed: 0,Id Dosen,Id Mata Kuliah,Sentimen Negatif (%),Sentimen Netral (%),Sentimen Positif (%)
0,263,922,50.0,0.0,50.0
1,244,833,71.43,0.0,28.57
2,122,814,82.61,0.0,17.39
3,174,953,87.8,0.0,12.2
4,208,842,88.1,0.0,11.9
5,176,955,92.06,0.0,7.94
6,136,892,91.43,0.0,8.57
7,263,1007,88.31,1.3,10.39
8,225,900,88.75,1.25,10.0
9,176,961,86.0,2.0,12.0
