<h1>Analisis Sentimen Terhadap Presiden Jokowi</h1>

In [1]:
import re as regex
import pandas as pd
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

<p>Data berikut diperoleh dari komentar warganet terhadap Presiden Jokowi di Instagram. Pengumpulan data dilakukan dengan melakukan pencarian berdasarkan tag #jokowi</p>

In [2]:
data = pd.read_csv("dataset jokowi.csv", encoding = "ISO-8859-1")

arr_data = []
for index, row in data.iterrows():
    map_data = {}
    r = row[0].split(";")
    map_data["label"] = r[0]
    map_data["kalimat"] = r[1]
    
    if not pd.isnull(row[1]):
        map_data["kalimat"] += row[1]
    arr_data.append(map_data)
print(arr_data[19])

{'label': 'pos', 'kalimat': 'MAJU TRUSS PAK JOKOWI!!!'}


<h2><i>Preprocessing</i></h2>

<h4>Symbol Removal</h4>

In [3]:
symbolless = []
for d in arr_data:
    sentence = regex.sub("[^a-zA-Z0-9 ]","",d["kalimat"])
    symbolless.append(sentence)
print(symbolless[19])

MAJU TRUSS PAK JOKOWI


<h4>Casefolding</h4>

In [4]:
cases = []
for s in symbolless:
    sentence = s.lower()
    cases.append(sentence)
print(cases[19])

maju truss pak jokowi


<h4>Stopword Removal</h4>

In [25]:
factory = StopWordRemoverFactory()
stopwords = factory.get_stop_words()
stopwords.remove('enggak')
stopwords.remove('tidak')
stopwords.remove('tidaklah')

stopwords_remover = factory.create_stop_word_remover()

stopwordless = []

for c in cases:
    c = stopwords_remover.remove(c)
    stopwordless.append(c)
    
print(stopwordless[19])

maju truss jokowi


<h4>Tokenization</h4>

In [7]:
tokens = []

for c in cases:
    token_data = c.split(" ")
    tokens.append(token_data)
print(tokens[19])

['maju', 'truss', 'pak', 'jokowi']


<h4>Stemming</h4>

In [12]:
factory = StemmerFactory()
stemmer = factory.create_stemmer()

stem_word = []

for t in tokens:
    comment = []
    for w in t:
        stem_data = stemmer.stem(w)
        comment.append(stem_data)
    stem_word.append(comment)
print(stem_word[19])

['maju', 'truss', 'pak', 'jokowi']


#### Slang Word

In [49]:
slang = []
Kata= {"truss": "terus"}   
for s in symbolless:
    sentence = s.lower()
    slang.append(sentence)

sentence_list = slang[19].split()
new_sentence = []

for word in sentence_list:
    for candidate_replacement in Kata:
        if candidate_replacement in word:
            word = word.replace(candidate_replacement, Kata[candidate_replacement])

    new_sentence.append(word)

rfrm = " ".join(new_sentence)
print(rfrm)

maju terus pak jokowi


<h2>Naive Bayes</h2>

<p>Corpus untuk proses Naive Bayes</p>

In [129]:
nb_corpus = []

In [141]:

ALPHA = 1

def search_corpus(word):
    for i in range(len(nb_corpus)):
         if nb_corpus[i]["kata"] == word:
            return nb_corpus[i], i
    return({}, -1)

def naive_bayes_formula(posneg_number,total_in_corpus):
    if total_in_corpus == 0:
        return 0
    return((posneg_number + ALPHA)/total_in_corpus)

def add_into_corpus(data):
    search_result, index = search_corpus(data['kata'])
    if not search_result:
        nb_corpus.append(data)
    else:
        nb_corpus[index]["pos"] += 1
        nb_corpus[index]["neg"] += 1
        if data["pos"] > data["neg"]:
            nb_corpus[index]["neg"] -= 1
        elif data["neg"] > data["pos"]:
            nb_corpus[index]["pos"] -= 1

def naivebayes(single_data):
    arr_data = []
    
    total_pos = 1
    total_neg = 1
    
    for word in single_data:
        data_in_corpus, index = search_corpus(word)
        pos = 1
        neg = 1
        if data_in_corpus:
            pos = naive_bayes_formula(data_in_corpus["pos"], (data_in_corpus["pos"] + data_in_corpus["neg"]))
            neg = naive_bayes_formula(data_in_corpus["neg"], (data_in_corpus["pos"] + data_in_corpus["neg"]))
        add_into_corpus({
        "kata": word, 
        "pos": pos,
        "neg": neg
        })
        total_pos *= pos
        total_neg *= neg
    
    label = "positive"
    
    if total_neg > total_pos:
        label = "negative"
    elif total_neg == total_pos:
        label = "neutral"
        
    print(single_data,"     ",label)

<p>Training dilakukan dengan menggunakan 10 data positif dan negatif</p>

In [142]:
training_data = stem_word[0:7] + stem_word[16:17]
training_raw_data = arr_data[0:7] + arr_data[16:17]
print(training_raw_data)

[{'label': 'neg', 'kalimat': 'segala difoto biar apa'}, {'label': 'neg', 'kalimat': 'pencitraan terus'}, {'label': 'neg', 'kalimat': 'Datang cuman pelanga plongo'}, {'label': 'neg', 'kalimat': 'lambatnya bertindak'}, {'label': 'neg', 'kalimat': 'Datang cuma foto doang.'}, {'label': 'pos', 'kalimat': 'Semangat pak kami semua mendukung bapak'}, {'label': 'pos', 'kalimat': 'Sehat terus pak. Tetap berjiwa besar dan selalu jadi sosok yang penyabar atas nyinyiran orang yg tak tau apa apa tentang dirimu'}, {'label': 'pos', 'kalimat': 'Masyaallah pak.. Fighting.'}]


<p>Berikut proses training Naive Bayes</p>

In [143]:
for i in range(len(training_data)):
    pos = 0
    neg = 0
    
    if training_raw_data[i]['label'] == 'pos':
        pos = 1
    else: 
        neg = 1
    
    for j in range(len(training_data[i])):
        add_into_corpus({'kata': training_data[i][j], 'pos': pos, 'neg': neg})
        

for d in nb_corpus:
    print(d['kata'],": ",d['pos']," ",d['neg'])

segala :  0   7
foto :  0   26
biar :  0   7
apa :  18   3
citra :  0   23
terus :  27   27
datang :  0   22
cuman :  0   11
pelanga :  0   7
plongo :  0   7
lambat :  0   7
tindak :  0   7
cuma :  0   15
doang :  0   15
semangat :  39   0
pak :  97   0
kami :  11   0
semua :  31   0
dukung :  7   0
bapak :  43   0
sehat :  31   0
tetap :  7   0
jiwa :  7   0
besar :  7   0
dan :  23   0
selalu :  7   0
jadi :  11   0
sosok :  7   0
yang :  11   0
sabar :  15   0
atas :  15   0
nyinyir :  7   0
orang :  11   0
yg :  19   0
tak :  7   0
tau :  11   0
tentang :  7   0
diri :  19   0
masyaallah :  7   0
fighting :  7   0
kalo :  4   4
emang :  4   4
indonesia :  28   28
mau :  12   12
maju :  8   8
pikir :  4   4
warga :  8   8
ga :  12   12
harus :  8   8
sempit :  4   4
ya :  12   12
masa :  4   4
presiden :  40   40
sendiri :  12   12
ngebakar :  4   4
wilayah :  4   4
wkwkwkwk :  4   4
mantap :  4   4
jok :  4   4
kali :  4   4
ini :  20   20
akting :  4   4
gak :  12   12
nih :  4   

<p>Berikut hasil dari proses Naive Bayes terhadap seluruh data</p>

In [145]:
for word in stem_word:
    naivebayes(word)

['segala', 'foto', 'biar', 'apa']       negative
['citra', 'terus']       negative
['datang', 'cuman', 'pelanga', 'plongo']       negative
['lambat', 'tindak']       negative
['datang', 'cuma', 'foto', 'doang']       negative
['semangat', 'pak', 'kami', 'semua', 'dukung', 'bapak']       positive
['sehat', 'terus', 'pak', 'tetap', 'jiwa', 'besar', 'dan', 'selalu', 'jadi', 'sosok', 'yang', 'sabar', 'atas', 'nyinyir', 'orang', 'yg', 'tak', 'tau', 'apa', 'apa', 'tentang', 'diri']       positive
['kalo', 'emang', 'indonesia', 'mau', 'maju', 'pikir', 'warga', 'ga', 'harus', 'sempit', 'ya', 'masa', 'presiden', 'sendiri', 'ngebakar', 'wilayah', 'sendiri']       neutral
['wkwkwkwk', 'mantap', 'jok', 'kali', 'ini', 'akting', 'gak', 'sendiri', 'nih', 'yeee']       neutral
['kalau', 'foto', 'foto', 'doang', 'semua', 'orang', 'jugha', 'bisa', 'kayak', 'gitu', 'di', 'bilang', 'prestasi', 'haha', 'lucu', 'negriku']       negative
['mundur', 'saja', 'pakgak', 'kan', 'bisa', 'bapak', 'urus', 'negara', 

<p>Berikut keadaan corpus Naive Bayes setelah pemrosesan</p>

In [147]:
for d in nb_corpus:
    print(d['kata'],": ",d['pos']," ",d['neg'])

segala :  0   9
foto :  0   36
biar :  0   9
apa :  24   3
citra :  0   33
terus :  39   39
datang :  0   30
cuman :  0   15
pelanga :  0   9
plongo :  0   9
lambat :  0   9
tindak :  0   9
cuma :  0   21
doang :  0   21
semangat :  57   0
pak :  141   0
kami :  15   0
semua :  45   0
dukung :  9   0
bapak :  63   0
sehat :  45   0
tetap :  9   0
jiwa :  9   0
besar :  9   0
dan :  33   0
selalu :  9   0
jadi :  15   0
sosok :  9   0
yang :  15   0
sabar :  21   0
atas :  21   0
nyinyir :  9   0
orang :  15   0
yg :  27   0
tak :  9   0
tau :  15   0
tentang :  9   0
diri :  27   0
masyaallah :  9   0
fighting :  9   0
kalo :  6   6
emang :  6   6
indonesia :  42   42
mau :  18   18
maju :  12   12
pikir :  6   6
warga :  12   12
ga :  18   18
harus :  12   12
sempit :  6   6
ya :  18   18
masa :  6   6
presiden :  60   60
sendiri :  18   18
ngebakar :  6   6
wilayah :  6   6
wkwkwkwk :  6   6
mantap :  6   6
jok :  6   6
kali :  6   6
ini :  30   30
akting :  6   6
gak :  18   18
nih 