# Data Preprocessing
---

<div class="alert alert-block alert-danger">
<b>Warning:</b> Do not change this script
</div>

In [1]:
import pandas as pd
import swifter
import re

from tqdm.notebook import tqdm

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# nltk.download('punkt') --> uncomment if you haven't downloaded it yet
# nltk.download('wordnet') --> uncomment if you haven't downloaded it yet

from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

In [2]:
path = "/home/jupyter/agra/source code/sentiment analytics/"

In [3]:
df = pd.read_csv(path + "data/resto-cleaned.csv")

In [4]:
df = df[df['language'] == 'in']

In [5]:
cols = ["text"]
df = df[cols]

## Preparation
---

In [6]:
def filtering(text):
    text = re.sub('[0-9]+', '', str(text)) #removing numberic value
    text = re.sub(r'#', '', text) #removing '#' symbol
    text = re.sub(r'[\n]+', '', text) # remove new line
    text = re.sub(r"^\s+|\s+$", "", text) #remove leading and trailing spaces in a word using OR sign to delete both
    text = re.sub(r" +", " ", text) #remove multiple space betwen words
    text = re.sub('https? :\/\/\S+', '', text) #removing hyperlink / URL
    text = re.sub(r"\b[a-zAZ]\b", "", text) #removing single char
    text = re.sub('\s+',' ',text) #removing multiple whitespace
    text = text.replace('\\t'," ").replace('\\n'," ").replace('\\u'," ").replace('\\',"") #remove tab, new line, and back slice
    text = re.sub(r'[^\w\s]', '', text) #remove puntuation& emoji (remove all besides \w > word dan \s > space)
    text = re.sub(r'(.)1+', r'1', text) # remove repeating character
    text = re.sub("[^a-zA-Z]",' ',text)
    
    return text

def casefoldingText(text): # Converting all the characters in a text into lower case
    text = text.lower() 
    return text

def tokenize(text) :
    text = nltk.word_tokenize(text)
    return text

with open(path + "dict/stopwords1.txt") as f:
    additional_stopwords1 = [line.strip() for line in f]

with open(path + "dict/stopwords2.txt") as f:
    additional_stopwords2 = [line.strip() for line in f]

with open(path + "dict/number-stopword.txt") as f:
    number_stopword = [line.strip() for line in f]
    
with open(path + "dict/calendar-words.txt") as f:
    calendar_stopword = [line.strip() for line in f]
    
with open(path + "dict/indonesian-region.txt") as f:
    region_stopword = [line.strip() for line in f]

with open(path + "dict/swear-words.txt") as f:
    swear_stopword = [line.strip() for line in f]
    
with open(path + "dict/resto-words.txt") as f:
    resto_stopword = [line.strip() for line in f]
    
def stopword(text): # Remove stopwors in a text
    listStopwords = set(stopwords.words('indonesian'))
    filtered = []
    for txt in text:
        if txt not in listStopwords:
            filtered.append(txt)
    text = filtered 
    return text

def Slangword(ulasan):
    kamusSlang = eval(open(path+"dict/combined_slang_words.txt").read())
    pattern = re.compile(r'\b( ' + '|'.join (kamusSlang.keys())+r')\b')
    content = []
    for kata in ulasan:
        filterSlang = pattern.sub(lambda x: kamusSlang[x.group()],kata)
        content.append(filterSlang.lower())
    ulasan = content
    return ulasan

def stopwords(words):
    return [word for word in words if word not in data]

def stemmed_wrapper(term):
    return stemmer.stem(term)

def stemmingText(document):
    return [term_dict[term] for term in document]

def check_exaggeration(text):
    """Mengidentifikasi apakah terdapat pengulangan huruf yang berlebihan dalam teks"""
    count = 0
    for i, char in enumerate(text):
        if i < len(text) - 1 and char == text[i + 1]:
            count += 1
    if count > len(text) * 0.3:
        return True
    else:
        return False
    
def repair_exaggeration(text):
    """Menghilangkan pengulangan huruf yang berlebihan dalam teks"""
    new_text = ""
    for i, char in enumerate(text):
        if i < len(text) - 1 and char == text[i + 1]:
            continue
        new_text += char
    return new_text

## Start Preprocessing
---

### Filtering

In [7]:
%time df['filtering'] = df['text'].swifter.apply(filtering)

Pandas Apply:   0%|          | 0/9502 [00:00<?, ?it/s]

CPU times: user 630 ms, sys: 11.3 ms, total: 641 ms
Wall time: 636 ms


In [8]:
print('before filtering : ', df['text'].iloc[1])
print('after filtering : ', df['filtering'].iloc[1])

before filtering :  @tanyarlfes Part time di kopi kenangan aja ada kalo gasalah khusus mahasiswa
after filtering :  tanyarlfes Part time di kopi kenangan aja ada kalo gasalah khusus mahasiswa


### Casefolding

In [9]:
%time df['casefolding'] = df['filtering'].swifter.apply(casefoldingText)

Pandas Apply:   0%|          | 0/9502 [00:00<?, ?it/s]

CPU times: user 57.7 ms, sys: 12.6 ms, total: 70.3 ms
Wall time: 71.9 ms


In [10]:
print('before casefolding : ', df['filtering'].iloc[1])
print('after casefolding : ', df['casefolding'].iloc[1])

before casefolding :  tanyarlfes Part time di kopi kenangan aja ada kalo gasalah khusus mahasiswa
after casefolding :  tanyarlfes part time di kopi kenangan aja ada kalo gasalah khusus mahasiswa


### Repair Exaggeration

In [11]:
%time df['check_exaggeration'] = df['casefolding'].swifter.apply(check_exaggeration)
check_exaggeration_df = df[df['check_exaggeration'] == True]

Pandas Apply:   0%|          | 0/9502 [00:00<?, ?it/s]

CPU times: user 253 ms, sys: 174 µs, total: 254 ms
Wall time: 254 ms


In [12]:
print("total exaggeration word : ", len(check_exaggeration_df))

total exaggeration word :  1


In [13]:
%time df['repair_exaggeration'] = df['casefolding'].swifter.apply(repair_exaggeration)

Pandas Apply:   0%|          | 0/9502 [00:00<?, ?it/s]

CPU times: user 319 ms, sys: 4.65 ms, total: 323 ms
Wall time: 318 ms


In [14]:
print('before repair_exaggeration :\n', df['casefolding'].iloc[3318],'\n')
print('after repair_exaggeration :\n', df['repair_exaggeration'].iloc[3318])

before repair_exaggeration :
 kamuuuuuuuuuu akuuuu akan menjadiii kopi kenangan 

after repair_exaggeration :
 kamu aku akan menjadi kopi kenangan


In [15]:
stop_factory = StopWordRemoverFactory()

# custom your stopword
more_stopword = additional_stopwords

data = stop_factory.get_stop_words()+more_stopword
stopword = stop_factory.create_stop_word_remover()

NameError: name 'additional_stopwords' is not defined

In [16]:
%time df['stopwords'] = df['repair_exaggeration'].swifter.apply(stopwords)

NameError: name 'data' is not defined

In [17]:
print('before stopwords :\n', df['repair_exaggeration'].iloc[2494],'\n')
print('after stopwords :\n', df['stopwords'].iloc[2494])

before stopwords :
 krsynt convomf es kopi kenangan mantan rizky bilar 



KeyError: 'stopwords'

### Tokenize

In [18]:
%time df['tokenize'] = df['repair_exaggeration'].swifter.apply(tokenize)

Pandas Apply:   0%|          | 0/9502 [00:00<?, ?it/s]

CPU times: user 2.06 s, sys: 28.2 ms, total: 2.08 s
Wall time: 2.18 s


In [19]:
print('before tokenize :\n', df['repair_exaggeration'].iloc[1],'\n')
print('after tokenize :\n', df['tokenize'].iloc[1])

before tokenize :
 tanyarlfes part time di kopi kenangan aja ada kalo gasalah khusus mahasiswa 

after tokenize :
 ['tanyarlfes', 'part', 'time', 'di', 'kopi', 'kenangan', 'aja', 'ada', 'kalo', 'gasalah', 'khusus', 'mahasiswa']


### Stopword

In [21]:
stop_factory = StopWordRemoverFactory()

# custom your stopword
more_stopword = ['jir','njir','bjir','juga', 'yang','yg', 'nya', 'aja', 'loh' ,'sih', 'deh', 'an', 'ro', 
                 'aj', 'kopi','kenangan','tanyarlfes','convomfs','FOODFESS2','FOODFESS', 'hehe', 'lho',
                'fodfes', 'worksfes', 'discountfes', 'bankneocommerce']

data = stop_factory.get_stop_words()+more_stopword+additional_stopwords1+additional_stopwords2+number_stopword+calendar_stopword+region_stopword+swear_stopword+resto_stopword
stopword = stop_factory.create_stop_word_remover()

In [22]:
%time df['stopword'] = df['tokenize'].swifter.apply(stopwords)

Pandas Apply:   0%|          | 0/9502 [00:00<?, ?it/s]

CPU times: user 3.35 s, sys: 8.2 ms, total: 3.36 s
Wall time: 3.42 s


In [23]:
print('before stopword :\n', df['tokenize'].iloc[1],'\n')
print('after stopword :\n', df['stopword'].iloc[1])

before stopword :
 ['tanyarlfes', 'part', 'time', 'di', 'kopi', 'kenangan', 'aja', 'ada', 'kalo', 'gasalah', 'khusus', 'mahasiswa'] 

after stopword :
 ['part', 'time', 'kalo', 'gasalah', 'khusus', 'mahasiswa']


### Formalization

In [24]:
%time df['formalisasi'] = df['stopword'].swifter.apply(Slangword)

Pandas Apply:   0%|          | 0/9502 [00:00<?, ?it/s]

CPU times: user 34.5 s, sys: 1.08 s, total: 35.6 s
Wall time: 37.7 s


In [25]:
print('before formalisasi :\n', df['stopword'].iloc[1],'\n')
print('after formalisasi :\n', df['formalisasi'].iloc[1])

before formalisasi :
 ['part', 'time', 'kalo', 'gasalah', 'khusus', 'mahasiswa'] 

after formalisasi :
 ['part', 'time', 'kalau', 'tidak salah', 'khusus', 'mahasiswa']


### Stemming

In [26]:
factory = StemmerFactory()
stemmer = factory.create_stemmer()

term_dict = {}

for document in df['formalisasi']:
    for term in document:
        if term not in term_dict:
            term_dict[term] = ' '

for term in tqdm(term_dict): term_dict[term] = stemmed_wrapper(term)
    # print(term,":" ,term_dict[term]) --> display text

  0%|          | 0/21271 [00:00<?, ?it/s]

In [27]:
%time df['stemming'] = df['formalisasi'].swifter.apply(stemmingText)

Pandas Apply:   0%|          | 0/9502 [00:00<?, ?it/s]

CPU times: user 105 ms, sys: 7.99 ms, total: 113 ms
Wall time: 127 ms


In [28]:
print('after stemming :\n', df['formalisasi'].iloc[1],'\n')
print('before stemming :\n', df['stemming'].iloc[1])

after stemming :
 ['part', 'time', 'kalau', 'tidak salah', 'khusus', 'mahasiswa'] 

before stemming :
 ['part', 'time', 'kalau', 'tidak salah', 'khusus', 'mahasiswa']


### Result

In [29]:
df.head()

Unnamed: 0,text,filtering,casefolding,check_exaggeration,repair_exaggeration,tokenize,stopword,formalisasi,stemming
0,"Pickup #kopi , tetap buka meski tanggal libur ...",Pickup kopi tetap buka meski tanggal libur Ja...,pickup kopi tetap buka meski tanggal libur ja...,False,pickup kopi tetap buka meski tangal libur jan ...,"[pickup, kopi, tetap, buka, meski, tangal, lib...","[pickup, buka, tangal, libur, ruko, kaliurang,...","[pickup, buka, tangal, libur, rumah toko, kali...","[pickup, buka, tangal, libur, rumah toko, kali..."
1,@tanyarlfes Part time di kopi kenangan aja ada...,tanyarlfes Part time di kopi kenangan aja ada ...,tanyarlfes part time di kopi kenangan aja ada ...,False,tanyarlfes part time di kopi kenangan aja ada ...,"[tanyarlfes, part, time, di, kopi, kenangan, a...","[part, time, kalo, gasalah, khusus, mahasiswa]","[part, time, kalau, tidak salah, khusus, mahas...","[part, time, kalau, tidak salah, khusus, mahas..."
2,"aku tadi keliling"" naik motor sambil dengerin ...",aku tadi keliling naik motor sambil dengerin r...,aku tadi keliling naik motor sambil dengerin r...,False,aku tadi keliling naik motor sambil dengerin r...,"[aku, tadi, keliling, naik, motor, sambil, den...","[keliling, motor, dengerin, radiohead, pesen, ...","[keliling, motor, dengerin, radiohead, pesan, ...","[keliling, motor, dengerin, radiohead, pesan, ..."
3,"wah, kopi kenangan nya sudah selesai. Makasih ...",wah kopi kenangan nya sudah selesai Makasih ko...,wah kopi kenangan nya sudah selesai makasih ko...,False,wah kopi kenangan nya sudah selesai makasih ko...,"[wah, kopi, kenangan, nya, sudah, selesai, mak...","[selesai, makasih, pahit, manis, harihari, ked...","[selesai, terima kasih, pahit, manis, harihari...","[selesai, terima kasih, pahit, manis, harihari..."
4,Menutup tahun dengan kenangan.\nKopi kenangan ...,Menutup tahun dengan kenanganKopi kenangan dan...,menutup tahun dengan kenangankopi kenangan dan...,False,menutup tahun dengan kenangankopi kenangan dan...,"[menutup, tahun, dengan, kenangankopi, kenanga...","[menutup, kenangankopi, konser, lys, htpscostp...","[menutup, kenangankopi, konser, lys, htpscostp...","[tutup, kenangankopi, konser, lys, htpscostpso..."


In [30]:
df.to_csv(path + "data/resto-preprocess-2.csv", index=False)

# Data Labelling
---

<div class="alert alert-block alert-danger">
<b>Warning:</b> Do not change this script
</div>

## Preparation
---

In [31]:
# Loads lexicon positive and negative data
lexicon_positive = dict()
import csv
with open(path + 'dict/lexicon_positive.csv', 'r') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    for row in reader:
        lexicon_positive[row[0]] = int(row[1])

lexicon_negative = dict()
import csv
with open(path + 'dict/lexicon_negative.csv', 'r') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    for row in reader:
        lexicon_negative[row[0]] = int(row[1])
        
# Function to determine sentiment polarity of tweets        
def sentiment_analysis_lexicon_indonesia(text):
    #for word in text:
    score = 0
    for word in text:
        if (word in lexicon_positive):
            score = score + lexicon_positive[word]
    for word in text:
        if (word in lexicon_negative):
            score = score + lexicon_negative[word]
    polarity=''
    if (score > 0):
        polarity = 'positive'
    elif (score < 0):
        polarity = 'negative'
    else:
        polarity = 'neutral'
    return score, polarity

## Start Labelling using Lexicon
---

In [32]:
%time results = df['stemming'].swifter.apply(sentiment_analysis_lexicon_indonesia)
results = list(zip(*results))

Pandas Apply:   0%|          | 0/9502 [00:00<?, ?it/s]

CPU times: user 86 ms, sys: 7.99 ms, total: 94 ms
Wall time: 91.6 ms


In [33]:
df['polarity_score'] = results[0]
df['polarity'] = results[1]
print(df['polarity'].value_counts())

negative    4916
neutral     2357
positive    2229
Name: polarity, dtype: int64


### Result

In [34]:
df.to_csv(path + "data/resto-labelling-2.csv", index=False)

In [35]:
df.head()

Unnamed: 0,text,filtering,casefolding,check_exaggeration,repair_exaggeration,tokenize,stopword,formalisasi,stemming,polarity_score,polarity
0,"Pickup #kopi , tetap buka meski tanggal libur ...",Pickup kopi tetap buka meski tanggal libur Ja...,pickup kopi tetap buka meski tanggal libur ja...,False,pickup kopi tetap buka meski tangal libur jan ...,"[pickup, kopi, tetap, buka, meski, tangal, lib...","[pickup, buka, tangal, libur, ruko, kaliurang,...","[pickup, buka, tangal, libur, rumah toko, kali...","[pickup, buka, tangal, libur, rumah toko, kali...",3,positive
1,@tanyarlfes Part time di kopi kenangan aja ada...,tanyarlfes Part time di kopi kenangan aja ada ...,tanyarlfes part time di kopi kenangan aja ada ...,False,tanyarlfes part time di kopi kenangan aja ada ...,"[tanyarlfes, part, time, di, kopi, kenangan, a...","[part, time, kalo, gasalah, khusus, mahasiswa]","[part, time, kalau, tidak salah, khusus, mahas...","[part, time, kalau, tidak salah, khusus, mahas...",-1,negative
2,"aku tadi keliling"" naik motor sambil dengerin ...",aku tadi keliling naik motor sambil dengerin r...,aku tadi keliling naik motor sambil dengerin r...,False,aku tadi keliling naik motor sambil dengerin r...,"[aku, tadi, keliling, naik, motor, sambil, den...","[keliling, motor, dengerin, radiohead, pesen, ...","[keliling, motor, dengerin, radiohead, pesan, ...","[keliling, motor, dengerin, radiohead, pesan, ...",5,positive
3,"wah, kopi kenangan nya sudah selesai. Makasih ...",wah kopi kenangan nya sudah selesai Makasih ko...,wah kopi kenangan nya sudah selesai makasih ko...,False,wah kopi kenangan nya sudah selesai makasih ko...,"[wah, kopi, kenangan, nya, sudah, selesai, mak...","[selesai, makasih, pahit, manis, harihari, ked...","[selesai, terima kasih, pahit, manis, harihari...","[selesai, terima kasih, pahit, manis, harihari...",10,positive
4,Menutup tahun dengan kenangan.\nKopi kenangan ...,Menutup tahun dengan kenanganKopi kenangan dan...,menutup tahun dengan kenangankopi kenangan dan...,False,menutup tahun dengan kenangankopi kenangan dan...,"[menutup, tahun, dengan, kenangankopi, kenanga...","[menutup, kenangankopi, konser, lys, htpscostp...","[menutup, kenangankopi, konser, lys, htpscostp...","[tutup, kenangankopi, konser, lys, htpscostpso...",-2,negative


## Comparasion Sentiment Polarity on Tweets Data

In [36]:
pd.set_option('display.max_colwidth', 3000)
positive_tweets = df[df['polarity'] == 'positive']
positive_tweets = positive_tweets[['text', 'polarity_score', 'polarity']].sort_values(by = 'polarity_score', ascending=False).reset_index(drop = True)
positive_tweets.index += 1
positive_tweets[0:10]

Unnamed: 0,text,polarity_score,polarity
1,"Selamat menjelang subuh pelangi-ku untuk awal minggu ini, nikmatilah Seteguk Kopi kentalmu demi meresapi semua kenangan yang sudah berlalu. Seperti mentari yang tiap pagi terbit. Tanpa peduli suka atau tidak suka. https://t.co/LXE4FnvDCX",24,positive
2,"5. Melakukan Inovasi Produk\n\nKopi Kenangan berinovasi menjual produk kopi siap minum dalam bentuk botol yang dijual di supermarket. Jadi, bestie bisa lebih mudah ditemukan oleh para penikmat kopi. \n\nMalah sekarang juga iklannya juga udah berseliweran ya di tv~ https://t.co/caLUZC7d8u",23,positive
3,bos ni emang kadang kadang kadang kadang ni. dari pagi buta cuaca hujan dingin pol eh tiba2 dateng kopi kenangan. dikasih asupan es ☺🤝🏻 makasih banyak pak bos,21,positive
4,"@Bang_Garr Fii amanillah Bang. Selamat menikmati bandara baru Jogja, selamat menikmati suasana Jogja yang terbuat dari rindu dan kenangan 😊✌ besok cobain kopi joss Bang, kopi kasih arang panas membara 😁",21,positive
5,"Bismillah\nThrowback desain 2017\n\nFull porto: https://t.co/f6hBesW31h\n\nYang butuh jasa desain, jasa bikin logo olshop, ilustrasi, jasa desain feed ig, dll bisa dm yaa :)\n\nNasi Goreng Solaria #BongkarPembantaiKM50 #BinJin UU ITE Sambo RANS PIK Kopi Kenangan https://t.co/sNuI7S3CGy",20,positive
6,Thank you @belajarlagiHQ for a memorable (7+) 5-week bootcamp! The best team there is. \nSpecial thanks untuk faciku @budak_milktea yang selalu menyemangati. Dan teman2 tim 9 para penyuka kopi kenangan. https://t.co/13yPwyw4J1,19,positive
7,"@hanyauntukmu_kk Moment paling cocok itu pas weekend dong, kumpul bareng bestie sambil ngobrol2 santai, nostalgia kenangan masa lalu hahaha tentunya dgn ditemani kopi kenangan yg pastinya bikin suasananya makin seru #KenanganHanyaUntukmu\n\nNgopi bareng yuk bestie🥰 @ajekrina @tweetyone_ @sugaaBST",18,positive
8,Kita patut bangga &amp; mendukung Kopi Kenangan sebagai brand kopi lokal yg sukses. Growth nya yg pesat ini membuat mereka berambisi menyaingi brand kopi dunia.\n\nMenurut kamu strategi apa yg menarik dari Kopi Kenangan ini? Reply dibawah yuk!,17,positive
9,"Trik membuat sepatu menjadi kinclong hanya sekejap. Foam pembersih sepatu, Viral nih🤭\n\nBeli👉 https://t.co/ovUrbMI7mY\n\nSusilo Bambang Yudhoyono Habib Bahar 17 Agustus Sambo Kopi Kenangan Bakmi GM Enzy Cakep Lomba Paket D Presiden Joko Widodo UU ITE #BongkarPembantaiKM50 https://t.co/NFNWbge8WS",17,positive
10,"Besok 12.12, beberapa gerai makanan dan minuman menyediakan diskon untuk promo 12.12. Kopi Kenangan menyediakan promo beli 2 kopi hanya Rp 29.000.\n#besok1212 #harbolnas #harbolnas1212 #promo1212 #diskon #promo #starbuck #kopikenangan #chigo\nhttps://t.co/OajX0D5dAt",16,positive


In [37]:
pd.set_option('display.max_colwidth', 3000)
negative_tweets = df[df['polarity'] == 'negative']
negative_tweets = negative_tweets[['text', 'polarity_score', 'polarity']].sort_values(by = 'polarity_score', ascending=True)[0:10].reset_index(drop = True)
negative_tweets.index += 1
negative_tweets[0:10]

Unnamed: 0,text,polarity_score,polarity
1,"Anjgg ASAM LAMBUNG INI KEKNYA GUA, ANJGG KOK TUMBEN PARAH BGT\n\nSAMPE GUE MENGGIGIL + MUAL BGT, DADA GUE BENERAN KEK GMN YA, YAA YG PUNYA ASAM LAMBUNG PASTI PAHAMM\n\nmasa gegara kopi kenangan doang anjird, tumbenan bgt ini guaa😭😭😭 mana besok kuliah offlen huweeeee sedih bgtt",-51,negative
2,"@orgilthingy binar nakal, NAKAL BANGET. dia kalau disuruh buat berhenti minum kopi atau tidur cepet susahnya Allahuakbar. nyebut pokoke, ada aja yang dijawab. no kopi no life. mumet, mual, meriang bukan sanmol yang dicari, tapi kopi kenangan huft. anaknya bertanggungjawab, suka NGOMEL ANJIR.",-47,negative
3,sebenarnya hari ini gw ga mau keluar. gw boongin anak ini gw ada di kopi kenangan neo soho padahal kemaren. eh anjir pas dia di sbux neo gw di samperin tapi gw ga ada wkwk\n\nmau ga mau gw yg nyamperin dia ke sbux neo\n\nini anak WA dia sering gw block dan gw ga pernah save nomor dia https://t.co/q6fhqyRgsb,-45,negative
4,"@PolJokesID mungkin sebaiknya malah menuntut perusahaan pemerintah yang ga sesuai namanya.\nPDAM itu perusahaan daerah air minum tapi boro2 airnya bisa diminum kalo ga mati, buteknya kaya warnanya kopi kenangan 😅\nnegara lain air keran bisa langsung diminum loh 🙏🏻",-43,negative
5,demi Allah gue gugup bgt masuk bioskop gara gara bawa kopi kenangan trs saking gugupnya malah jd ngaku sendiri anjggg “mas kalo bawa minuman dr luar nitip dmn ya?”,-40,negative
6,"pengen cobain kopi... selama ini gw gapernah cobain janjiw, fore, kenangan, dll. gak tau knp ya akhir2 ini pgn minum kopi gara2 nonton hospital playlist, karna pada mesen kopi mulu di dramanya hhh. trus jd penasaran, apa semua kopi bikin gak ngantuk? 🤔",-40,negative
7,"ada deng kesedihan dikitt, ditolak kopi kenangan, tpi ga sedih sedih bgt karna bodo amat bleee",-38,negative
8,@amourdem4vie YA KANNN ANJR!!! gue terakhir minum kopi kenangan malah jantung gue detak kenceng bgt ampe gua panik klo itu hari terakhir gue napas. abis itu gue kapok gamau beli kopi kopian,-38,negative
9,"@convomfs tapi sejak ada kopi kenangan aku udh ga merenung lagi di tempat kopi karen selalu ambil mantanchino🤣\nmerenung dikit di tempat teh nyari yang promo kalo ga ada ambilnya ichitan thaitea atau numilktea\ndi yogurt ambil cimory squeeze taro atau ketan item, susu cimory regal",-37,negative
10,Demi allah gak lagi lagi gue minum kopi kenangan jam 10 malem. Gini hari belum tidur juga ya allahhh padahal badan rasanya remuk banget 3 minggu berturut2 masuk sore tapi mata gamau meremmm😭😭😭😭😭😭😭,-37,negative


In [38]:
pd.set_option('display.max_colwidth', 3000)
neutral_tweets = df[df['polarity'] == 'neutral']
neutral_tweets = neutral_tweets[['text', 'polarity_score', 'polarity']].sort_values(by = 'polarity_score', ascending=True)[0:10].reset_index(drop = True)
neutral_tweets.index += 1
neutral_tweets[0:10]

Unnamed: 0,text,polarity_score,polarity
1,btw logonya mbc gayo ngingetin sama kopi kenangan dah wkwk,0,neutral
2,@lewd_pics_jpg Kopi kenangan ya,0,neutral
3,kata temenku mending kopi kenangan yg botol https://t.co/0b69yHkkwr,0,neutral
4,@andalasfess kopi kenangan,0,neutral
5,@clearindigo Some time... mau kopi kenangan,0,neutral
6,@rubah_galak kopi kenangan,0,neutral
7,Info ngopi di Banyuwangi yang enak di mana genkss?\n\nAla ala kopi kenangan tapi yang ori Banyuwangi 😉🙏,0,neutral
8,@sbyfess Aku kopi kenangan spbu ngagel 😭,0,neutral
9,Kopi kenangan tetep no 1 https://t.co/35GWk7edx4,0,neutral
10,@FFOODFESS yang kopi kenangan itu?,0,neutral
