### Import Library

In [1]:
import pandas as pd
import re
import string
import nltk
nltk.download('punkt')
nltk.download('stopwords')

from nltk.corpus import stopwords

!pip install Sastrawi

from Sastrawi.Stemmer.StemmerFactory import StemmerFactory


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


Collecting Sastrawi
  Downloading Sastrawi-1.0.1-py2.py3-none-any.whl (209 kB)
Installing collected packages: Sastrawi
Successfully installed Sastrawi-1.0.1


### Import Data

In [2]:
df = pd.read_csv('dataset_tweet_sentiment_cellular_service_provider.csv')

In [3]:
df.head()

Unnamed: 0,Id,Sentiment,Text Tweet
0,1,positive,<USER_MENTION> #BOIKOT_<PROVIDER_NAME> Gunakan...
1,2,positive,"Saktinya balik lagi, alhamdulillah :v <PROVIDE..."
2,3,negative,Selamat pagi <PROVIDER_NAME> bisa bantu kenap...
3,4,negative,Dear <PROVIDER_NAME> akhir2 ini jaringan data ...
4,5,negative,Selamat malam PENDUSTA <PROVIDER_NAME>


In [4]:
df = df.rename(columns={"Sentiment": "label", "Text Tweet": "text_tweet"})

In [5]:
df['label'].value_counts()

negative    161
positive    139
Name: label, dtype: int64

In [6]:
df = df[['text_tweet','label']]

In [7]:
df

Unnamed: 0,text_tweet,label
0,<USER_MENTION> #BOIKOT_<PROVIDER_NAME> Gunakan...,positive
1,"Saktinya balik lagi, alhamdulillah :v <PROVIDE...",positive
2,Selamat pagi <PROVIDER_NAME> bisa bantu kenap...,negative
3,Dear <PROVIDER_NAME> akhir2 ini jaringan data ...,negative
4,Selamat malam PENDUSTA <PROVIDER_NAME>,negative
...,...,...
295,"Pantesan lancar ya, sinyal <PROVIDER_NAME> yan...",positive
296,Alhamdulillah lancar pakai <PROVIDER_NAME>,positive
297,"Untung pakai internet <PROVIDER_NAME>, lancar,...",positive
298,"di tempat ramai seperti di lokasi wisata, <PRO...",positive


## Data Preparation

### Data Cleaning

### Cek Data Kosong dan Data Duplikat

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   text_tweet  300 non-null    object
 1   label       300 non-null    object
dtypes: object(2)
memory usage: 4.8+ KB


In [9]:
df.isna().sum()

text_tweet    0
label         0
dtype: int64

In [10]:
df.duplicated().sum()

2

In [12]:
df = df.drop_duplicates().reset_index(drop=True)
df

Unnamed: 0,text_tweet,label
0,<USER_MENTION> #BOIKOT_<PROVIDER_NAME> Gunakan...,positive
1,"Saktinya balik lagi, alhamdulillah :v <PROVIDE...",positive
2,Selamat pagi <PROVIDER_NAME> bisa bantu kenap...,negative
3,Dear <PROVIDER_NAME> akhir2 ini jaringan data ...,negative
4,Selamat malam PENDUSTA <PROVIDER_NAME>,negative
...,...,...
293,"Pantesan lancar ya, sinyal <PROVIDER_NAME> yan...",positive
294,Alhamdulillah lancar pakai <PROVIDER_NAME>,positive
295,"Untung pakai internet <PROVIDER_NAME>, lancar,...",positive
296,"di tempat ramai seperti di lokasi wisata, <PRO...",positive


## Tokenisasi

Proses memisahkan kata, simbol, frase dari sebuah text

In [13]:
df['tokenize'] = df.apply(lambda x: nltk.word_tokenize(x['text_tweet']),axis=1)

In [14]:
df

Unnamed: 0,text_tweet,label,tokenize
0,<USER_MENTION> #BOIKOT_<PROVIDER_NAME> Gunakan...,positive,"[<, USER_MENTION, >, #, BOIKOT_, <, PROVIDER_N..."
1,"Saktinya balik lagi, alhamdulillah :v <PROVIDE...",positive,"[Saktinya, balik, lagi, ,, alhamdulillah, :, v..."
2,Selamat pagi <PROVIDER_NAME> bisa bantu kenap...,negative,"[Selamat, pagi, <, PROVIDER_NAME, >, bisa, ban..."
3,Dear <PROVIDER_NAME> akhir2 ini jaringan data ...,negative,"[Dear, <, PROVIDER_NAME, >, akhir2, ini, jarin..."
4,Selamat malam PENDUSTA <PROVIDER_NAME>,negative,"[Selamat, malam, PENDUSTA, <, PROVIDER_NAME, >]"
...,...,...,...
293,"Pantesan lancar ya, sinyal <PROVIDER_NAME> yan...",positive,"[Pantesan, lancar, ya, ,, sinyal, <, PROVIDER_..."
294,Alhamdulillah lancar pakai <PROVIDER_NAME>,positive,"[Alhamdulillah, lancar, pakai, <, PROVIDER_NAM..."
295,"Untung pakai internet <PROVIDER_NAME>, lancar,...",positive,"[Untung, pakai, internet, <, PROVIDER_NAME, >,..."
296,"di tempat ramai seperti di lokasi wisata, <PRO...",positive,"[di, tempat, ramai, seperti, di, lokasi, wisat..."


### Case Folding & Remove Punctuation

In [15]:
def clean_text(text):
    text = text.lower() #lowercase atau case folding
    text = re.sub('@[^\s]+', '', text) #remove username
    text = re.sub('\[.*?\]', '', text) # remove square brackets
    text = re.sub('((www\.[^\s]+)|(https?://[^\s]+))', '', text) # remove URLs
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text) # remove punctuation
    text = re.sub('\w*\d\w*', '', text) 
    text = re.sub('[‘’“”…]', '', text)
    text = re.sub('\n', '', text)
    return text

In [16]:
clean1 = lambda x: clean_text(x)

df['clean1'] = pd.DataFrame(df['text_tweet'].apply(clean1))
df

Unnamed: 0,text_tweet,label,tokenize,clean1
0,<USER_MENTION> #BOIKOT_<PROVIDER_NAME> Gunakan...,positive,"[<, USER_MENTION, >, #, BOIKOT_, <, PROVIDER_N...",usermention boikotprovidername gunakan produk ...
1,"Saktinya balik lagi, alhamdulillah :v <PROVIDE...",positive,"[Saktinya, balik, lagi, ,, alhamdulillah, :, v...",saktinya balik lagi alhamdulillah v providername
2,Selamat pagi <PROVIDER_NAME> bisa bantu kenap...,negative,"[Selamat, pagi, <, PROVIDER_NAME, >, bisa, ban...",selamat pagi providername bisa bantu kenapa d...
3,Dear <PROVIDER_NAME> akhir2 ini jaringan data ...,negative,"[Dear, <, PROVIDER_NAME, >, akhir2, ini, jarin...",dear providername ini jaringan data lemot ban...
4,Selamat malam PENDUSTA <PROVIDER_NAME>,negative,"[Selamat, malam, PENDUSTA, <, PROVIDER_NAME, >]",selamat malam pendusta providername
...,...,...,...,...
293,"Pantesan lancar ya, sinyal <PROVIDER_NAME> yan...",positive,"[Pantesan, lancar, ya, ,, sinyal, <, PROVIDER_...",pantesan lancar ya sinyal providername yang l...
294,Alhamdulillah lancar pakai <PROVIDER_NAME>,positive,"[Alhamdulillah, lancar, pakai, <, PROVIDER_NAM...",alhamdulillah lancar pakai providername
295,"Untung pakai internet <PROVIDER_NAME>, lancar,...",positive,"[Untung, pakai, internet, <, PROVIDER_NAME, >,...",untung pakai internet providername lancar jadi...
296,"di tempat ramai seperti di lokasi wisata, <PRO...",positive,"[di, tempat, ramai, seperti, di, lokasi, wisat...",di tempat ramai seperti di lokasi wisata provi...


### Stemming 

In [17]:
text = df['clean1']

In [18]:
factory = StemmerFactory()
stemmer = factory.create_stemmer()

output = [(stemmer.stem(token)) for token in text]

In [19]:
df['clean2'] = output
df

Unnamed: 0,text_tweet,label,tokenize,clean1,clean2
0,<USER_MENTION> #BOIKOT_<PROVIDER_NAME> Gunakan...,positive,"[<, USER_MENTION, >, #, BOIKOT_, <, PROVIDER_N...",usermention boikotprovidername gunakan produk ...,usermention boikotprovidername guna produk ban...
1,"Saktinya balik lagi, alhamdulillah :v <PROVIDE...",positive,"[Saktinya, balik, lagi, ,, alhamdulillah, :, v...",saktinya balik lagi alhamdulillah v providername,sakti balik lagi alhamdulillah v providername
2,Selamat pagi <PROVIDER_NAME> bisa bantu kenap...,negative,"[Selamat, pagi, <, PROVIDER_NAME, >, bisa, ban...",selamat pagi providername bisa bantu kenapa d...,selamat pagi providername bisa bantu kenapa di...
3,Dear <PROVIDER_NAME> akhir2 ini jaringan data ...,negative,"[Dear, <, PROVIDER_NAME, >, akhir2, ini, jarin...",dear providername ini jaringan data lemot ban...,dear providername ini jaring data lot banget p...
4,Selamat malam PENDUSTA <PROVIDER_NAME>,negative,"[Selamat, malam, PENDUSTA, <, PROVIDER_NAME, >]",selamat malam pendusta providername,selamat malam dusta providername
...,...,...,...,...,...
293,"Pantesan lancar ya, sinyal <PROVIDER_NAME> yan...",positive,"[Pantesan, lancar, ya, ,, sinyal, <, PROVIDER_...",pantesan lancar ya sinyal providername yang l...,pantesan lancar ya sinyal providername yang la...
294,Alhamdulillah lancar pakai <PROVIDER_NAME>,positive,"[Alhamdulillah, lancar, pakai, <, PROVIDER_NAM...",alhamdulillah lancar pakai providername,alhamdulillah lancar pakai providername
295,"Untung pakai internet <PROVIDER_NAME>, lancar,...",positive,"[Untung, pakai, internet, <, PROVIDER_NAME, >,...",untung pakai internet providername lancar jadi...,untung pakai internet providername lancar jadi...
296,"di tempat ramai seperti di lokasi wisata, <PRO...",positive,"[di, tempat, ramai, seperti, di, lokasi, wisat...",di tempat ramai seperti di lokasi wisata provi...,di tempat ramai seperti di lokasi wisata provi...


### Stopwords

In [20]:
tambahan = ['telkom', 'providername', 'provider', 'retweet', 'rt', 'rts','wkwkwk']

sw = set().union(stopwords.words('indonesian'), tambahan)

In [21]:
len(sw)

764

In [22]:
sw

{'ada',
 'adalah',
 'adanya',
 'adapun',
 'agak',
 'agaknya',
 'agar',
 'akan',
 'akankah',
 'akhir',
 'akhiri',
 'akhirnya',
 'aku',
 'akulah',
 'amat',
 'amatlah',
 'anda',
 'andalah',
 'antar',
 'antara',
 'antaranya',
 'apa',
 'apaan',
 'apabila',
 'apakah',
 'apalagi',
 'apatah',
 'artinya',
 'asal',
 'asalkan',
 'atas',
 'atau',
 'ataukah',
 'ataupun',
 'awal',
 'awalnya',
 'bagai',
 'bagaikan',
 'bagaimana',
 'bagaimanakah',
 'bagaimanapun',
 'bagi',
 'bagian',
 'bahkan',
 'bahwa',
 'bahwasanya',
 'baik',
 'bakal',
 'bakalan',
 'balik',
 'banyak',
 'bapak',
 'baru',
 'bawah',
 'beberapa',
 'begini',
 'beginian',
 'beginikah',
 'beginilah',
 'begitu',
 'begitukah',
 'begitulah',
 'begitupun',
 'bekerja',
 'belakang',
 'belakangan',
 'belum',
 'belumlah',
 'benar',
 'benarkah',
 'benarlah',
 'berada',
 'berakhir',
 'berakhirlah',
 'berakhirnya',
 'berapa',
 'berapakah',
 'berapalah',
 'berapapun',
 'berarti',
 'berawal',
 'berbagai',
 'berdatangan',
 'beri',
 'berikan',
 'berikut'

In [23]:
df['clean3'] = pd.DataFrame(df['clean2'].apply(lambda x: ' '.join([y for y in x.split() if y not in (sw)])))

In [24]:
df

Unnamed: 0,text_tweet,label,tokenize,clean1,clean2,clean3
0,<USER_MENTION> #BOIKOT_<PROVIDER_NAME> Gunakan...,positive,"[<, USER_MENTION, >, #, BOIKOT_, <, PROVIDER_N...",usermention boikotprovidername gunakan produk ...,usermention boikotprovidername guna produk ban...,usermention boikotprovidername produk bangsa
1,"Saktinya balik lagi, alhamdulillah :v <PROVIDE...",positive,"[Saktinya, balik, lagi, ,, alhamdulillah, :, v...",saktinya balik lagi alhamdulillah v providername,sakti balik lagi alhamdulillah v providername,sakti alhamdulillah v
2,Selamat pagi <PROVIDER_NAME> bisa bantu kenap...,negative,"[Selamat, pagi, <, PROVIDER_NAME, >, bisa, ban...",selamat pagi providername bisa bantu kenapa d...,selamat pagi providername bisa bantu kenapa di...,selamat pagi bantu kamar sinyal hilang panggil...
3,Dear <PROVIDER_NAME> akhir2 ini jaringan data ...,negative,"[Dear, <, PROVIDER_NAME, >, akhir2, ini, jarin...",dear providername ini jaringan data lemot ban...,dear providername ini jaring data lot banget p...,dear jaring data lot banget h
4,Selamat malam PENDUSTA <PROVIDER_NAME>,negative,"[Selamat, malam, PENDUSTA, <, PROVIDER_NAME, >]",selamat malam pendusta providername,selamat malam dusta providername,selamat malam dusta
...,...,...,...,...,...,...
293,"Pantesan lancar ya, sinyal <PROVIDER_NAME> yan...",positive,"[Pantesan, lancar, ya, ,, sinyal, <, PROVIDER_...",pantesan lancar ya sinyal providername yang l...,pantesan lancar ya sinyal providername yang la...,pantesan lancar ya sinyal lancar lombok timur
294,Alhamdulillah lancar pakai <PROVIDER_NAME>,positive,"[Alhamdulillah, lancar, pakai, <, PROVIDER_NAM...",alhamdulillah lancar pakai providername,alhamdulillah lancar pakai providername,alhamdulillah lancar pakai
295,"Untung pakai internet <PROVIDER_NAME>, lancar,...",positive,"[Untung, pakai, internet, <, PROVIDER_NAME, >,...",untung pakai internet providername lancar jadi...,untung pakai internet providername lancar jadi...,untung pakai internet lancar ide
296,"di tempat ramai seperti di lokasi wisata, <PRO...",positive,"[di, tempat, ramai, seperti, di, lokasi, wisat...",di tempat ramai seperti di lokasi wisata provi...,di tempat ramai seperti di lokasi wisata provi...,ramai lokasi wisata jaya


### Data Encoding

In [26]:
df['label_num'] = df['label'].apply(lambda score: 1 if score == 'positive' else 0)

In [27]:
df

Unnamed: 0,text_tweet,label,tokenize,clean1,clean2,clean3,label_num
0,<USER_MENTION> #BOIKOT_<PROVIDER_NAME> Gunakan...,positive,"[<, USER_MENTION, >, #, BOIKOT_, <, PROVIDER_N...",usermention boikotprovidername gunakan produk ...,usermention boikotprovidername guna produk ban...,usermention boikotprovidername produk bangsa,1
1,"Saktinya balik lagi, alhamdulillah :v <PROVIDE...",positive,"[Saktinya, balik, lagi, ,, alhamdulillah, :, v...",saktinya balik lagi alhamdulillah v providername,sakti balik lagi alhamdulillah v providername,sakti alhamdulillah v,1
2,Selamat pagi <PROVIDER_NAME> bisa bantu kenap...,negative,"[Selamat, pagi, <, PROVIDER_NAME, >, bisa, ban...",selamat pagi providername bisa bantu kenapa d...,selamat pagi providername bisa bantu kenapa di...,selamat pagi bantu kamar sinyal hilang panggil...,0
3,Dear <PROVIDER_NAME> akhir2 ini jaringan data ...,negative,"[Dear, <, PROVIDER_NAME, >, akhir2, ini, jarin...",dear providername ini jaringan data lemot ban...,dear providername ini jaring data lot banget p...,dear jaring data lot banget h,0
4,Selamat malam PENDUSTA <PROVIDER_NAME>,negative,"[Selamat, malam, PENDUSTA, <, PROVIDER_NAME, >]",selamat malam pendusta providername,selamat malam dusta providername,selamat malam dusta,0
...,...,...,...,...,...,...,...
293,"Pantesan lancar ya, sinyal <PROVIDER_NAME> yan...",positive,"[Pantesan, lancar, ya, ,, sinyal, <, PROVIDER_...",pantesan lancar ya sinyal providername yang l...,pantesan lancar ya sinyal providername yang la...,pantesan lancar ya sinyal lancar lombok timur,1
294,Alhamdulillah lancar pakai <PROVIDER_NAME>,positive,"[Alhamdulillah, lancar, pakai, <, PROVIDER_NAM...",alhamdulillah lancar pakai providername,alhamdulillah lancar pakai providername,alhamdulillah lancar pakai,1
295,"Untung pakai internet <PROVIDER_NAME>, lancar,...",positive,"[Untung, pakai, internet, <, PROVIDER_NAME, >,...",untung pakai internet providername lancar jadi...,untung pakai internet providername lancar jadi...,untung pakai internet lancar ide,1
296,"di tempat ramai seperti di lokasi wisata, <PRO...",positive,"[di, tempat, ramai, seperti, di, lokasi, wisat...",di tempat ramai seperti di lokasi wisata provi...,di tempat ramai seperti di lokasi wisata provi...,ramai lokasi wisata jaya,1


# Finishing Clean Data to CSV

In [28]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 298 entries, 0 to 297
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   text_tweet  298 non-null    object
 1   label       298 non-null    object
 2   tokenize    298 non-null    object
 3   clean1      298 non-null    object
 4   clean2      298 non-null    object
 5   clean3      298 non-null    object
 6   label_num   298 non-null    int64 
dtypes: int64(1), object(6)
memory usage: 16.4+ KB


In [29]:
df.isna().sum()

text_tweet    0
label         0
tokenize      0
clean1        0
clean2        0
clean3        0
label_num     0
dtype: int64

In [31]:
df.duplicated(subset='clean3').sum()

4

In [32]:
df = df.drop_duplicates(subset ='clean3').reset_index(drop=True)

In [33]:
df.duplicated(subset='clean3').sum()

0

In [34]:
df = df[['clean3','label_num']]
df

Unnamed: 0,clean3,label_num
0,usermention boikotprovidername produk bangsa,1
1,sakti alhamdulillah v,1
2,selamat pagi bantu kamar sinyal hilang panggil...,0
3,dear jaring data lot banget h,0
4,selamat malam dusta,0
...,...,...
289,pantesan lancar ya sinyal lancar lombok timur,1
290,alhamdulillah lancar pakai,1
291,untung pakai internet lancar ide,1
292,ramai lokasi wisata jaya,1


In [35]:
df.rename({'clean3':'text', 'label_num':'label'}, axis=1, inplace=True)

In [36]:
df

Unnamed: 0,text,label
0,usermention boikotprovidername produk bangsa,1
1,sakti alhamdulillah v,1
2,selamat pagi bantu kamar sinyal hilang panggil...,0
3,dear jaring data lot banget h,0
4,selamat malam dusta,0
...,...,...
289,pantesan lancar ya sinyal lancar lombok timur,1
290,alhamdulillah lancar pakai,1
291,untung pakai internet lancar ide,1
292,ramai lokasi wisata jaya,1


In [38]:
df.to_csv('indihome_clean.csv', index=False)

## Modelling

## Feature Extraction

In [39]:
from sklearn.feature_extraction.text import CountVectorizer

In [40]:
df

Unnamed: 0,text,label
0,usermention boikotprovidername produk bangsa,1
1,sakti alhamdulillah v,1
2,selamat pagi bantu kamar sinyal hilang panggil...,0
3,dear jaring data lot banget h,0
4,selamat malam dusta,0
...,...,...
289,pantesan lancar ya sinyal lancar lombok timur,1
290,alhamdulillah lancar pakai,1
291,untung pakai internet lancar ide,1
292,ramai lokasi wisata jaya,1


In [41]:
df['text'] = df['text'].astype(str)

In [42]:
vec = CountVectorizer().fit(df['text'])
vec_transform = vec.transform(df['text'])

In [43]:
print(vec_transform)

  (0, 43)	1
  (0, 80)	1
  (0, 482)	1
  (0, 652)	1
  (1, 11)	1
  (1, 520)	1
  (2, 47)	1
  (2, 122)	1
  (2, 210)	1
  (2, 267)	1
  (2, 445)	1
  (2, 455)	1
  (2, 533)	1
  (2, 556)	1
  (2, 648)	1
  (3, 42)	1
  (3, 123)	1
  (3, 126)	1
  (3, 248)	1
  (3, 342)	1
  (4, 145)	1
  (4, 356)	1
  (4, 533)	1
  (5, 171)	1
  (5, 450)	1
  :	:
  (288, 547)	1
  (288, 573)	1
  (288, 600)	1
  (289, 322)	2
  (289, 341)	1
  (289, 457)	1
  (289, 556)	1
  (289, 612)	1
  (289, 668)	1
  (290, 11)	1
  (290, 322)	1
  (290, 447)	1
  (291, 222)	1
  (291, 227)	1
  (291, 322)	1
  (291, 447)	1
  (291, 643)	1
  (292, 250)	1
  (292, 340)	1
  (292, 503)	1
  (292, 664)	1
  (293, 15)	1
  (293, 167)	1
  (293, 556)	1
  (293, 570)	1


In [44]:
vec_transform.shape

(294, 678)

#### Train Test Split

In [45]:
from sklearn.model_selection import train_test_split

In [46]:
X = vec_transform.toarray()
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

### Modelling Machine Learning

In [48]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

#training machine learning
model1 = MultinomialNB().fit(X_train, y_train)
model2 = DecisionTreeClassifier().fit(X_train, y_train)
model3 = RandomForestClassifier().fit(X_train, y_train)

#prediksi label
prediksi_model1 = model1.predict(X_test)
prediksi_model2 = model2.predict(X_test)
prediksi_model3 = model3.predict(X_test)

### Evaluation

In [50]:
print('Akurasi model 1 adalah', model1.score(X_test, y_test))
print('Akurasi model 2 adalah', model2.score(X_test, y_test))
print('Akurasi model 3 adalah', model3.score(X_test, y_test))

Akurasi model 1 adalah 0.864406779661017
Akurasi model 2 adalah 0.711864406779661
Akurasi model 3 adalah 0.7966101694915254


### Prediksi Data Baru

In [51]:
input_text = input('Masukkan opini baru:')
vec_input_text = vec.transform([input_text])

print(model1.predict(vec_input_text.toarray()))
print(model2.predict(vec_input_text.toarray()))
print(model3.predict(vec_input_text.toarray()))

Masukkan opini baru:bagus banget deh internetnya lambat
[0]
[0]
[1]


In [52]:
df_predict = pd.read_csv('indihome_clean.csv')

In [53]:
df_predict

Unnamed: 0,text,label
0,usermention boikotprovidername produk bangsa,1
1,sakti alhamdulillah v,1
2,selamat pagi bantu kamar sinyal hilang panggil...,0
3,dear jaring data lot banget h,0
4,selamat malam dusta,0
...,...,...
289,pantesan lancar ya sinyal lancar lombok timur,1
290,alhamdulillah lancar pakai,1
291,untung pakai internet lancar ide,1
292,ramai lokasi wisata jaya,1


In [54]:
df_predict['text'] = df_predict['text'].astype(str)

In [56]:
vec_predict = vec.transform(df_predict['text'])

In [57]:
hasil_prediksi = model1.predict(vec_predict.toarray())
hasil_prediksi

array([0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0], dtype=int64)

In [58]:
df_predict['hasil_prediksi'] = hasil_prediksi.tolist()
df_predict

Unnamed: 0,text,label,hasil_prediksi
0,usermention boikotprovidername produk bangsa,1,0
1,sakti alhamdulillah v,1,1
2,selamat pagi bantu kamar sinyal hilang panggil...,0,1
3,dear jaring data lot banget h,0,0
4,selamat malam dusta,0,0
...,...,...,...
289,pantesan lancar ya sinyal lancar lombok timur,1,1
290,alhamdulillah lancar pakai,1,1
291,untung pakai internet lancar ide,1,1
292,ramai lokasi wisata jaya,1,1
