# **Sentiment Classification with Convolutional Neural Networks**

### **[Part 1] Google Drive Mount**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive




---



### **[Part 2] Import Library**

In [2]:
from google.colab import files
from keras.models import load_model
from keras.preprocessing import sequence
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import pickle
import pandas as pd
import numpy as np

from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models.tools import HoverTool
from bokeh.models import ColumnDataSource
from bokeh.transform import dodge
from bokeh.models import Panel, Tabs
%matplotlib inline

Using TensorFlow backend.


### **[Part 3] Load Dataset and Model**

In [0]:
model = load_model('/content/drive/My Drive/Tugas_Akhir/model/model.h5')

In [0]:
with open('/content/drive/My Drive/Tugas_Akhir/model/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

In [0]:
test  = pd.read_excel('/content/drive/My Drive/Tugas_Akhir/dataset/test_label.xlsx', index_col=0)

In [6]:
test.head()

Unnamed: 0,username,text,label
0,pelamar_1,kali masyarakat nusantara dampak wabah sakit l...,-1
1,pelamar_1,mas th bangsa indonesia hny pintu gerbang merd...,0
2,pelamar_1,nemu orang paruh baya alami ganggu mental tida...,1
3,pelamar_1,bosan kyk,-1
4,pelamar_1,program stimulus tidak implementasi khawatir m...,-1


### **[Part 4] Convert Test Data into Pad_Sequences**

In [7]:
raw_docs_test  = test['text'].tolist()

word_seq_test  = tokenizer.texts_to_sequences(raw_docs_test)

word_index = tokenizer.word_index

print('Dictionary Size: ', len(word_index))

word_seq_test  = sequence.pad_sequences(word_seq_test, maxlen = 41)

Dictionary Size:  7495


### **[Part 5] Convert Label into Categorical Data**

In [0]:
target = []

for i in test['label']:
  if i == -1:
    target.append(0)
  elif i == 0:
    target.append(1)
  elif i == 1:
    target.append(2)

test['new_label'] = target

test_labels = to_categorical(test['new_label'])

In [0]:
y_test = test['new_label']

### **[Part 6] Predict Test Data**

In [0]:
y_pred = model.predict(word_seq_test).round()

In [0]:
pred_df = pd.DataFrame(data=y_pred)

In [0]:
y_pred = pred_df.values.argmax(1)

In [0]:
predict            = pd.DataFrame(columns=['username'] + ['text'] + ['label'] + ['predict'])
predict['username']= test['username']
predict['text']    = test['text']
predict['label']   = y_test
predict['predict'] = y_pred

### **[Part 7] Confusion Matrix**

In [14]:
cm = confusion_matrix(y_test, y_pred)

print(cm)
print('Accuracy ' +str(accuracy_score(y_test, y_pred)))

[[190  13  21]
 [ 23 208   9]
 [ 26  37 155]]
Accuracy 0.8108504398826979


In [15]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.79      0.85      0.82       224
           1       0.81      0.87      0.84       240
           2       0.84      0.71      0.77       218

    accuracy                           0.81       682
   macro avg       0.81      0.81      0.81       682
weighted avg       0.81      0.81      0.81       682



### **[Part 8] Uploaded Predict Files to Google Drive**

In [0]:
# predict.to_excel('predict.xlsx')

In [0]:
# !cp predict.xlsx '/content/drive/My Drive/Tugas_Akhir/dataset/'

# print("Uploaded Predict Files to Google Drive")

Uploaded Predict Files to Google Drive


In [16]:
predict.head()

Unnamed: 0,username,text,label,predict
0,pelamar_1,kali masyarakat nusantara dampak wabah sakit l...,0,0
1,pelamar_1,mas th bangsa indonesia hny pintu gerbang merd...,1,1
2,pelamar_1,nemu orang paruh baya alami ganggu mental tida...,2,2
3,pelamar_1,bosan kyk,0,0
4,pelamar_1,program stimulus tidak implementasi khawatir m...,0,0


### **[Part 9] Preparing Predict Files for Visualization**

In [0]:
result = predict[['username', 'text', 'predict']]

In [18]:
result.head()

Unnamed: 0,username,text,predict
0,pelamar_1,kali masyarakat nusantara dampak wabah sakit l...,0
1,pelamar_1,mas th bangsa indonesia hny pintu gerbang merd...,1
2,pelamar_1,nemu orang paruh baya alami ganggu mental tida...,2
3,pelamar_1,bosan kyk,0
4,pelamar_1,program stimulus tidak implementasi khawatir m...,0


### **[Part 10] Convert Predict Result to Each Sentiment**

In [0]:
pos = []
neg = []
neu = []

for i in result['predict']:
  if i == 0:
    pos.append(0)
    neu.append(0)
    neg.append(1)
  elif i == 1:
    pos.append(0)
    neu.append(1)
    neg.append(0)
  elif i == 2:
    pos.append(1)
    neu.append(0)
    neg.append(0)

In [0]:
result['pos']= pos
result['neu']= neu
result['neg']= neg

In [21]:
result.head()

Unnamed: 0,username,text,predict,pos,neu,neg
0,pelamar_1,kali masyarakat nusantara dampak wabah sakit l...,0,0,0,1
1,pelamar_1,mas th bangsa indonesia hny pintu gerbang merd...,1,0,1,0
2,pelamar_1,nemu orang paruh baya alami ganggu mental tida...,2,1,0,0
3,pelamar_1,bosan kyk,0,0,0,1
4,pelamar_1,program stimulus tidak implementasi khawatir m...,0,0,0,1


### **[Part 11] Grouping Data by Username**

In [0]:
grp = result.groupby(['username']).agg(np.sum)

In [0]:
grp = grp[['pos', 'neu', 'neg']]

In [0]:
grp = grp.reset_index()

In [25]:
grp.head()

Unnamed: 0,username,pos,neu,neg
0,pelamar_1,10,13,11
1,pelamar_10,12,11,11
2,pelamar_11,9,11,16
3,pelamar_12,10,14,8
4,pelamar_13,9,11,14


### **[Part 12] Setup Styles Function**

In [0]:
def set_style(p):
  p.xaxis.major_label_text_font_size = '7pt'
  p.yaxis.major_label_text_font_size = '8pt'
  p.x_range.range_padding = 0.01
  p.xgrid.grid_line_color = None
  p.legend.location = 'top_left'
  p.legend.orientation = 'horizontal'
  p.toolbar.logo = None
  p.toolbar_location = None

### **[Part 13] Show Visualization**

In [27]:
output_notebook()

data = {'username'    : grp['username'].tolist(),
        'pos'         : grp['pos'].tolist(),
        'neu'         : grp['neu'].tolist(),
        'neg'         : grp['neg'].tolist()}

source = ColumnDataSource(data=data)

p = figure(x_range=grp['username'].tolist(), y_range=(0, 20), plot_height=350, plot_width=1200,
           title='Klasifikasi Sentimen tiap Calon Karyawan', toolbar_location="left")

p.vbar(x=dodge('username', -0.25, range=p.x_range), top='pos', width=0.2, source=source,
       color="forestgreen", legend_label="pos")

p.vbar(x=dodge('username',  0.0,  range=p.x_range), top='neu', width=0.2, source=source,
       color="orange", legend_label="neu")

p.vbar(x=dodge('username',  0.25, range=p.x_range), top='neg', width=0.2, source=source,
       color="orangered", legend_label="neg")

set_style(p)

hover = HoverTool()
hover.tooltips = [
    ("Username ", "@username"),
    ("Sentimen Positif", "@pos Tweet"),
    ("Sentimen Netral", "@neu Tweet"),
    ("Sentimen Negatif", "@neg Tweet")]

hover.mode = 'vline'

p.add_tools(hover)

show(p)