**MUHAMMAD FAHMI**

**FRESH GRADUATE ACADEMY**

**MACHINE LEARNING - IBM**

**NO. REGISTRASI : 0182180131-64**

**MEMBUAT MODEL NLP DENGAN TENSORFLOW**

# **Instal dan import**

In [1]:
!pip install tweepy



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SimpleRNN, SpatialDropout1D

from sklearn.model_selection import train_test_split
from sklearn import metrics

import os
for dirname, _, filenames in os.walk('Twitter_Data'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# **Menyiapkan Data**

In [4]:
df = pd.read_csv('Twitter_Data.csv')
df['category'] = df['category'].map({-1.0:'Negative', 0.0:'Neutral', 1.0:'Positive'})
df.head()

Unnamed: 0,clean_text,category
0,when modi promised “minimum government maximum...,Negative
1,talk all the nonsense and continue all the dra...,Neutral
2,what did just say vote for modi welcome bjp t...,Positive
3,asking his supporters prefix chowkidar their n...,Positive
4,answer who among these the most powerful world...,Positive


In [5]:
df.shape

(162980, 2)

**Mengombine data dan memberikan label**

In [9]:
num_of_rows = 5000
shuffled = df.reindex(np.random.permutation(df.index))
nt = shuffled[shuffled['category'] == 'Neutral'][:num_of_rows]
ng = shuffled[shuffled['category'] == 'Negative'][:num_of_rows]
ps = shuffled[shuffled['category'] == 'Positive'][:num_of_rows]
combine_data = pd.concat([nt, ng, ps], ignore_index=True)
combine_data = combine_data.reindex(np.random.permutation(combine_data.index))
combine_data['label'] = 0
combine_data.head()

Unnamed: 0,clean_text,category,label
8505,modi hai tow mumkin hai very bad news for cong...,Negative,0
13772,our most respected hnrble prime minister sir d...,Positive,0
5598,this propaganda magazine its editor sold out m...,Negative,0
3147,actually waiting for remark from that modi sir...,Neutral,0
8905,seems there one intellectual raghuram rajan yo...,Negative,0


In [10]:
print(len(combine_data[combine_data['category'] == 'Neutral']))
print(len(combine_data[combine_data['category'] == 'Negative']))
print(len(combine_data[combine_data['category'] == 'Positive']))

5000
5000
5000


In [11]:
df = combine_data.copy()

In [12]:
print(len(df[df['category'] == 'Neutral']))
print(len(df[df['category'] == 'Negative']))
print(len(df[df['category'] == 'Positive']))

5000
5000
5000


In [14]:
df.loc[df['category'] == 'Neutral', 'label'] = 0
df.loc[df['category'] == 'Negative', 'label'] = 1
df.loc[df['category'] == 'Positive', 'label'] = 2

In [15]:
df.head(10)

Unnamed: 0,clean_text,category,label
8505,modi hai tow mumkin hai very bad news for cong...,Negative,1
13772,our most respected hnrble prime minister sir d...,Positive,2
5598,this propaganda magazine its editor sold out m...,Negative,1
3147,actually waiting for remark from that modi sir...,Neutral,0
8905,seems there one intellectual raghuram rajan yo...,Negative,1
5282,entire effort this chowkidar keep enriching hi...,Negative,1
13682,modi will wait your important news,Positive,2
3503,this what will happen modi future,Neutral,0
6686,tell your naive anchor watch modis day before ...,Negative,1
13826,thats not true many are big supporters bjp and...,Positive,2


In [19]:
from tensorflow.keras.utils import to_categorical

In [20]:
labels = to_categorical(df['label'], num_classes=3)
labels.shape

(15000, 3)

In [21]:
labels[:10]

array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32)

# Tokenizer

In [22]:
max_features = 4000
max_len = 150
token = Tokenizer(num_words=max_features)
token.fit_on_texts(df['clean_text'].values)
sequences = token.texts_to_sequences(df['clean_text'].values)
X = pad_sequences(sequences, maxlen=max_len)

In [23]:
word_index = token.word_index
print('Ditemukan %s token unik.' % len(word_index))

Ditemukan 27168 token unik.


# **Membuat model NLP**

In [24]:
y = labels

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [26]:
embed_dim = 128
lstm_out = 98

In [39]:
import tensorflow as tf
model = tf.keras.Sequential([
                             tf.keras.layers.Embedding(max_features, embed_dim, input_length = X.shape[1]),
                             tf.keras.layers.SpatialDropout1D(0.7),
                             tf.keras.layers.LSTM(64, dropout=0.7, recurrent_dropout=0.7),
                             tf.keras.layers.Dense(3, activation='softmax')])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [40]:
from keras.callbacks import EarlyStopping

In [41]:
batch_size = 50
history = model.fit(X_train, y_train, epochs=20, batch_size=batch_size,validation_split=0.2,callbacks=[EarlyStopping(monitor='val_loss',patience=7, min_delta=0.0001)])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20


In [42]:
loss, accuracy = model.evaluate(X_test, y_test)
print("loss", loss)
print("accuracy", accuracy)

loss 0.4061479866504669
accuracy 0.8843333125114441


# **Pengetesan Akurasi Model**

In [43]:
text = ['they are just sitting and waiting for Modi to come accross them']
text = token.texts_to_sequences(text)
text = pad_sequences(text, maxlen=max_len)
res = model.predict(text)
res

array([[0.99684393, 0.00120316, 0.00195294]], dtype=float32)

In [45]:
if np.argmax(res) == 0:
  print("Komen Netral")
elif np.argmax(res) == 1:
  print("Komen Negative")
elif np.argmax(res) == 2:
  print("Komen Positive")

Komen Netral


# **Sekian dan Terimakasih**