In [72]:
import pandas as pd
import string
import re
import nltk
import tensorflow as tf

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [73]:
df = pd.read_csv("/content/Corona_NLP.csv")
df.head()

Unnamed: 0,UserName,ScreenName,Location,TweetAt,OriginalTweet,Sentiment
0,1,44953,NYC,02-03-2020,TRENDING: New Yorkers encounter empty supermar...,Extremely Negative
1,2,44954,"Seattle, WA",02-03-2020,When I couldn't find hand sanitizer at Fred Me...,Positive
2,3,44955,,02-03-2020,Find out how you can protect yourself and love...,Extremely Positive
3,4,44956,Chicagoland,02-03-2020,#Panic buying hits #NewYork City as anxious sh...,Negative
4,5,44957,"Melbourne, Victoria",03-03-2020,#toiletpaper #dunnypaper #coronavirus #coronav...,Neutral


In [74]:
df = df.drop(columns=['UserName', 'ScreenName', 'Location', 'TweetAt',])
df

Unnamed: 0,OriginalTweet,Sentiment
0,TRENDING: New Yorkers encounter empty supermar...,Extremely Negative
1,When I couldn't find hand sanitizer at Fred Me...,Positive
2,Find out how you can protect yourself and love...,Extremely Positive
3,#Panic buying hits #NewYork City as anxious sh...,Negative
4,#toiletpaper #dunnypaper #coronavirus #coronav...,Neutral
...,...,...
3793,Meanwhile In A Supermarket in Israel -- People...,Positive
3794,Did you panic buy a lot of non-perishable item...,Negative
3795,Asst Prof of Economics @cconces was on @NBCPhi...,Neutral
3796,Gov need to do somethings instead of biar je r...,Extremely Negative


In [75]:
sentiment = pd.get_dummies(df.Sentiment)
df_tweets = pd.concat([df, sentiment], axis=1)
df_tweets = df_tweets.drop(columns='Sentiment')
df_tweets

Unnamed: 0,OriginalTweet,Extremely Negative,Extremely Positive,Negative,Neutral,Positive
0,TRENDING: New Yorkers encounter empty supermar...,1,0,0,0,0
1,When I couldn't find hand sanitizer at Fred Me...,0,0,0,0,1
2,Find out how you can protect yourself and love...,0,1,0,0,0
3,#Panic buying hits #NewYork City as anxious sh...,0,0,1,0,0
4,#toiletpaper #dunnypaper #coronavirus #coronav...,0,0,0,1,0
...,...,...,...,...,...,...
3793,Meanwhile In A Supermarket in Israel -- People...,0,0,0,0,1
3794,Did you panic buy a lot of non-perishable item...,0,0,1,0,0
3795,Asst Prof of Economics @cconces was on @NBCPhi...,0,0,0,1,0
3796,Gov need to do somethings instead of biar je r...,1,0,0,0,0


In [76]:
stopwords = stopwords.words('english')

def tokenize(tweet):
  tweet = word_tokenize(tweet)
  tweet = [item for item in tweet if not item in stopwords]
  tweet = [word.lower() for word in tweet if word.isalnum()]
  return tweet

df_tweets['OriginalTweet'] = df_tweets['OriginalTweet'].apply(tokenize)
df_tweets['OriginalTweet'].head()

0    [trending, new, yorkers, encounter, empty, sup...
1    [when, i, could, find, hand, sanitizer, fred, ...
2            [find, protect, loved, ones, coronavirus]
3    [panic, buying, hits, newyork, city, anxious, ...
4    [toiletpaper, dunnypaper, coronavirus, coronav...
Name: OriginalTweet, dtype: object

In [77]:
tweets = df_tweets['OriginalTweet'].values
label = df_tweets[['Extremely Negative', 'Extremely Positive', 'Negative', 'Neutral', 'Positive']].values


In [78]:
tweet_train, tweet_test, label_train, label_test = train_test_split(tweets, label, test_size=0.2)

In [79]:
tokenizer = Tokenizer(num_words=5000, oov_token='x')
tokenizer.fit_on_texts(tweet_train)

sequence_train = tokenizer.texts_to_sequences(tweet_train)
sequence_test = tokenizer.texts_to_sequences(tweet_test)

padded_train = pad_sequences(sequence_train,
                             padding='post',
                             maxlen=15,
                             truncating='post')
padded_test = pad_sequences(sequence_test,
                             padding='post',
                             maxlen=15,
                             truncating='post')

In [80]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=5000, output_dim=16),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(5, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

## **Callbacks**

In [81]:
class myCallback(tf.keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs={}):
    if(logs.get('accuracy')>0.9):
      print("\nAkurasi telah mencapai", logs.get('accuracy'))
      self.model.stop_training = True
callbacks = myCallback()

##**Training**

In [82]:
num_epochs = 30
history = model.fit(padded_train, label_train, epochs=num_epochs,
                    validation_data=(padded_test, label_test),
                    verbose=2, callbacks=[callbacks])

Epoch 1/30
95/95 - 4s - loss: 1.5492 - accuracy: 0.2785 - val_loss: 1.5060 - val_accuracy: 0.3039 - 4s/epoch - 44ms/step
Epoch 2/30
95/95 - 1s - loss: 1.4158 - accuracy: 0.3697 - val_loss: 1.4901 - val_accuracy: 0.3382 - 1s/epoch - 12ms/step
Epoch 3/30
95/95 - 1s - loss: 1.2204 - accuracy: 0.4618 - val_loss: 1.5645 - val_accuracy: 0.3579 - 1s/epoch - 13ms/step
Epoch 4/30
95/95 - 1s - loss: 1.0093 - accuracy: 0.5767 - val_loss: 1.6502 - val_accuracy: 0.3632 - 1s/epoch - 13ms/step
Epoch 5/30
95/95 - 1s - loss: 0.8063 - accuracy: 0.6666 - val_loss: 1.9135 - val_accuracy: 0.3382 - 1s/epoch - 13ms/step
Epoch 6/30
95/95 - 1s - loss: 0.6492 - accuracy: 0.7258 - val_loss: 2.4043 - val_accuracy: 0.3421 - 1s/epoch - 13ms/step
Epoch 7/30
95/95 - 1s - loss: 0.5370 - accuracy: 0.7735 - val_loss: 2.9423 - val_accuracy: 0.3395 - 1s/epoch - 13ms/step
Epoch 8/30
95/95 - 1s - loss: 0.4646 - accuracy: 0.8114 - val_loss: 3.3509 - val_accuracy: 0.3171 - 1s/epoch - 13ms/step
Epoch 9/30
95/95 - 1s - loss: 0.