# 1 - Introduction

Name : Gilang Wiradhyaksa

Batch : SBY - 001

Data : [Twitter Sentiment Analysis](https://www.kaggle.com/datasets/jp797498e/twitter-entity-sentiment-analysis/data)

# 2 - Import Libraries

In [50]:
import re
import nltk
import string
import numpy as np
import pandas as pd
import pickle

from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('punkt')

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\GilangW\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\GilangW\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# 3 - Data & File Loading

In [37]:
df_val = pd.read_csv('twitter_validation.csv')
df_val.columns = ['id','entity','sentiment','tweet_content']
df_val.head()

Unnamed: 0,id,entity,sentiment,tweet_content
0,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...
1,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it funct...
2,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking,..."
3,4433,Google,Neutral,Now the President is slapping Americans in the...
4,6273,FIFA,Negative,Hi @EAHelp I’ve had Madeleine McCann in my cel...


In [38]:
#MODEL
with open('model_lstm_1.pkl', 'rb') as file_1:
    model_lstm_1_pkl = pickle.load(file_1)

#SCALER
# with open('text_preprocessing.pkl', 'rb') as file_2:
#     text_preprocessing_pkl = pickle.load(file_2)

In [39]:
# INDONESIAN STOPWORDS
# stopwords_nltk = nltk.corpus.stopwords
from nltk.corpus import stopwords
stpwds_id = list(set(stopwords.words('indonesian')))
stpwds_id.append('oh')
print(stpwds_id[:5])

# Define Stemming
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
stemmer_id = StemmerFactory().create_stemmer()

['sesuatu', 'beri', 'jadinya', 'tampaknya', 'sendiri']


In [40]:
# ENGLISH STOPWORDS
# stopwords_nltk = nltk.corpus.stopwords
stop_words = stopwords.words('english')
print(stop_words[:5])

# Define Stemming
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()

['i', 'me', 'my', 'myself', 'we']


In [41]:
# Create A Function for Text Preprocessing
def text_preprocessing(text):
  # # Case folding
  text = text.lower()

  # Mention removal
  text = re.sub("@[A-Za-z0-9_]+", " ", text)

  # Hashtags removal
  text = re.sub("#[A-Za-z0-9_]+", " ", text)

  # Newline removal (\n)
  text = re.sub(r"\\n", " ",text)

  # Whitespace removal
  text = text.strip()

  # URL removal
  text = re.sub(r"http\S+", " ", text)
  text = re.sub(r"www.\S+", " ", text)

  # Non-letter removal (such as emoticon, symbol (like μ, $, 兀), etc
  text = re.sub("[^A-Za-z\s']", " ", text)

  # Tokenization
  tokens = word_tokenize(text)

  # Stopwords removal
  tokens = [word for word in tokens if word not in stop_words]

  # Stemming
  tokens = [stemmer.stem(word) for word in tokens]

  # Combining Tokens
  text = ' '.join(tokens)

  return text

In [42]:
df_val['text_processed'] = df_val['tweet_content'].apply(lambda x: text_preprocessing(x))
df_val

Unnamed: 0,id,entity,sentiment,tweet_content,text_processed
0,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...,bbc news amazon boss jeff bezo reject claim co...
1,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it funct...,pay word function poorli chromebook
2,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking,...",csgo matchmak full closet hack 's truli aw game
3,4433,Google,Neutral,Now the President is slapping Americans in the...,presid slap american face realli commit unlaw ...
4,6273,FIFA,Negative,Hi @EAHelp I’ve had Madeleine McCann in my cel...,hi madelein mccann cellar past year littl snea...
...,...,...,...,...,...
994,4891,GrandTheftAuto(GTA),Irrelevant,⭐️ Toronto is the arts and culture capital of ...,toronto art cultur capit canada wonder want st...
995,4359,CS-GO,Irrelevant,tHIS IS ACTUALLY A GOOD MOVE TOT BRING MORE VI...,actual good move tot bring viewer one peopl go...
996,2652,Borderlands,Positive,Today sucked so it’s time to drink wine n play...,today suck time drink wine n play borderland s...
997,8069,Microsoft,Positive,Bought a fraction of Microsoft today. Small wins.,bought fraction microsoft today small win


In [43]:
# Change Target into Number
df_val['label'] = df_val['sentiment'].replace({'Negative' : 0, 'Neutral' : 1, 'Positive' : 2, 'Irrelevant' : 3})
df_val

Unnamed: 0,id,entity,sentiment,tweet_content,text_processed,label
0,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...,bbc news amazon boss jeff bezo reject claim co...,1
1,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it funct...,pay word function poorli chromebook,0
2,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking,...",csgo matchmak full closet hack 's truli aw game,0
3,4433,Google,Neutral,Now the President is slapping Americans in the...,presid slap american face realli commit unlaw ...,1
4,6273,FIFA,Negative,Hi @EAHelp I’ve had Madeleine McCann in my cel...,hi madelein mccann cellar past year littl snea...,0
...,...,...,...,...,...,...
994,4891,GrandTheftAuto(GTA),Irrelevant,⭐️ Toronto is the arts and culture capital of ...,toronto art cultur capit canada wonder want st...,3
995,4359,CS-GO,Irrelevant,tHIS IS ACTUALLY A GOOD MOVE TOT BRING MORE VI...,actual good move tot bring viewer one peopl go...,3
996,2652,Borderlands,Positive,Today sucked so it’s time to drink wine n play...,today suck time drink wine n play borderland s...,2
997,8069,Microsoft,Positive,Bought a fraction of Microsoft today. Small wins.,bought fraction microsoft today small win,2


In [44]:
df_val_final = df_val[['text_processed']]
df_val_final

Unnamed: 0,text_processed
0,bbc news amazon boss jeff bezo reject claim co...
1,pay word function poorli chromebook
2,csgo matchmak full closet hack 's truli aw game
3,presid slap american face realli commit unlaw ...
4,hi madelein mccann cellar past year littl snea...
...,...
994,toronto art cultur capit canada wonder want st...
995,actual good move tot bring viewer one peopl go...
996,today suck time drink wine n play borderland s...
997,bought fraction microsoft today small win


# 4 - Inferencing

In [45]:
df_val_predict = model_lstm_1_pkl.predict(df_val_final)



In [46]:
df_val_predict

array([[2.8386005e-06, 9.9999547e-01, 1.0658904e-06, 6.0354296e-07],
       [9.9999809e-01, 1.1799059e-06, 4.2796162e-07, 2.1457087e-07],
       [9.9999714e-01, 1.7609910e-06, 6.9445298e-07, 4.4194726e-07],
       ...,
       [2.6272498e-06, 7.6300985e-06, 9.9998593e-01, 3.8232679e-06],
       [1.5009049e-07, 2.5466848e-06, 9.9999523e-01, 2.1348085e-06],
       [2.9605599e-06, 9.9999416e-01, 1.8558114e-06, 1.0603138e-06]],
      dtype=float32)

# 5 - Result

In [47]:
predicted_indices = np.argmax(df_val_predict, axis=1)

In [52]:
df_val['prediction_result'] = predicted_indices
df_val

Unnamed: 0,id,entity,sentiment,tweet_content,text_processed,label,prediction_result
0,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...,bbc news amazon boss jeff bezo reject claim co...,1,1
1,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it funct...,pay word function poorli chromebook,0,0
2,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking,...",csgo matchmak full closet hack 's truli aw game,0,0
3,4433,Google,Neutral,Now the President is slapping Americans in the...,presid slap american face realli commit unlaw ...,1,1
4,6273,FIFA,Negative,Hi @EAHelp I’ve had Madeleine McCann in my cel...,hi madelein mccann cellar past year littl snea...,0,0
...,...,...,...,...,...,...,...
994,4891,GrandTheftAuto(GTA),Irrelevant,⭐️ Toronto is the arts and culture capital of ...,toronto art cultur capit canada wonder want st...,3,3
995,4359,CS-GO,Irrelevant,tHIS IS ACTUALLY A GOOD MOVE TOT BRING MORE VI...,actual good move tot bring viewer one peopl go...,3,3
996,2652,Borderlands,Positive,Today sucked so it’s time to drink wine n play...,today suck time drink wine n play borderland s...,2,2
997,8069,Microsoft,Positive,Bought a fraction of Microsoft today. Small wins.,bought fraction microsoft today small win,2,2


In [60]:
df_result = df_val[df_val['label'] != df_val['prediction_result']]
df_result.head()

Unnamed: 0,id,entity,sentiment,tweet_content,text_processed,label,prediction_result
35,10589,RedDeadRedemption(RDR),Neutral,I got the horses in the back #PS4live (Red Dea...,got hors back red dead redempt live youtu bvkh...,1,2
58,11277,TomClancysRainbowSix,Irrelevant,Ok I'm blocking this man's he is on a new leve...,ok 'm block man 's new level,3,2
67,1908,CallOfDutyBlackopsColdWar,Neutral,Seems like #Playstation has the marketing deal...,seem like market deal feel good treat well pla...,1,0
72,8367,Microsoft,Positive,@satyanadella @Microsoft thanks for celebratin...,thank celebr need posit energi day,2,1
97,3526,Facebook,Neutral,Our #HISAPerth #OBIawards ceremony is taking p...,ceremoni take place friday may celebr outstand...,1,2


In [66]:
df_val_len = len(df_val)
df_result_len = len(df_result)
precentage_correct_prediction = 100 - ((df_result_len / df_val_len) * 100)
print(f"Percentage of Correct Prediction is : {precentage_correct_prediction:.2f}%")

Percentage of Correct Prediction is : 94.29%
