# **Import Libraries**

In [1]:
# Library Load Model
import pandas as pd
import numpy as np
import tensorflow as tf

import re

# Library Pre-Processing
from nltk.stem import WordNetLemmatizer
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

#download library
nltk.download('punkt') #punctuation package
nltk.download('stopwords') #stopwords package
nltk.download('wordnet')
nltk.download('punkt_tab')

#warnings
import warnings
warnings.filterwarnings('ignore')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/danisarahadians/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/danisarahadians/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/danisarahadians/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/danisarahadians/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


# **Import Model**

In [2]:
# Load the saved model
loaded_model = tf.keras.models.load_model('model_lstm_2.keras')

loaded_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVe  (None, 1149)              0         
 ctorization)                                                    
                                                                 
 embedding (Embedding)       (None, 1149, 128)         1720704   
                                                                 
 bidirectional (Bidirection  (None, 1149, 64)          41216     
 al)                                                             
                                                                 
 dropout (Dropout)           (None, 1149, 64)          0         
                                                                 
 bidirectional_1 (Bidirecti  (None, 1149, 32)          10368     
 onal)                                                           
                                                        

# **Pre-processing**

Pada tahapan ini saya melakukan hal yang sama pada data seperti handle stopwords, case folding, remove punctuation dll.

In [3]:
# Menambahkan stopwords
add_stopwords = ['to', 'I','the','a','my','and','i', 'you', 'is', 'for', 'in', 'of',
 'it', 'on', 'have', 'that', 'me', 'so', 'with', 'be', 'but',
 'at', 'was', 'just', 'I`m', 'not', 'get', 'all', 'this', 'are',
 'out', 'like', 'day', 'up', 'go', 'your', 'good', 'got', 'from',
 'do', 'going', 'no', 'now',  'work', 'will', 'about',
 'one', 'really', 'it`s', 'u', 'don`t', 'some', 'know', 'see', 'can',
 'too', 'had', 'am', 'back', '&', 'time', 'what', 'its', 'want', 'we',
 'new', 'as', 'im', 'think', 'can`t', 'if', 'when', 'an', 'more',
 'still', 'today', 'miss', 'has', 'they', 'much', 'there', 'last',
 'need', 'My', 'how', 'been', 'home', 'lol', 'off', 'Just', 'feel',
 'night', 'i`m', 'her', 'would', 'The', 'sq', 've', 'le', 'hr', 'pre', 'ca', 'th', 'b', 'uk', 'sat', 'sg', 'k', 'sfo', 'usb', 'cm', 'san', 'ft', 'hkg', 'veg', 'usd', 'rd', 'la',
'bid', 'tag', 'usa', 'york', 'rang', 'ho', 'yr', 'fuss', 'los', 'bne', 'nap', 'hang', 'tad', 'wi', 'fi', 'sum', 'de', 'dp', 'fro', 'koh', 'samui', 'ana', 'sogi', 'prem',
'aud', 'act', 'mnl', 'ie', 'klm', 'comb', 'ny', 'opt', 'au', 'en', 'eg', 'hub', 'yo', 'hung','sia', 'hop', 'com', 'nov', 'bc', 'cairn', 'dim', 'oct', 'bom', 'dhaka', 'cdg', 'nrt', 'cph' , 'na', 'log', 'kul', 'lh', 'w', 'ive', 'qf', 'shoe', 'tap', 'jam', 'lip', 'wan', 'oz', 'tho', 'siem', 'r', 'eve', 'melb', 'da', 'haha', 'airnz', 'coz', 'akl', 'utmost', 'gourmet', 'apps', 'mb', 'cx',  'dom', 'inr', 'pls', 'yum', 'bang', 'haagen', 'kix', 'sep', 'phee', 'rip', 'hip', 'un', 'warn', 'wee', 'z', 'ek', 'pic', 'sm', 'xmas', 'davao', 'penh', 'pcr', 'krug', 'pill', 'mar', 'ml', 'omg', 'def', 'jnb', 'kathmandu', 'pnr', 'emma', 'pudong', 'yangon', 'nang', 'qr', 'lol', 'ff', 'soo', 'so', 'vip', 'mai', 'ala', 'dxb', 'in', 'dme', 'pram', 'era', 'sim', 'bug', 'chan', 'bump', 'bent', 'pea', 'leo', 'sgn', 'amp', 'ed', 'ptv', 'dazs', 'dull', 'thr', 'aft','al', 'mad', 'pan', 'eu', 'mere', 'icing', 'danang', 'vn', 'bcn', 'singapur', 'guru', 'abit', 'fukuoka', 'wa', 'eau' , 'hoon', 'nicole', 'ham', 'ifs', 'perrier', 'sevice', 'convince', 'ref', 'easyjet', 'zrh', 'fond', 'ldn', 'ons', 'dire', 'hcmc', 'fr', 'toe', 'pond', 'ur', 'afghan', 'shenzen', 'hv', 'hkd', 'offs', 'icn', 'q', 'gaulle', 'uae', 'sooo', 'si', 'chianti', 'bengaluru', 'yeah', 'gps', 'nine', 'inc', 'jhb', 'madam', 'ban', 'signage', 'cheng', 'twg', 'alway', 'arn', 'swivel', 'krisshop', 'ya', 'ma', 'swa', 'chc', 'hyd', 'peculiar', 'oj', 'osl', 'prop', 'rhapsody', 'iam', 'wong', 'doona', 'gst', 'concoction', 'nj', 'doughy', 'fav', 'hum', 'stern', 'revamp', 'nzd', 'blunt', 'gon', 'int', 'bout', 'bento', 'hnd', 'ingham', 'bwn', 'cuz', 'jkt', 'yang', 'dr', 'mass', 'snag', 'piss', 'irate', 'adl', 'gel', 'econony', 'adjoining', 'rattle', 'chor', 'hide', 'hkt', 'amex', 'kim', 'goreng', 'singapre', 'ling', 'ap', 'damp', 'gastro', 'boss', 'temp', 'midst', 'gatwick', 'slop', 'krabi', 'sh', 'vi', 'ha', 'cmb', 'bak', 'inn', 'ful', 'ion','tbh', 'basinet', 'cab', 'andrea', 'welfare', 'kochi', 'lump', 'ashton', 'yatra', 'wotif', 'ent', 'an', 'ca', 'sang', 'ply', 'snug', 'rt', 'tongs', 'allways', 'grub', 'reckon', 'can', 'pr', 'ovo', 'maa', 'koi', 'sharifah', 'ab', 'bogus', 'nigh', 'sn', 'kat', 'david', 'john', 'savvy', 'muesli', 'ind', 'skywalk', 'imo', 'sqs', 'ng', 'teng', 'brat', 'mle', 'lye', 'iata', 'kee', 'spinal', 'hmmmm','yep', 'shin', 'gaffa' , 'chai', 'med', 'coccyx', 'eur', 'jean', 'agian', 'mee', 'kapoor', 'fog', 'sebastian', 'lingus', 'nhat', 'li', 'qi', 'saga', 'tsa', 'hagen', 'jasmine', 'ah', 'chunk', 'kebaya', 'fot', 'poc', ]

In [4]:
# Stopwords defined
stpwds_eng = list(set(stopwords.words('english')))
stpwds_eng.append(add_stopwords)

In [5]:
# Create A Function for Text Preprocessing

def text_preprocessing(text):
  # Case folding
  text = text.lower()

  # Mention removal
  text = re.sub("@[A-Za-z0-9_]+", " ", text)

  # Hashtags removal
  text = re.sub("#[A-Za-z0-9_]+", " ", text)

  # Newline removal (\n)
  text = re.sub(r"\\n", " ", text)

  # Remove '
  text = re.sub(r"'s\b","", text)

  # Remove Numbers
  text = re.sub(r"\d+", " ", text)

  # Remove Punctuation
  text = re.sub(r"[^\w\s]", " ", text)

  # Whitespace removal
  text = text.strip()

  # URL removal
  text = re.sub(r"http\S+", " ", text)
  text = re.sub(r"www.\S+", " ", text)

  # Delete word yang kemuculannya hanya 2 kata
  text = re.sub(r"\b\w{1,2}\b", " ", text)

  # Non-letter removal (such as emoticon, symbol (like μ, $, 兀), etc
  text = re.sub("[^A-Za-z\s']", " ", text)

  # Skema: token dulu/split word -> stopwords
  # Tokenization
  tokens = word_tokenize(text)

  # Stopwords removal
  tokens = [word for word in tokens if word not in stpwds_eng]

  # Lemmatization
  lemmatizer = WordNetLemmatizer()
  tokens = [lemmatizer.lemmatize(word) for word in tokens]

  # Combining Tokens
  text = ' '.join(tokens)

  return text

# **New Data Frame**

Membuat dataframe baru dengan kata yang akan di prediksi. Karena model kurang dalam memahami netral, jadi saya coba mau masukkin kata yang netral untuk mencoba.

In [6]:
data = {
    'riview' : 'The Facility is so bad and i dont like it, but the movie in plane was so fine'                                
}

data = pd.DataFrame([data])
data

Unnamed: 0,riview
0,"The Facility is so bad and i dont like it, but..."


In [7]:
# Preprocessing Data Inference
data['riview_processed'] = data['riview'].apply(lambda arg: text_preprocessing(arg))

#print
data

Unnamed: 0,riview,riview_processed
0,"The Facility is so bad and i dont like it, but...",facility bad dont like movie plane fine


In [8]:
# Membuat fungsi untuk return result prediksi
def result_prediction(predictions):
    '''
    Fungsi ini bertujuan untuk menampilkan hasil dari prediksi sesuai label jika 0=negative, 1=netral, 2=positive
    '''
    if predictions[0] == 0:
        result = 'Negative'
    elif predictions[0] == 1:
        result = 'Netral'
    else:
        result = 'Positive'

    return result

# **Predict**

In [9]:
# Prediksi
predictions = np.argmax(loaded_model.predict(data['riview_processed']), axis=-1)

# Call the function with the predictions
result = result_prediction(predictions)

# Print Result
print(result)

Netral


Dari hasil inferrence dari kata yang dimasukkan di data frame baru, hasilnya adalah benar terprediksi netral, karena dimulai dari negative riview lalu ditambahkan positive riview.