In [28]:
import nltk

# Resources needed by the preprocessing cells further down:
#   stopwords -> stopwords.words('english')
#   punkt     -> word_tokenize on older NLTK releases
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases (3.9+) load the tokenizer tables from 'punkt_tab'
# instead of the pickled 'punkt' models; without it word_tokenize raises
# LookupError on a fresh environment. Downloading both keeps the notebook
# runnable on old and new NLTK versions.
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [29]:
# Fetch the corpora used for lemmatisation. This cell relies on `nltk`
# already being imported by an earlier cell.
from nltk.stem import WordNetLemmatizer
# WordNet corpus; presumably what WordNetLemmatizer reads at lemmatize time.
nltk.download("wordnet")
# Open Multilingual WordNet 1.4 data.
# NOTE(review): assumed to be required alongside "wordnet" by the installed
# NLTK version -- confirm.
nltk.download("omw-1.4")

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [30]:
# Import Library
import zipfile

import pandas as pd
import numpy as np
import tensorflow as tf

import string 
import re


from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

from tensorflow.keras.models import load_model

In [31]:
# Load the trained model
#
# The model is shipped as a zip archive; unpack it into the current working
# directory and then load the extracted 'customer_review_model' artefact.
zip_filename = 'customer_review_model.zip'

with zipfile.ZipFile(zip_filename) as archive:  # default mode is read-only
    archive.extractall()

model = load_model('customer_review_model')

In [32]:
# Build the inference input: a one-row DataFrame with a single 'Review' column.
data_inf = pd.DataFrame(
    {'Review': ['I was shocked because no signs indicate cash only.']}
)
data_inf

Unnamed: 0,Review
0,I was shocked because no signs indicate cash o...


> Preprocessing

In [33]:
# English contractions mapped to their expanded forms.
#
# Keys are compared against whitespace-split tokens. Note that the
# first-person keys are capitalised ("I'm", "I've", ...), so any caller that
# lowercases a token before the lookup must normalise key case as well.
contractions_dict = {
    "ain't": "are not",
    "'s": " is",
    "aren't": "are not",
    "can't": "cannot",
    "can't've": "cannot have",
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "couldn't've": "could not have",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "hadn't": "had not",
    "hadn't've": "had not have",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he would",
    "he'd've": "he would have",
    "he'll": "he will",
    "he'll've": "he will have",
    "how'd": "how did",
    "how'd'y": "how do you",
    "how'll": "how will",
    "I'd": "I would",
    "I'd've": "I would have",
    "I'll": "I will",
    "I'll've": "I will have",
    "I'm": "I am",
    "I've": "I have",
    "isn't": "is not",
    "it'd": "it would",
    "it'd've": "it would have",
    "it'll": "it will",
    "it'll've": "it will have",
    "let's": "let us",
    "ma'am": "madam",
    "mayn't": "may not",
    "might've": "might have",
    "mightn't": "might not",
    "mightn't've": "might not have",
    "must've": "must have",
    "mustn't": "must not",
    "mustn't've": "must not have",
    "needn't": "need not",
    "needn't've": "need not have",
    "o'clock": "of the clock",
    "oughtn't": "ought not",
    "oughtn't've": "ought not have",
    "shan't": "shall not",
    "sha'n't": "shall not",
    "shan't've": "shall not have",
    "she'd": "she would",
    "she'd've": "she would have",
    "she'll": "she will",
    "she'll've": "she will have",
    "should've": "should have",
    "shouldn't": "should not",
    "shouldn't've": "should not have",
    "so've": "so have",
    "that'd": "that would",
    "that'd've": "that would have",
    "there'd": "there would",
    "there'd've": "there would have",
    "they'd": "they would",
    "they'd've": "they would have",
    "they'll": "they will",
    "they'll've": "they will have",
    "they're": "they are",
    "they've": "they have",
    "to've": "to have",
    "wasn't": "was not",
    "we'd": "we would",
    "we'd've": "we would have",
    "we'll": "we will",
    "we'll've": "we will have",
    "we're": "we are",
    "we've": "we have",
    "weren't": "were not",
    "what'll": "what will",
    "what'll've": "what will have",
    "what're": "what are",
    "what've": "what have",
    "when've": "when have",
    "where'd": "where did",
    "where've": "where have",
    "who'll": "who will",
    "who'll've": "who will have",
    "who've": "who have",
    "why've": "why have",
    "will've": "will have",
    "won't": "will not",
    "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not",
    "wouldn't've": "would not have",
    "y'all": "you all",
    "y'all'd": "you all would",
    "y'all'd've": "you all would have",
    "y'all're": "you all are",
    "y'all've": "you all have",
    "you'd": "you would",
    "you'd've": "you would have",
    "you'll": "you will",
    "you'll've": "you will have",
    "you're": "you are",
    "you've": "you have",
}

In [34]:
def lookup_words(input_text, contractions=None):
    """Expand English contractions in ``input_text``.

    The text is split on whitespace and every token that matches a key of the
    contraction table (ignoring case) is replaced by its expansion; all other
    tokens are kept unchanged. Tokens are re-joined with single spaces.

    Parameters
    ----------
    input_text : str
        Text to process.
    contractions : dict[str, str] or None, optional
        Mapping from contraction to expansion. Defaults to the module-level
        ``contractions_dict``.

    Returns
    -------
    str
        The text with known contractions expanded.
    """
    table = contractions_dict if contractions is None else contractions
    # The table holds capitalised keys such as "I'm"; comparing a lowercased
    # token against the raw keys would never match them, so normalise the key
    # case once up front.
    lowered = {key.lower(): value for key, value in table.items()}

    expanded = []
    for word in input_text.split():
        # Treat the typographic apostrophe (U+2019) like the ASCII one so
        # that text pasted from word processors is expanded as well.
        probe = word.lower().replace("\u2019", "'")
        expanded.append(lowered.get(probe, word))
    return " ".join(expanded)

In [35]:
# Expand contractions in every review (the 'Review' column is overwritten).
data_inf['Review'] = data_inf['Review'].apply(lookup_words)

In [36]:
# Replacement callback that shortens a run of one repeated character.
def conti_rep_char(str1):
    """Return at most the first two characters of the matched text.

    Intended as the ``repl`` callable for ``re.sub``. Two characters are kept
    (rather than one) so that words that legitimately contain a doubled
    letter, such as 'too' or 'really', are not damaged.

    Parameters
    ----------
    str1 : re.Match
        Match object whose full match (group 0) is shortened.

    Returns
    -------
    str
        The matched text truncated to two characters. A match shorter than
        two characters is returned unchanged; the function never returns
        None, which ``re.sub`` would treat as "delete the match".
    """
    return str1.group(0)[:2]
     
# Apply a replacement to every run of a repeated word character.
def check_unique_char(rep, sent_text):
    """Substitute each run of two or more identical word characters.

    Parameters
    ----------
    rep : str or callable
        Replacement passed to the regex substitution; a callable receives the
        match object for each run.
    sent_text : str
        Text to process.

    Returns
    -------
    str
        ``sent_text`` with every run replaced according to ``rep``.
    """
    # (\w)\1+ : one word character followed by one or more copies of itself.
    repeated_run = re.compile(r'(\w)\1+')
    return repeated_run.sub(rep, sent_text)

In [37]:
# Text cleaning pipeline. Each step writes a new column so that intermediate
# results stay inspectable.

# Convert text to lowercase
data_inf['lowercase'] = data_inf['Review'].str.lower()

# Remove punctuation.
# regex=True is passed explicitly: without it pandas emits a FutureWarning and,
# from pandas 2.0 on, treats the pattern as a literal string, so no punctuation
# would be removed. re.escape keeps ']', '\', '^' and '-' from being
# interpreted as character-class syntax.
data_inf['re_punc'] = data_inf['lowercase'].str.replace(
    '[{}]'.format(re.escape(string.punctuation)), '', regex=True
)

# Remove every token that contains a digit (raw string avoids invalid-escape
# warnings for \w and \d)
data_inf['re_other'] = data_inf['re_punc'].apply(lambda x: re.sub(r'\w*\d\w*', '', x))

# Shorten runs of a repeated character to two characters
data_inf['re_duplicate_lett'] = data_inf['re_other'].apply(lambda x: check_unique_char(conti_rep_char, x))

# Collapse consecutive whitespace into a single space
data_inf['re_whitespc'] = data_inf['re_duplicate_lett'].replace(r'\s+', ' ', regex=True)

# Drop repeated words, keeping the first occurrence of each in its original
# position (dict.fromkeys preserves insertion order and runs in linear time)
data_inf['re_duplicate'] = data_inf['re_whitespc'].apply(lambda x: " ".join(dict.fromkeys(x.split())))

  data_inf['re_punc'] = data_inf['lowercase'].str.replace('[{}]'.format(string.punctuation), '')


In [38]:
# Stop-word removal: NLTK's English list extended with a few project-specific
# words.
# NOTE(review): the stock English list presumably contains negations such as
# 'no' and 'not'; confirm that dropping them matches the preprocessing used
# when the model was trained.
stop_words = list(set(stopwords.words('english')))
new_stop_words = ['meh', 'pho', 'blah', 'would']
stop_words = stop_words + new_stop_words

# Membership is tested once per token, so use a set instead of scanning the
# list each time. `stop_words` itself stays a list for any later cell.
stop_word_lookup = set(stop_words)

word_tokens = data_inf['re_duplicate'].apply(word_tokenize)
data_inf['re_stopwords'] = word_tokens.apply(
    lambda tokens: ' '.join(word for word in tokens if word not in stop_word_lookup)
)

In [39]:
# Stemming: reduce every whitespace-separated token of each document to its
# Porter stem and store the re-joined text in 'stem_n_lemma'.

stemmer = PorterStemmer()
stemmed_content = [
    ' '.join(stemmer.stem(token) for token in text.split())
    for text in data_inf['re_stopwords']
]

data_inf['stem_n_lemma'] = stemmed_content

In [40]:
# Lemmatisation: lemmatise every token as a verb (pos="v"). The input is the
# already-stemmed text and the result overwrites the same 'stem_n_lemma'
# column.

wnl = WordNetLemmatizer()
lemmatized_content = [
    ' '.join(wnl.lemmatize(token, pos="v") for token in text.split())
    for text in data_inf['stem_n_lemma']
]

data_inf['stem_n_lemma'] = lemmatized_content

In [41]:
# Predict with the neural network, then binarise the output:
# values >= 0.5 become class 1, everything else class 0.

y_pred_prob = model.predict(data_inf['stem_n_lemma'])
y_pred_inf = (y_pred_prob >= 0.5).astype(int)
y_pred_inf



array([[1]])

Berdasarkan data baru yang telah dibuat, model memprediksi bahwa pelanggan tersebut termasuk kelas Liked (sentimen positif).