In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix, plot_confusion_matrix, recall_score, precision_score
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Preprocessing

Load data

In [2]:
data = pd.read_csv("tagged_only_casefold_label.csv")
data.head(10)

Unnamed: 0,Rating,Review,Name,Province,Type,cleanliness,service,accomodation,facility,crowd
0,1,not sure if it was due to storms etc but it wa...,Sanur Beach,Bali,Alam,1,0,0,0,0
1,1,ship aground just off puri santrian sanur bali...,Sanur Beach,Bali,Alam,0,1,0,0,0
2,1,yes it is a cute little sleepy town with a who...,Sanur Beach,Bali,Alam,0,0,0,1,0
3,2,we have been here in the rainy season so the p...,Sanur Beach,Bali,Alam,1,0,0,0,0
4,2,this is where all the boats land its busy poll...,Sanur Beach,Bali,Alam,1,0,0,0,1
5,2,the beach itself isnt really much of a beach i...,Sanur Beach,Bali,Alam,0,0,0,1,0
6,2,generally crowded and disorganized areathere a...,Sanur Beach,Bali,Alam,0,1,3,0,1
7,2,sandy beach surrounded by restaurants and few ...,Sanur Beach,Bali,Alam,0,1,0,3,0
8,2,this one is average reasonably clean and yello...,Sanur Beach,Bali,Alam,3,1,0,0,0
9,2,nice beach but sadly lots of dog poo which is ...,Sanur Beach,Bali,Alam,1,0,0,0,0


Stop words removal

In [3]:
import nltk
nltk.download('stopwords')
sw = stopwords.words('english')
np.array(sw)
def stopwords(text):
    text = [word.lower() for word in text.split() if word.lower() not in sw]
    return " ".join(text)
data['Review'] = data['Review'].apply(stopwords)
data.head(10)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Unnamed: 0,Rating,Review,Name,Province,Type,cleanliness,service,accomodation,facility,crowd
0,1,sure due storms etc gross worth going kids hea...,Sanur Beach,Bali,Alam,1,0,0,0,0
1,1,ship aground puri santrian sanur bali see ship...,Sanur Beach,Bali,Alam,0,1,0,0,0
2,1,yes cute little sleepy town whole lot nothing ...,Sanur Beach,Bali,Alam,0,0,0,1,0
3,2,rainy season proof water quality could seen be...,Sanur Beach,Bali,Alam,1,0,0,0,0
4,2,boats land busy polluted noisy im sure would g...,Sanur Beach,Bali,Alam,1,0,0,0,1
5,2,beach isnt really much beach inlet theres surf...,Sanur Beach,Bali,Alam,0,0,0,1,0
6,2,generally crowded disorganized areathere many ...,Sanur Beach,Bali,Alam,0,1,3,0,1
7,2,sandy beach surrounded restaurants resorts unf...,Sanur Beach,Bali,Alam,0,1,0,3,0
8,2,one average reasonably clean yellow sand compa...,Sanur Beach,Bali,Alam,3,1,0,0,0
9,2,nice beach sadly lots dog poo shame little put...,Sanur Beach,Bali,Alam,1,0,0,0,0


Stemming

In [4]:
stemmer = SnowballStemmer("english")
def stemming(text):    
    text = [stemmer.stem(word) for word in text.split()]
    return " ".join(text) 
data['Review'] = data['Review'].apply(stemming)
data.head()

Unnamed: 0,Rating,Review,Name,Province,Type,cleanliness,service,accomodation,facility,crowd
0,1,sure due storm etc gross worth go kid heard mu...,Sanur Beach,Bali,Alam,1,0,0,0,0
1,1,ship aground puri santrian sanur bali see ship...,Sanur Beach,Bali,Alam,0,1,0,0,0
2,1,yes cute littl sleepi town whole lot noth beac...,Sanur Beach,Bali,Alam,0,0,0,1,0
3,2,raini season proof water qualiti could seen be...,Sanur Beach,Bali,Alam,1,0,0,0,0
4,2,boat land busi pollut noisi im sure would go u...,Sanur Beach,Bali,Alam,1,0,0,0,1


# Hyperparameter

In [5]:
vocab_size = 5000
embedding_dim = 64
max_length = 200
trunc_type = 'post'
padding_type = 'post'
oov_tok = '<OOV>'
training_portion = .8

# Tokenizing

In [6]:
tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(data['Review'])
word_index = tokenizer.word_index
dict(list(word_index.items())[0:10])

{'<OOV>': 1,
 'beach': 2,
 'go': 8,
 'good': 4,
 'great': 7,
 'hotel': 5,
 'place': 3,
 'room': 9,
 'stay': 6,
 'view': 10}

In [7]:
review_sequences = tokenizer.texts_to_sequences(data['Review'])
print('Contoh review yang sudah di-tokenizing:')
print(review_sequences[0])

Contoh review yang sudah di-tokenizing:
[115, 268, 2003, 354, 1810, 83, 8, 428, 668, 53, 287, 115, 144, 155, 501, 155, 107, 68, 11, 462, 35, 995, 564, 269, 194]


Padding: supaya seluruh records review berukuran sama.

In [8]:
review_padded = pad_sequences(review_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
review_padded

array([[ 115,  268, 2003, ...,    0,    0,    0],
       [1528, 3720, 1529, ...,    0,    0,    0],
       [ 540, 1261,  110, ...,    0,    0,    0],
       ...,
       [   6,    3,  155, ...,    0,    0,    0],
       [   7,    5,    4, ...,    0,    0,    0],
       [ 591,   30,  157, ...,    0,    0,    0]], dtype=int32)

# Membuat data training & testing tiap-tiap aspek

In [9]:
train_size = int(len(review_padded) * training_portion)

train_review = review_padded[0: train_size]
test_review = review_padded[train_size:]

train_cleanliness = data['cleanliness'][0: train_size]
test_cleanliness = data['cleanliness'][train_size:]

train_service = data['service'][0: train_size]
test_service = data['service'][train_size:]

train_accomodation = data['accomodation'][0: train_size]
test_accomodation = data['accomodation'][train_size:]

train_facility = data['facility'][0: train_size]
test_facility = data['facility'][train_size:]

train_crowd = data['crowd'][0: train_size]
test_crowd = data['crowd'][train_size:]

# LSTM - for each aspect

In [10]:
model_cleanliness = tf.keras.Sequential([
    # Add an Embedding layer expecting input vocab of size 5000, and output embedding dimension of size 64 we set at the top
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embedding_dim)),
#   tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    # use ReLU in place of tanh function since they are very good alternatives of each other.
    tf.keras.layers.Dense(embedding_dim, activation='relu'),
    # Add a Dense layer with 6 units and softmax activation.
    # When we have multiple outputs, softmax convert outputs layers into a probability distribution.
    tf.keras.layers.Dense(4, activation='softmax')
])
model_cleanliness.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 64)          320000    
_________________________________________________________________
bidirectional (Bidirectional (None, 128)               66048     
_________________________________________________________________
dense (Dense)                (None, 64)                8256      
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 260       
Total params: 394,564
Trainable params: 394,564
Non-trainable params: 0
_________________________________________________________________


In [11]:
model_cleanliness.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
num_epochs = 10
model_cleanliness.fit(train_review, train_cleanliness, epochs=num_epochs, validation_data=(test_review, test_cleanliness), verbose=2)

Epoch 1/10
53/53 - 11s - loss: 0.8145 - accuracy: 0.7907 - val_loss: 0.8504 - val_accuracy: 0.7249
Epoch 2/10
53/53 - 10s - loss: 0.6013 - accuracy: 0.7996 - val_loss: 0.8153 - val_accuracy: 0.7344
Epoch 3/10
53/53 - 10s - loss: 0.4107 - accuracy: 0.8433 - val_loss: 0.8416 - val_accuracy: 0.7464
Epoch 4/10
53/53 - 10s - loss: 0.2449 - accuracy: 0.9145 - val_loss: 0.8933 - val_accuracy: 0.7201
Epoch 5/10
53/53 - 10s - loss: 0.1217 - accuracy: 0.9617 - val_loss: 1.0783 - val_accuracy: 0.7656
Epoch 6/10
53/53 - 11s - loss: 0.0775 - accuracy: 0.9809 - val_loss: 1.4806 - val_accuracy: 0.7440
Epoch 7/10
53/53 - 10s - loss: 0.0582 - accuracy: 0.9833 - val_loss: 1.2166 - val_accuracy: 0.7488
Epoch 8/10
53/53 - 10s - loss: 0.0316 - accuracy: 0.9892 - val_loss: 1.1864 - val_accuracy: 0.7632
Epoch 9/10
53/53 - 10s - loss: 0.0227 - accuracy: 0.9928 - val_loss: 1.4079 - val_accuracy: 0.7703
Epoch 10/10
53/53 - 10s - loss: 0.0202 - accuracy: 0.9952 - val_loss: 1.3189 - val_accuracy: 0.7632


<tensorflow.python.keras.callbacks.History at 0x7f102c042e10>

In [12]:
score = model_cleanliness.evaluate(test_review, test_cleanliness, verbose=1)

print("Test Score:", score[0])
print("Test Accuracy:", score[1])

Test Score: 1.318856954574585
Test Accuracy: 0.7631579041481018


In [13]:
model_service = tf.keras.Sequential([
    # Add an Embedding layer expecting input vocab of size 5000, and output embedding dimension of size 64 we set at the top
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embedding_dim)),
#    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    # use ReLU in place of tanh function since they are very good alternatives of each other.
    tf.keras.layers.Dense(embedding_dim, activation='relu'),
    # Add a Dense layer with 6 units and softmax activation.
    # When we have multiple outputs, softmax convert outputs layers into a probability distribution.
    tf.keras.layers.Dense(4, activation='softmax')
])

model_service.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
num_epochs = 10
model_service.fit(train_review, train_service, epochs=num_epochs, validation_data=(test_review, test_service), verbose=2)

Epoch 1/10
53/53 - 12s - loss: 1.1690 - accuracy: 0.5299 - val_loss: 1.2084 - val_accuracy: 0.1818
Epoch 2/10
53/53 - 11s - loss: 0.9477 - accuracy: 0.6005 - val_loss: 0.9109 - val_accuracy: 0.5789
Epoch 3/10
53/53 - 10s - loss: 0.6368 - accuracy: 0.7614 - val_loss: 0.9934 - val_accuracy: 0.5909
Epoch 4/10
53/53 - 10s - loss: 0.4390 - accuracy: 0.8397 - val_loss: 0.9453 - val_accuracy: 0.6435
Epoch 5/10
53/53 - 10s - loss: 0.2857 - accuracy: 0.8965 - val_loss: 0.8754 - val_accuracy: 0.7273
Epoch 6/10
53/53 - 10s - loss: 0.1807 - accuracy: 0.9342 - val_loss: 1.0447 - val_accuracy: 0.7536
Epoch 7/10
53/53 - 10s - loss: 0.1289 - accuracy: 0.9629 - val_loss: 1.8049 - val_accuracy: 0.5861
Epoch 8/10
53/53 - 10s - loss: 0.0904 - accuracy: 0.9749 - val_loss: 1.3978 - val_accuracy: 0.6794
Epoch 9/10
53/53 - 10s - loss: 0.0637 - accuracy: 0.9821 - val_loss: 1.5664 - val_accuracy: 0.6794
Epoch 10/10
53/53 - 10s - loss: 0.0471 - accuracy: 0.9880 - val_loss: 2.0303 - val_accuracy: 0.6507


<tensorflow.python.keras.callbacks.History at 0x7f1020dc4550>

In [14]:
score = model_service.evaluate(test_review, test_service, verbose=1)

print("Test Score:", score[0])
print("Test Accuracy:", score[1])

Test Score: 2.030311107635498
Test Accuracy: 0.6507176756858826


In [15]:
model_accomodation = tf.keras.Sequential([
    # Add an Embedding layer expecting input vocab of size 5000, and output embedding dimension of size 64 we set at the top
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embedding_dim)),
#    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    # use ReLU in place of tanh function since they are very good alternatives of each other.
    tf.keras.layers.Dense(embedding_dim, activation='relu'),
    # Add a Dense layer with 6 units and softmax activation.
    # When we have multiple outputs, softmax convert outputs layers into a probability distribution.
    tf.keras.layers.Dense(4, activation='softmax')
])

model_accomodation.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
num_epochs = 10
model_accomodation.fit(train_review, train_accomodation, epochs=num_epochs, validation_data=(test_review, test_accomodation), verbose=2)

Epoch 1/10
53/53 - 10s - loss: 0.9530 - accuracy: 0.7147 - val_loss: 0.6376 - val_accuracy: 0.7799
Epoch 2/10
53/53 - 10s - loss: 0.7393 - accuracy: 0.7362 - val_loss: 0.6358 - val_accuracy: 0.7847
Epoch 3/10
53/53 - 10s - loss: 0.5144 - accuracy: 0.8266 - val_loss: 0.6563 - val_accuracy: 0.7703
Epoch 4/10
53/53 - 10s - loss: 0.3101 - accuracy: 0.8882 - val_loss: 0.8184 - val_accuracy: 0.7201
Epoch 5/10
53/53 - 10s - loss: 0.2100 - accuracy: 0.9234 - val_loss: 1.0617 - val_accuracy: 0.7823
Epoch 6/10
53/53 - 10s - loss: 0.1430 - accuracy: 0.9474 - val_loss: 1.0422 - val_accuracy: 0.7703
Epoch 7/10
53/53 - 10s - loss: 0.0987 - accuracy: 0.9641 - val_loss: 1.3260 - val_accuracy: 0.7727
Epoch 8/10
53/53 - 10s - loss: 0.0724 - accuracy: 0.9737 - val_loss: 1.3731 - val_accuracy: 0.7297
Epoch 9/10
53/53 - 10s - loss: 0.0635 - accuracy: 0.9833 - val_loss: 1.4976 - val_accuracy: 0.7392
Epoch 10/10
53/53 - 10s - loss: 0.0296 - accuracy: 0.9928 - val_loss: 1.6470 - val_accuracy: 0.7392


<tensorflow.python.keras.callbacks.History at 0x7f101f37b5c0>

In [16]:
score = model_accomodation.evaluate(test_review, test_accomodation, verbose=1)

print("Test Score:", score[0])
print("Test Accuracy:", score[1])

Test Score: 1.6470292806625366
Test Accuracy: 0.739234447479248


In [17]:
model_facility = tf.keras.Sequential([
    # Add an Embedding layer expecting input vocab of size 5000, and output embedding dimension of size 64 we set at the top
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embedding_dim)),
#    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    # use ReLU in place of tanh function since they are very good alternatives of each other.
    tf.keras.layers.Dense(embedding_dim, activation='relu'),
    # Add a Dense layer with 6 units and softmax activation.
    # When we have multiple outputs, softmax convert outputs layers into a probability distribution.
    tf.keras.layers.Dense(4, activation='softmax')
])

model_facility.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
num_epochs = 10
model_facility.fit(train_review, train_facility, epochs=num_epochs, validation_data=(test_review, test_facility), verbose=2)

Epoch 1/10
53/53 - 11s - loss: 1.0980 - accuracy: 0.4396 - val_loss: 0.9733 - val_accuracy: 0.5957
Epoch 2/10
53/53 - 10s - loss: 0.9706 - accuracy: 0.5706 - val_loss: 1.3724 - val_accuracy: 0.3254
Epoch 3/10
53/53 - 10s - loss: 0.7431 - accuracy: 0.7183 - val_loss: 1.1568 - val_accuracy: 0.5048
Epoch 4/10
53/53 - 10s - loss: 0.4904 - accuracy: 0.8116 - val_loss: 1.3977 - val_accuracy: 0.4665
Epoch 5/10
53/53 - 10s - loss: 0.2835 - accuracy: 0.8977 - val_loss: 1.9528 - val_accuracy: 0.4258
Epoch 6/10
53/53 - 10s - loss: 0.1471 - accuracy: 0.9522 - val_loss: 2.0463 - val_accuracy: 0.4593
Epoch 7/10
53/53 - 10s - loss: 0.0925 - accuracy: 0.9695 - val_loss: 2.5856 - val_accuracy: 0.4545
Epoch 8/10
53/53 - 10s - loss: 0.0444 - accuracy: 0.9904 - val_loss: 3.2944 - val_accuracy: 0.4426
Epoch 9/10
53/53 - 10s - loss: 0.0262 - accuracy: 0.9928 - val_loss: 3.5625 - val_accuracy: 0.4593
Epoch 10/10
53/53 - 10s - loss: 0.0249 - accuracy: 0.9952 - val_loss: 3.3937 - val_accuracy: 0.4952


<tensorflow.python.keras.callbacks.History at 0x7f101e8abb00>

In [18]:
score = model_facility.evaluate(test_review, test_facility, verbose=1)

print("Test Score:", score[0])
print("Test Accuracy:", score[1])

Test Score: 3.3936727046966553
Test Accuracy: 0.4952152967453003


In [19]:
model_crowd = tf.keras.Sequential([
    # Add an Embedding layer expecting input vocab of size 5000, and output embedding dimension of size 64 we set at the top
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embedding_dim)),
#    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    # use ReLU in place of tanh function since they are very good alternatives of each other.
    tf.keras.layers.Dense(embedding_dim, activation='relu'),
    # Add a Dense layer with 6 units and softmax activation.
    # When we have multiple outputs, softmax convert outputs layers into a probability distribution.
    tf.keras.layers.Dense(4, activation='softmax')
])

model_crowd.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
num_epochs = 10
model_crowd.fit(train_review, train_crowd, epochs=num_epochs, validation_data=(test_review, test_crowd), verbose=2)

Epoch 1/10
53/53 - 11s - loss: 0.6755 - accuracy: 0.8684 - val_loss: 0.1667 - val_accuracy: 0.9856
Epoch 2/10
53/53 - 10s - loss: 0.4761 - accuracy: 0.8684 - val_loss: 0.1020 - val_accuracy: 0.9856
Epoch 3/10
53/53 - 10s - loss: 0.3456 - accuracy: 0.8846 - val_loss: 0.1120 - val_accuracy: 0.9856
Epoch 4/10
53/53 - 10s - loss: 0.2103 - accuracy: 0.9103 - val_loss: 0.1374 - val_accuracy: 0.9809
Epoch 5/10
53/53 - 10s - loss: 0.1596 - accuracy: 0.9252 - val_loss: 0.1629 - val_accuracy: 0.9833
Epoch 6/10
53/53 - 10s - loss: 0.1334 - accuracy: 0.9468 - val_loss: 0.1789 - val_accuracy: 0.9833
Epoch 7/10
53/53 - 10s - loss: 0.1114 - accuracy: 0.9587 - val_loss: 0.2072 - val_accuracy: 0.9809
Epoch 8/10
53/53 - 10s - loss: 0.0816 - accuracy: 0.9707 - val_loss: 0.2245 - val_accuracy: 0.9809
Epoch 9/10
53/53 - 10s - loss: 0.0503 - accuracy: 0.9791 - val_loss: 0.2505 - val_accuracy: 0.9737
Epoch 10/10
53/53 - 10s - loss: 0.0603 - accuracy: 0.9833 - val_loss: 0.2216 - val_accuracy: 0.9809


<tensorflow.python.keras.callbacks.History at 0x7f101bd535c0>

In [20]:
score = model_crowd.evaluate(test_review, test_crowd, verbose=1)

print("Test Score:", score[0])
print("Test Accuracy:", score[1])

Test Score: 0.22163648903369904
Test Accuracy: 0.980861246585846


# Coba prediksi 6000an data

In [21]:
all_review = pd.read_csv('all_scraped_casefold.csv')
all_review_sequences = tokenizer.texts_to_sequences(all_review['Review'])
all_review_padded = pad_sequences(all_review_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

In [22]:
cleanliness_predict_result = model_cleanliness.predict(all_review_padded, verbose=2)
label = [0, 1, 2, 3]
cleanliness_predict = []
for item in cleanliness_predict_result:
    cleanliness_predict.append(label[np.argmax(item)])
all_review['cleanliness_predict'] = cleanliness_predict
all_review

206/206 - 8s


Unnamed: 0,Rating,Review,Name,Province,Type,cleanliness_predict
0,5.0,beautiful and stunning view of the beach clear...,Nusa Dua Beach,Bali,Alam,0
1,5.0,you arrifve and are immeditely overwhelmed by ...,Nusa Dua Beach,Bali,Alam,0
2,5.0,nusa dua are the best beaches i saw on bali nu...,Nusa Dua Beach,Bali,Alam,0
3,4.0,move to the central area wave and tides clean ...,Nusa Dua Beach,Bali,Alam,1
4,4.0,white sand beach with chairs and some restaura...,Nusa Dua Beach,Bali,Alam,0
...,...,...,...,...,...,...
6568,5.0,it takes a lot for me to rate this as a five s...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0
6569,5.0,we stayed here for a couple of nights and love...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0
6570,5.0,i stayed here for four nights after completing...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,1
6571,5.0,went on honeymoon to bali and lombok last mont...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0


In [23]:
service_predict_result = model_service.predict(all_review_padded, verbose=2)
service_predict = []
for item in service_predict_result:
    service_predict.append(label[np.argmax(item)])
all_review['service_predict'] = service_predict
all_review

206/206 - 8s


Unnamed: 0,Rating,Review,Name,Province,Type,cleanliness_predict,service_predict
0,5.0,beautiful and stunning view of the beach clear...,Nusa Dua Beach,Bali,Alam,0,0
1,5.0,you arrifve and are immeditely overwhelmed by ...,Nusa Dua Beach,Bali,Alam,0,0
2,5.0,nusa dua are the best beaches i saw on bali nu...,Nusa Dua Beach,Bali,Alam,0,0
3,4.0,move to the central area wave and tides clean ...,Nusa Dua Beach,Bali,Alam,1,0
4,4.0,white sand beach with chairs and some restaura...,Nusa Dua Beach,Bali,Alam,0,0
...,...,...,...,...,...,...,...
6568,5.0,it takes a lot for me to rate this as a five s...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,1
6569,5.0,we stayed here for a couple of nights and love...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,0
6570,5.0,i stayed here for four nights after completing...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,1,3
6571,5.0,went on honeymoon to bali and lombok last mont...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,3


In [24]:
accomodation_predict_result = model_accomodation.predict(all_review_padded, verbose=2)
accomodation_predict = []
for item in accomodation_predict_result:
    accomodation_predict.append(label[np.argmax(item)])
all_review['accomodation_predict'] = accomodation_predict
all_review

206/206 - 7s


Unnamed: 0,Rating,Review,Name,Province,Type,cleanliness_predict,service_predict,accomodation_predict
0,5.0,beautiful and stunning view of the beach clear...,Nusa Dua Beach,Bali,Alam,0,0,0
1,5.0,you arrifve and are immeditely overwhelmed by ...,Nusa Dua Beach,Bali,Alam,0,0,0
2,5.0,nusa dua are the best beaches i saw on bali nu...,Nusa Dua Beach,Bali,Alam,0,0,0
3,4.0,move to the central area wave and tides clean ...,Nusa Dua Beach,Bali,Alam,1,0,0
4,4.0,white sand beach with chairs and some restaura...,Nusa Dua Beach,Bali,Alam,0,0,0
...,...,...,...,...,...,...,...,...
6568,5.0,it takes a lot for me to rate this as a five s...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,1,0
6569,5.0,we stayed here for a couple of nights and love...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,0,0
6570,5.0,i stayed here for four nights after completing...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,1,3,0
6571,5.0,went on honeymoon to bali and lombok last mont...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,3,0


In [25]:
facility_predict_result = model_facility.predict(all_review_padded, verbose=2)
facility_predict = []
for item in facility_predict_result:
    facility_predict.append(label[np.argmax(item)])
all_review['facility_predict'] = facility_predict
all_review

206/206 - 7s


Unnamed: 0,Rating,Review,Name,Province,Type,cleanliness_predict,service_predict,accomodation_predict,facility_predict
0,5.0,beautiful and stunning view of the beach clear...,Nusa Dua Beach,Bali,Alam,0,0,0,3
1,5.0,you arrifve and are immeditely overwhelmed by ...,Nusa Dua Beach,Bali,Alam,0,0,0,3
2,5.0,nusa dua are the best beaches i saw on bali nu...,Nusa Dua Beach,Bali,Alam,0,0,0,0
3,4.0,move to the central area wave and tides clean ...,Nusa Dua Beach,Bali,Alam,1,0,0,1
4,4.0,white sand beach with chairs and some restaura...,Nusa Dua Beach,Bali,Alam,0,0,0,3
...,...,...,...,...,...,...,...,...,...
6568,5.0,it takes a lot for me to rate this as a five s...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,1,0,0
6569,5.0,we stayed here for a couple of nights and love...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,0,0,1
6570,5.0,i stayed here for four nights after completing...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,1,3,0,0
6571,5.0,went on honeymoon to bali and lombok last mont...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,3,0,1


In [26]:
crowd_predict_result = model_crowd.predict(all_review_padded, verbose=2)
crowd_predict = []
for item in crowd_predict_result:
    crowd_predict.append(label[np.argmax(item)])
all_review['crowd_predict'] = crowd_predict
all_review

206/206 - 8s


Unnamed: 0,Rating,Review,Name,Province,Type,cleanliness_predict,service_predict,accomodation_predict,facility_predict,crowd_predict
0,5.0,beautiful and stunning view of the beach clear...,Nusa Dua Beach,Bali,Alam,0,0,0,3,0
1,5.0,you arrifve and are immeditely overwhelmed by ...,Nusa Dua Beach,Bali,Alam,0,0,0,3,0
2,5.0,nusa dua are the best beaches i saw on bali nu...,Nusa Dua Beach,Bali,Alam,0,0,0,0,0
3,4.0,move to the central area wave and tides clean ...,Nusa Dua Beach,Bali,Alam,1,0,0,1,0
4,4.0,white sand beach with chairs and some restaura...,Nusa Dua Beach,Bali,Alam,0,0,0,3,0
...,...,...,...,...,...,...,...,...,...,...
6568,5.0,it takes a lot for me to rate this as a five s...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,1,0,0,0
6569,5.0,we stayed here for a couple of nights and love...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,0,0,1,0
6570,5.0,i stayed here for four nights after completing...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,1,3,0,0,0
6571,5.0,went on honeymoon to bali and lombok last mont...,"Cocotinos Sekotong, Boutique Beach Resort & Spa",NTB,Urban,0,3,0,1,0


In [27]:
all_review.to_csv("prediksi6000.csv")