In [23]:
import pandas as pd
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import nltk

# PUNCTUATION STRIPPER: tokenizing on runs of word characters drops everything else.
remove_tanda_baca = nltk.RegexpTokenizer(r"\w+")

# STEMMER (Sastrawi)
stemmer_factory = StemmerFactory()
stemmer = stemmer_factory.create_stemmer()

# STOP WORD REMOVER (Sastrawi)
stop_word_factory = StopWordRemoverFactory()
stop_word = stop_word_factory.create_stop_word_remover()

# Clean one review: strip punctuation, stem, then remove stop words.
def pre_process_text(text):
    """Normalize a raw review text for tokenization.

    The text is split into word-character tokens (which drops punctuation),
    re-joined with single spaces, stemmed, and finally stripped of stop words.

    Args:
        text: Raw review text. ``None`` and float ``NaN`` (what pandas yields
            for empty CSV cells) are treated as missing; any other non-string
            value is converted with ``str()``.

    Returns:
        The cleaned text as a string, or ``""`` when the input is missing.
    """
    # The regex tokenizer only accepts strings, so a missing review would
    # otherwise abort the whole ``.apply`` with a TypeError.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = ' '.join(remove_tanda_baca.tokenize(text))
    return stop_word.remove(stemmer.stem(text))

# Replace a numeric rating with its sentiment label.
def convert_rating(rating):
    """Map a numeric rating to a sentiment label.

    Returns:
        "positif" when the rating is above 3, "negatif" when it is below 3,
        and "netral" in every other case.
    """
    if rating > 3:
        return "positif"
    if rating < 3:
        return "negatif"
    return "netral"


# Fixed seed so the sampled subset is identical on every Restart & Run All.
SAMPLE_SEED = 42
SAMPLES_PER_CLASS = 2000

df = pd.read_csv('./reviews_mandiri.csv')

# Reviews with no text cannot be tokenized, so leave them out before sampling.
df_with_text = df.dropna(subset=['review'])

# Balanced subset: ratings above 3 are positive, below 3 negative.
# Rating 3 is deliberately not sampled.
rating_positif = df_with_text[df_with_text['rating'] > 3].sample(
    n=SAMPLES_PER_CLASS, axis=0, random_state=SAMPLE_SEED
)
rating_negatif = df_with_text[df_with_text['rating'] < 3].sample(
    n=SAMPLES_PER_CLASS, axis=0, random_state=SAMPLE_SEED
)

slice_df = pd.concat([rating_positif, rating_negatif])
slice_df['rating'] = slice_df['rating'].apply(convert_rating)
slice_df['review'] = slice_df['review'].apply(pre_process_text)

# One-hot encode the sentiment label into 'negatif' / 'positif' columns.
rating = pd.get_dummies(slice_df['rating'])

# drop() returns a new frame, so the result has to be assigned; otherwise
# new_set would still carry the columns that were meant to be removed.
new_set = pd.concat([slice_df, rating], axis=1).drop(
    columns=['rating', 'date', 'thumbs_up', 'version']
)
new_set

Unnamed: 0,review,negatif,positif
80229,bagaimana cara filter transaksi by person,False,True
32393,sangat bantu,False,True
141670,neng banget pakai livin by mandiri sangat bantu,False,True
66530,,False,True
148959,aplikasi sangat bantu,False,True
...,...,...,...
42559,mau transfer shopeepay kok gabisa terus dari b...,True,False
44903,bad regist terus gak jadi2,True,False
49960,suram,True,False
12201,susah bangeeeeet ken masuk doang geh sinyal gi...,True,False


In [77]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fixed seed so the train/test partition is identical on every run.
SPLIT_SEED = 42

attribute = new_set['review'].values
label = new_set[['negatif', 'positif']].values

# Stratify on the class index (arg-max of the one-hot row) so both splits keep
# the same negative/positive ratio.
data_latih, data_test, label_latih, label_test = train_test_split(
    attribute,
    label,
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=label.argmax(axis=1),
)

# The vocabulary is fitted on the training split only; words seen only in the
# test split map to the out-of-vocabulary token 'x'.
# NOTE: emitted token ids go up to num_words - 1, so the Embedding layer's
# input_dim has to be at least num_words.
tokenizer = Tokenizer(num_words=20000, oov_token='x')
tokenizer.fit_on_texts(data_latih)

sekuens_latih = tokenizer.texts_to_sequences(data_latih)
sekuens_test = tokenizer.texts_to_sequences(data_test)

# Pad or truncate every sequence to 200 tokens, padding at the end.
pad_latih = pad_sequences(sekuens_latih, padding='post', maxlen=200)
pad_test = pad_sequences(sekuens_test, padding='post', maxlen=200)

In [84]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Baseline model: embedding -> LSTM -> 2-way softmax over the one-hot
# ('negatif', 'positif') label columns.
model_base = tf.keras.Sequential([
    # input_dim must cover every id the tokenizer can emit (0 .. num_words - 1).
    # Deriving it from the tokenizer avoids the out-of-range embedding lookups
    # that a smaller hard-coded value allows once the vocabulary grows.
    tf.keras.layers.Embedding(input_dim=tokenizer.num_words, output_dim=3, input_length=200),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(2, activation='softmax')
])

# Compile the baseline model with its tuned hyperparameters.
# categorical_crossentropy is the loss that matches a softmax output trained
# against one-hot labels.
# NOTE(review): learning_rate=0.1 is far above Adam's default of 0.001 —
# confirm this is intentional.
model_base.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Training accuracy (as a fraction) at which training is stopped early.
ACCURACY_TRESHOLD = 80e-2


class MyCallback(tf.keras.callbacks.Callback):
    """Stop training once the training accuracy reaches ACCURACY_TRESHOLD."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's training accuracy and request a stop if it is high enough.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            logs: Metric values reported for the epoch; may be ``None``.
        """
        accuracy = (logs or {}).get('accuracy')
        # Without an 'accuracy' metric there is nothing to compare, and
        # comparing None against a float would raise a TypeError.
        if accuracy is None:
            return
        if accuracy >= ACCURACY_TRESHOLD:
            # epoch is zero-based; print it one-based so it lines up with
            # the "Epoch N/M" progress lines Keras prints.
            print(f'\n Epoch {epoch + 1}\n Accuracy has reach = {accuracy*100:.2f}%\n training has been stopped.')
            self.model.stop_training = True

# Train for up to 50 epochs, validating on the held-out split after each one;
# the MyCallback instance can end the run before the epoch limit.
model_base.fit(
    x=pad_latih,
    y=label_latih,
    validation_data=(pad_test, label_test),
    callbacks=[MyCallback()],
    epochs=50,
)



Epoch 1/50
Epoch 2/50
 Epoch 1
 Accuracy has reach = 88.22%/n training has been stopped.


<keras.src.callbacks.History at 0x2a4086530>

In [102]:
# Clean the sentence with the same function applied to the training reviews;
# otherwise unstemmed words and stop words may not match the fitted vocabulary.
raw_sentence = "omg, jelek banget"
test_sequence = tokenizer.texts_to_sequences([pre_process_text(raw_sentence)])
padded_sentence = pad_sequences(test_sequence, padding='post', maxlen=200)

# Predict with the model
predictions = model_base.predict(padded_sentence)
# predictions holds one row per input sentence; take the arg-max along the
# class axis and read the single sentence's result instead of flattening.
predicted_class = np.argmax(predictions, axis=-1)[0]


# Prediction result: index 0 is 'negatif' and index 1 is 'positif', following
# the label column order used for training.
if predicted_class == 0:
    print("Kalimat Anda diprediksi sebagai negatif.")
else:
    print("Kalimat Anda diprediksi sebagai positif.")


Kalimat Anda diprediksi sebagai negatif.
