# 1.Load Dataset


In [8]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split


file_path = "C:/kuliah/semester 4/KAL/combined_sentiment_data.csv"
df = pd.read_csv(file_path)




# 2.Preprocessing


In [3]:
# Preprocessing function
def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)
    return text

df['sentence'] = df['sentence'].apply(clean_text)
df['sentiment'] = df['sentiment'].map({'negative': 0, 'positive': 1})


max_features = 20000  
maxlen = 300  
tokenizer = Tokenizer(num_words=max_features, oov_token="<OOV>")
tokenizer.fit_on_texts(df['sentence'])

x = tokenizer.texts_to_sequences(df['sentence'])
x = pad_sequences(x, maxlen=maxlen)
y = df['sentiment'].values


x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)



# 3.Membangun Model LSTM

In [4]:

model = models.Sequential([
    layers.Embedding(max_features, 128),
    layers.Conv1D(64, kernel_size=5, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Bidirectional(layers.LSTM(128, dropout=0.3, recurrent_dropout=0.3, return_sequences=True)),
    layers.Bidirectional(layers.LSTM(64, dropout=0.3, recurrent_dropout=0.3)),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid')
])


model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
              loss='binary_crossentropy',
              metrics=['accuracy'])




# 4.Training Model

In [5]:
model.fit(x_train, y_train, epochs=7, batch_size=64, validation_data=(x_test, y_test))




Epoch 1/7
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m235s[0m 4s/step - accuracy: 0.5089 - loss: 0.6938 - val_accuracy: 0.4955 - val_loss: 0.6929
Epoch 2/7
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 4s/step - accuracy: 0.5294 - loss: 0.6913 - val_accuracy: 0.5921 - val_loss: 0.6830
Epoch 3/7
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 4s/step - accuracy: 0.7362 - loss: 0.6112 - val_accuracy: 0.7749 - val_loss: 0.4806
Epoch 4/7
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 4s/step - accuracy: 0.8974 - loss: 0.2857 - val_accuracy: 0.8006 - val_loss: 0.5009
Epoch 5/7
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 1s/step - accuracy: 0.9598 - loss: 0.1518 - val_accuracy: 0.8172 - val_loss: 0.5622
Epoch 6/7
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 1s/step - accuracy: 0.9785 - loss: 0.0895 - val_accuracy: 0.8187 - val_loss: 0.6611
Epoch 7/7
[1m42/42[0m [32m━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1906ac132f0>

# 5.Evaluasi Model

In [6]:
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f'Akurasi Sentimen Model: {test_acc:.4f}')

21/21 - 3s - 150ms/step - accuracy: 0.8202 - loss: 0.6772
Akurasi Sentimen Model: 0.8202


# 6.Prediksi Model

In [None]:
import ipywidgets as widgets
from IPython.display import display


def predict_sentiment(text):
    text = clean_text(text)
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=maxlen)
    prediction = model.predict(padded)[0][0]
    sentiment = "positive" if prediction > 0.5 else "negative"
    print(f"Prediksi Sentimen: {sentiment} ({prediction:.4f})")


text_box = widgets.Text(placeholder="Masukkan teks...")
button = widgets.Button(description="Prediksi")

def on_button_clicked(b):
    text = text_box.value
    predict_sentiment(text)

button.on_click(on_button_clicked)
display(text_box, button)
