In [2]:
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
import pandas as pd

In [3]:
# Load your dataset
data = pd.read_csv('dataset/mobile_jkn.csv')  # Load your CSV file

# Assume 'content' contains review text and 'score' contains sentiment labels
X = data['content']  # Feature: text reviews
y = data['score']    # Label: sentiment

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameters
max_words = 5000  # Maximum number of words to consider
max_len = 200     # Maximum length of input sequences
embedding_dim = 128  # Embedding size for each token

# Tokenization of the text data
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Convert the text data to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Pad the sequences to ensure uniform input shape
X_train_padded = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')

# Convert labels to categorical (one-hot encoding)
lb = LabelBinarizer()
y_train_onehot = lb.fit_transform(y_train)
y_test_onehot = lb.transform(y_test)

In [4]:
# -------------------- Penyeimbangan Data Menggunakan SMOTE -------------------- #

# Resample training data using SMOTE
smote = SMOTE(random_state=42)
X_train_padded_resampled, y_train_onehot_resampled = smote.fit_resample(X_train_padded, y_train_onehot)

# Flatten y_train_onehot_resampled and y_test_onehot for SVM, Naive Bayes, and KNN compatibility
y_train_flat = y_train_onehot_resampled.argmax(axis=1)
y_test_flat = y_test_onehot.argmax(axis=1)


In [6]:
# -------------------- LSTM Model -------------------- #

# Build the LSTM model using TensorFlow
lstm_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(max_words, embedding_dim, input_length=max_len),
    tf.keras.layers.SpatialDropout1D(0.2),
    tf.keras.layers.LSTM(128, return_sequences=True),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(y_train_onehot.shape[1], activation='softmax')  # Adjust output classes dynamically
])

# Compile the LSTM model
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_lstm_model.h5', save_best_only=True, monitor='val_loss')
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=0.0001)

# Train the LSTM model with callbacks
lstm_model.fit(
    X_train_padded_resampled, 
    y_train_onehot_resampled, 
    epochs=10, 
    batch_size=128, 
    validation_data=(X_test_padded, y_test_onehot),
    callbacks=[early_stopping, model_checkpoint, reduce_lr]
)

# Evaluate the LSTM model on the test set
lstm_test_loss, lstm_test_accuracy = lstm_model.evaluate(X_test_padded, y_test_onehot)

# Predict with LSTM
y_pred_lstm = lstm_model.predict(X_test_padded)
y_pred_lstm_flat = y_pred_lstm.argmax(axis=1)

# LSTM Precision, Recall, F1-Score
lstm_precision = precision_score(y_test_flat, y_pred_lstm_flat, average='weighted')
lstm_recall = recall_score(y_test_flat, y_pred_lstm_flat, average='weighted')
lstm_f1 = f1_score(y_test_flat, y_pred_lstm_flat, average='weighted')

print(f"LSTM Test Accuracy: {lstm_test_accuracy}")
print(f"LSTM Precision: {lstm_precision}")
print(f"LSTM Recall: {lstm_recall}")
print(f"LSTM F1-Score: {lstm_f1}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
LSTM Test Accuracy: 0.820900022983551
LSTM Precision: 0.7808043063174731
LSTM Recall: 0.8209
LSTM F1-Score: 0.7949926657977024


In [9]:
# -------------------- SVM per Batch with SGDClassifier -------------------- #

# Standarisasi fitur input (karena SVM sensitif terhadap skala fitur)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_padded_resampled)
X_test_scaled = scaler.transform(X_test_padded)

# Inisialisasi SGDClassifier untuk linear SVM
svm_batch_model = SGDClassifier(loss='hinge', random_state=42)

# Konfigurasi batch
batch_size = 10000  # Ukuran batch diatur ke 10.000
n_batches = len(X_train_scaled) // batch_size + 1  # Menentukan jumlah batch

# Variabel untuk menyimpan total
total_accuracy = 0
total_precision = 0
total_recall = 0
total_f1 = 0

# Train SVM model per batch
for epoch in range(5):  # Tentukan jumlah epoch
    X_train_scaled, y_train_flat = shuffle(X_train_scaled, y_train_flat, random_state=42)  # Shuffle data di setiap epoch
    print(f"Epoch {epoch+1}/{5}")
    
    for i in range(0, len(X_train_scaled), batch_size):
        # Mendapatkan batch saat ini
        X_batch = X_train_scaled[i:i+batch_size]
        y_batch = y_train_flat[i:i+batch_size]
        
        # Partial fit menggunakan batch
        svm_batch_model.partial_fit(X_batch, y_batch, classes=np.unique(y_train_flat))
        
        # Menampilkan progress setiap batch
        progress = (i + len(X_batch)) / len(X_train_scaled) * 100
        print(f"Processed {progress:.2f}% of data")

# Predict on test data
y_pred_svm_batch = svm_batch_model.predict(X_test_scaled)

# SVM Accuracy
svm_batch_accuracy = accuracy_score(y_test_flat, y_pred_svm_batch)
print(f"SVM per Batch Test Accuracy: {svm_batch_accuracy}")

# SVM Precision, Recall, F1-Score
svm_batch_precision = precision_score(y_test_flat, y_pred_svm_batch, average='weighted')
svm_batch_recall = recall_score(y_test_flat, y_pred_svm_batch, average='weighted')
svm_batch_f1 = f1_score(y_test_flat, y_pred_svm_batch, average='weighted')

# Menambahkan hasil ke total
total_accuracy += svm_batch_accuracy
total_precision += svm_batch_precision
total_recall += svm_batch_recall
total_f1 += svm_batch_f1

print(f"SVM per Batch Precision: {svm_batch_precision}")
print(f"SVM per Batch Recall: {svm_batch_recall}")
print(f"SVM per Batch F1-Score: {svm_batch_f1}")

# Tampilkan hasil total SVM
print("\nTotal Hasil Evaluasi SVM:")
print(f"Total Accuracy: {total_accuracy}")
print(f"Total Precision: {total_precision}")
print(f"Total Recall: {total_recall}")
print(f"Total F1-Score: {total_f1}")


Epoch 1/5
Processed 4.05% of data
Processed 8.09% of data
Processed 12.14% of data
Processed 16.18% of data
Processed 20.23% of data
Processed 24.27% of data
Processed 28.32% of data
Processed 32.36% of data
Processed 36.41% of data
Processed 40.45% of data
Processed 44.50% of data
Processed 48.54% of data
Processed 52.59% of data
Processed 56.63% of data
Processed 60.68% of data
Processed 64.72% of data
Processed 68.77% of data
Processed 72.81% of data
Processed 76.86% of data
Processed 80.90% of data
Processed 84.95% of data
Processed 88.99% of data
Processed 93.04% of data
Processed 97.08% of data
Processed 100.00% of data
Epoch 2/5
Processed 4.05% of data
Processed 8.09% of data
Processed 12.14% of data
Processed 16.18% of data
Processed 20.23% of data
Processed 24.27% of data
Processed 28.32% of data
Processed 32.36% of data
Processed 36.41% of data
Processed 40.45% of data
Processed 44.50% of data
Processed 48.54% of data
Processed 52.59% of data
Processed 56.63% of data
Processe

In [8]:
# -------------------- Naive Bayes Model -------------------- #

# Train Naive Bayes model with resampled training data
nb_model = MultinomialNB()
nb_model.fit(X_train_padded_resampled, y_train_flat)

# Predict on test data
y_pred_nb = nb_model.predict(X_test_padded)

# Naive Bayes Accuracy
nb_accuracy = accuracy_score(y_test_flat, y_pred_nb)
print(f"Naive Bayes Test Accuracy: {nb_accuracy}")

# Naive Bayes Precision, Recall, F1-Score
nb_precision = precision_score(y_test_flat, y_pred_nb, average='weighted')
nb_recall = recall_score(y_test_flat, y_pred_nb, average='weighted')
nb_f1 = f1_score(y_test_flat, y_pred_nb, average='weighted')

print(f"Naive Bayes Precision: {nb_precision}")
print(f"Naive Bayes Recall: {nb_recall}")
print(f"Naive Bayes F1-Score: {nb_f1}")


Naive Bayes Test Accuracy: 0.22685
Naive Bayes Precision: 0.6207400784221465
Naive Bayes Recall: 0.22685
Naive Bayes F1-Score: 0.2863439658345516


In [10]:
# -------------------- KNN Model -------------------- #

# Train KNN model with resampled training data
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train_padded_resampled, y_train_flat)

# Predict on test data
y_pred_knn = knn_model.predict(X_test_padded)

# KNN Accuracy
knn_accuracy = accuracy_score(y_test_flat, y_pred_knn)
print(f"KNN Test Accuracy: {knn_accuracy}")

# KNN Precision, Recall, F1-Score
knn_precision = precision_score(y_test_flat, y_pred_knn, average='weighted')
knn_recall = recall_score(y_test_flat, y_pred_knn, average='weighted')
knn_f1 = f1_score(y_test_flat, y_pred_knn, average='weighted')

print(f"KNN Precision: {knn_precision}")
print(f"KNN Recall: {knn_recall}")
print(f"KNN F1-Score: {knn_f1}")

KNN Test Accuracy: 0.1537
KNN Precision: 0.3906920264990941
KNN Recall: 0.1537
KNN F1-Score: 0.1675206580793207


In [None]:
# -------------------- Comparison of Results -------------------- #

print("\nComparison of Model Results:")
print(f"LSTM Test Accuracy: {lstm_test_accuracy}, Precision: {lstm_precision}, Recall: {lstm_recall}, F1-Score: {lstm_f1}")
print(f"SVM Test Accuracy: {svm_accuracy}, Precision: {svm_precision}, Recall: {svm_recall}, F1-Score: {svm_f1}")
print(f"Naive Bayes Test Accuracy: {nb_accuracy}, Precision: {nb_precision}, Recall: {nb_recall}, F1-Score: {nb_f1}")
print(f"KNN Test Accuracy: {knn_accuracy}, Precision: {knn_precision}, Recall: {knn_recall}, F1-Score: {knn_f1}")