**EKSPERIMEN 1**

Menggunakan algoritma Naive Bayes dan ekstraksi fitur TF-IDF

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# Experiment 1: Multinomial Naive Bayes trained on TF-IDF features.
# Read the manually labelled dataset from the mounted Drive folder.
df = pd.read_csv("/content/drive/MyDrive/250_Data_Pelabelan_Manual.csv")

# Inputs come from the 'review_cleaned' column, targets from the 'label' column.
X, y = df['review_cleaned'], df['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn vocabulary and IDF weights from the training texts only, then reuse
# the fitted vectorizer to project the test texts into the same feature space.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# fit() returns the estimator itself, so construction and training are chained.
nb = MultinomialNB().fit(X_train_tfidf, y_train)

# Score the held-out split.
y_pred = nb.predict(X_test_tfidf)
acc = accuracy_score(y_test, y_pred)

print("Akurasi Model: ", acc)
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Akurasi Model:  0.84

Classification Report:
              precision    recall  f1-score   support

     negatif       0.95      0.95      0.95        20
      netral       0.50      0.33      0.40         6
     positif       0.81      0.88      0.84        24

    accuracy                           0.84        50
   macro avg       0.75      0.72      0.73        50
weighted avg       0.83      0.84      0.83        50



**EKSPERIMEN 2**

Menggunakan algoritma SVM dan ekstraksi fitur TF-IDF


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score

# Experiment 2: linear SVM trained on TF-IDF features.
# Read the manually labelled dataset from the mounted Drive folder.
df = pd.read_csv("/content/drive/MyDrive/250_Data_Pelabelan_Manual.csv")

# Inputs come from the 'review_cleaned' column, targets from the 'label' column.
X = df['review_cleaned']
y = df['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the vectorizer on the training texts only so that no vocabulary or IDF
# statistics leak from the test split.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# LinearSVC shuffles the samples internally when it solves the dual problem;
# pinning random_state makes the reported scores repeatable across runs.
svm_model = LinearSVC(random_state=42)
svm_model.fit(X_train_tfidf, y_train)

y_pred = svm_model.predict(X_test_tfidf)

acc = accuracy_score(y_test, y_pred)
print("Akurasi Model SVM:", acc)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Akurasi Model SVM: 0.76

Classification Report:
              precision    recall  f1-score   support

     negatif       0.81      0.85      0.83        20
      netral       0.30      0.50      0.38         6
     positif       0.95      0.75      0.84        24

    accuracy                           0.76        50
   macro avg       0.69      0.70      0.68        50
weighted avg       0.81      0.76      0.78        50



**EKSPERIMEN 3**

Menggunakan algoritma LSTM

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report

# Experiment 3: LSTM classifier trained on padded token-id sequences.
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and the Keras backend so that weight initialisation,
# dropout masks and batch shuffling are repeatable between runs (some GPU
# kernels may still introduce small run-to-run differences).
set_random_seed(42)

df = pd.read_csv("/content/drive/MyDrive/250_Data_Pelabelan_Manual.csv")

# Inputs come from the 'review_cleaned' column, targets from the 'label' column.
X = df['review_cleaned']
y = df['label']

# Map the string labels to integer ids, then one-hot encode them so they
# match the softmax output / categorical cross-entropy loss used below.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_encoded = to_categorical(y_encoded)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

max_words = 5000  # vocabulary cap shared by the Tokenizer and the Embedding layer
max_len = 100     # every sequence is padded/truncated to this many token ids

# Build the vocabulary from the training texts only.
tokenizer = Tokenizer(num_words=max_words, split=' ')
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

model = Sequential()
# `input_length` is deprecated in current Keras releases and is not needed:
# the sequence length is inferred from the padded input on the first call.
model.add(Embedding(max_words, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
# One output unit per class found by the label encoder.
model.add(Dense(y_encoded.shape[1], activation='softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# NOTE(review): the test split doubles as validation data here. It is only
# monitored (no early stopping or checkpointing depends on it), but tuning
# `epochs` against these curves would bias the final test score. Consider
# carving a validation split out of the training data instead.
history = model.fit(
    X_train_pad,
    y_train,
    epochs=10,
    batch_size=8,
    validation_data=(X_test_pad, y_test)
)

loss, acc = model.evaluate(X_test_pad, y_test)
print(f"Akurasi Model LSTM: {acc:.2f}")

# Convert one-hot targets and softmax outputs back to class indices.
y_test_arg = np.argmax(y_test, axis=1)
y_pred = np.argmax(model.predict(X_test_pad), axis=1)

print(classification_report(y_test_arg, y_pred, target_names=label_encoder.classes_))


Epoch 1/10




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 267ms/step - accuracy: 0.3441 - loss: 1.0929 - val_accuracy: 0.5200 - val_loss: 1.0051
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 174ms/step - accuracy: 0.4900 - loss: 1.0490 - val_accuracy: 0.4200 - val_loss: 0.9645
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 176ms/step - accuracy: 0.7176 - loss: 0.8986 - val_accuracy: 0.7400 - val_loss: 0.6267
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 255ms/step - accuracy: 0.8091 - loss: 0.5340 - val_accuracy: 0.8000 - val_loss: 0.5411
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 174ms/step - accuracy: 0.9149 - loss: 0.2136 - val_accuracy: 0.8200 - val_loss: 0.5340
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 179ms/step - accuracy: 0.9870 - loss: 0.0803 - val_accuracy: 0.7000 - val_loss: 0.7491
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━



[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 861ms/step



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 417ms/step
              precision    recall  f1-score   support

     negatif       0.76      0.95      0.84        20
      netral       0.50      0.50      0.50         6
     positif       0.89      0.71      0.79        24

    accuracy                           0.78        50
   macro avg       0.72      0.72      0.71        50
weighted avg       0.79      0.78      0.78        50



**EKSPERIMEN 4**

Menggunakan algoritma Naive Bayes dan ekstraksi fitur Word2Vec

In [None]:
!pip install gensim



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, accuracy_score
from gensim.models import Word2Vec
from tqdm import tqdm

# Experiment 4 (part 1): load the data, split it, and train Word2Vec.
df = pd.read_csv("/content/drive/MyDrive/250_Data_Pelabelan_Manual.csv")

# Inputs come from the 'review_cleaned' column, targets from the 'label' column.
X = df['review_cleaned']
y = df['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Whitespace-tokenise every review into a list of words.
train_sentences = [row.split() for row in X_train]
test_sentences  = [row.split() for row in X_test]

# Train the embeddings on the training sentences only.
# workers=1: multi-threaded training processes examples in a scheduling-
# dependent order, so the learned vectors (and the downstream accuracy)
# change from run to run. A single worker keeps the run repeatable, and the
# corpus is small enough that the extra threads bring no practical speed-up.
w2v_model = Word2Vec(
    sentences=train_sentences,
    vector_size=100,
    window=5,
    min_count=1,
    workers=1
)

def get_sentence_vector(words, model, vector_size=None):
    """Average the embeddings of the in-vocabulary tokens of one sentence.

    Args:
        words: Iterable of string tokens.
        model: Trained embedding model exposing its keyed vectors as
            ``model.wv`` (supports ``word in model.wv`` and ``model.wv[word]``).
        vector_size: Dimensionality of the embeddings. When omitted it is read
            from ``model.wv.vector_size`` so the function works with models of
            any size instead of assuming 100 dimensions.

    Returns:
        A 1-D float64 NumPy array of length ``vector_size`` holding the mean of
        the known tokens' vectors, or all zeros when no token is in the
        vocabulary (including an empty ``words``).
    """
    if vector_size is None:
        vector_size = model.wv.vector_size

    word_vecs = np.zeros((vector_size,))
    count = 0
    for word in words:
        # Tokens missing from the vocabulary are skipped rather than zero-filled,
        # so they do not drag the average towards the origin.
        if word in model.wv:
            word_vecs += model.wv[word]
            count += 1
    if count > 0:
        word_vecs /= count
    return word_vecs

# Experiment 4 (part 2): classify averaged Word2Vec sentence vectors.
# Turn every tokenised review into one fixed-length vector.
X_train_vec = np.array([get_sentence_vector(tokens, w2v_model) for tokens in train_sentences])
X_test_vec = np.array([get_sentence_vector(tokens, w2v_model) for tokens in test_sentences])

# Fit a Gaussian Naive Bayes classifier on the dense sentence vectors;
# fit() returns the estimator itself, so construction and training are chained.
nb = GaussianNB().fit(X_train_vec, y_train)

# Score the held-out split.
y_pred = nb.predict(X_test_vec)

print("Akurasi:", accuracy_score(y_test, y_pred))
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


Akurasi: 0.74

Classification Report:
              precision    recall  f1-score   support

     negatif       0.88      0.70      0.78        20
      netral       0.38      0.50      0.43         6
     positif       0.77      0.83      0.80        24

    accuracy                           0.74        50
   macro avg       0.67      0.68      0.67        50
weighted avg       0.76      0.74      0.75        50

