In [85]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score
import keras

from preprocessing import LSTMPreprocess
from model.embedding import EmbeddingScratch
from model.bidirectional import BidirectionalScratch
from model.dense import DenseScratch
from model.sequential import ModelScratch

In [86]:
# Read the held-out test split from disk into a DataFrame.
test_csv_path = "data/test.csv"
test_set = pd.read_csv(test_csv_path)

In [87]:
# Encode sentiment labels as integer class ids and keep the inverse mapping
# so predicted ids can later be turned back into label names.
label_map = {"positive": 0, "neutral": 1, "negative": 2}
label_reverse_map = {class_id: name for name, class_id in label_map.items()}
num_classes = len(label_map)

print("Memproses dataset...")

# Split the frame into raw texts and integer-encoded targets.
# An unknown label raises KeyError here rather than being silently dropped.
test_texts = test_set["text"]
label_ids = test_set["label"].map(lambda name: label_map[name])
test_labels = np.array(label_ids)

# Tokenize the raw texts with the project's preprocessing helper.
preprocess_cls = LSTMPreprocess()
test_token = preprocess_cls.tokenization(test_texts)

Memproses dataset...


In [88]:
# Load Trained Model
#
# Expected layer order of the saved Keras model:
#   0. Embedding Layer
#   1. Bidirectional LSTM Layer
#   2. Dropout Layer
#   3. Dense Layer (Hidden)
#   4. Dropout Layer
#   5. Dense Layer (Output)

try:
    model = keras.models.load_model('lstm_keras.h5')
except OSError:
    print("Model tidak ditemukan, silakan latih model terlebih dahulu dengan menjalankan 'lstm.ipynb'.")
    # Re-raise instead of calling exit(): inside a Jupyter kernel exit()
    # requests a kernel shutdown and throws away all state, whereas an
    # exception only stops this cell (and any "Run All" in progress).
    raise

# Extract the trained parameters from the layers that carry weights.
# Indices 2 and 4 are the Dropout layers listed above and are skipped.
embedding_weights = model.layers[0].get_weights()
bidirectional_weights = model.layers[1].get_weights()
dense1_weights = model.layers[3].get_weights()
dense2_weights = model.layers[5].get_weights()



In [89]:


# Modeling
# Dropout layers are not included because they have no use in the forward pass

# Derive the LSTM unit count from the loaded weights instead of hard-coding it,
# so it always matches the saved model's architecture. In a Keras LSTM the
# recurrent kernel has shape (units, 4 * units), and it is the second array in
# the Bidirectional layer's weight list (forward kernel, forward recurrent
# kernel, ...).
lstm_units = bidirectional_weights[1].shape[0]

# Wrap each set of trained Keras weights in its from-scratch counterpart.
embedding_scratch = EmbeddingScratch(embedding_weights)
bidirectional_scratch = BidirectionalScratch(lstm_units, bidirectional_weights)
dense1_scratch = DenseScratch(dense1_weights, activation_name='relu')
dense2_scratch = DenseScratch(dense2_weights, activation_name='softmax')

# Chain the layers in the same order as the Keras model (minus Dropout).
scratch_model = ModelScratch([
    embedding_scratch,
    bidirectional_scratch,
    dense1_scratch,
    dense2_scratch,
])

In [90]:

# Predict Scratch: class probabilities from the from-scratch forward pass,
# reduced to the most likely class id per sample.
predictions_scratch_probs = scratch_model.predict(test_token)
predicted_labels_scratch = np.argmax(predictions_scratch_probs, axis=1)

# Predict Keras: same inputs through the reference Keras model.
predictions_keras_probs = model.predict(test_token)
predicted_labels_keras = np.argmax(predictions_keras_probs, axis=1)

# Compute and compare the macro-averaged F1 score of both implementations.
f1_keras = f1_score(test_labels, predicted_labels_keras, average='macro')
f1_scratch = f1_score(test_labels, predicted_labels_scratch, average='macro')

print()
print("--- Perbandingan Hasil ---")
print(f"F1 Score (Macro) Keras : {f1_keras:.4f}")
print(f"F1 Score (Macro) Scratch: {f1_scratch:.4f}")
print()

# Inspect a few predictions side by side.
print()
print("Contoh Prediksi (Scratch vs Keras vs Asli):")
# Slicing caps the preview at the number of available rows, so a test set
# with fewer than five samples no longer raises IndexError.
preview_rows = zip(
    predicted_labels_scratch[:5],
    predicted_labels_keras[:5],
    test_labels[:5],
)
for row_number, (scratch_id, keras_id, true_id) in enumerate(preview_rows, start=1):
    print(f"Data ke-{row_number}: Scratch={label_reverse_map[scratch_id]}, Keras={label_reverse_map[keras_id]}, True={label_reverse_map[true_id]}")
print()

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 79ms/step

--- Perbandingan Hasil ---
F1 Score (Macro) Keras : 0.3744
F1 Score (Macro) Scratch: 0.3044


Contoh Prediksi (Scratch vs Keras vs Asli):
Data ke-1: Scratch=negative, Keras=negative, True=positive
Data ke-2: Scratch=negative, Keras=negative, True=neutral
Data ke-3: Scratch=negative, Keras=negative, True=negative
Data ke-4: Scratch=negative, Keras=negative, True=positive
Data ke-5: Scratch=negative, Keras=negative, True=neutral

