In [1]:
import re
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Build the Sastrawi stemmer once at module level; preprocessing() reuses it for every token.
factory = StemmerFactory()
stemmer = factory.create_stemmer()
# NLTK's Indonesian stop-word list, stored as a set for the membership test in preprocessing().
# NOTE(review): this presumably needs the NLTK 'stopwords' corpus to be downloaded already — confirm.
stop_words = set(stopwords.words('indonesian'))

# Handles (lowercase, without the leading '@') whose @mentions preprocessing() keeps
# instead of deleting.
mention_whitelist = ['prabowo']

# Token-level lookup table used by preprocessing(): informal or abbreviated spellings map
# to a normalized word, and filler words map to '' so the later length filter drops them.
# Every key must appear exactly once: in a dict literal a repeated key silently keeps only
# the last value. 'apik' used to be listed twice ('baik', then 'bagus'); only the effective
# entry ('bagus') is kept, so lookups behave exactly as before.
# NOTE(review): 'klo' maps to 'kalo', which looks like a colloquial spelling itself —
# confirm whether the intended target is 'kalau'.
normalisasi_kata = {
    'gak': 'tidak', 'ga': 'tidak', 'yg': 'yang', 'utk': 'untuk',
    'dgn': 'dengan', 'apik': 'bagus', 'sdh': 'sudah', 'krn': 'karena',
    'tdk': 'tidak', 'klo': 'kalo', 'sbg': 'sebagai',
    'gue': 'aku', 'dlm': 'dalam', 'jgn': 'jangan', 'jkw': 'jokowi', 'org': 'orang',
    'nggak': 'tidak',
    # Filler words: mapped to the empty string and therefore removed downstream.
    'aja': '', 'amp': '', 'nya': '', 'ya': '', 'gitu': '', 'loh': '',
    'dong': '', 'sih': '', 'deh': '', 'nih': '', 'kok': '',
}

def preprocessing(text):
    """Clean a raw text and return it as a space-joined string of stemmed tokens.

    Steps, in order:
      1. Delete ``@mentions`` unless the handle (compared in lowercase) is listed in
         ``mention_whitelist``.
      2. Replace URLs (``http...`` / ``www...``), ``<...>`` tags and ``#`` with a space.
      3. Replace every character that is not an ASCII letter or whitespace with a space.
      4. Lowercase the text and tokenize it with ``word_tokenize``.
      5. Map informal words through ``normalisasi_kata``.
      6. Drop stop words and tokens of two characters or fewer.
      7. Stem each remaining token with ``stemmer``.

    Args:
        text: Raw input text. Any non-string value (for example ``None`` or a float NaN
            coming from a missing cell) is treated as empty text.

    Returns:
        str: The cleaned tokens joined by single spaces; ``''`` when nothing survives
        or when ``text`` is not a string.
    """
    # Missing values would otherwise crash inside re.sub with a TypeError.
    if not isinstance(text, str):
        return ''

    # Whitelisted mentions are kept verbatim here; their '@' is stripped by the
    # non-letter substitution below.
    text = re.sub(r'@(\w+)', lambda m: m.group() if m.group(1).lower() in mention_whitelist else '', text)

    # One pass removes URLs, tags and the hashtag sign. A second tag-only pass would be
    # a no-op: any '<' that survives this pass has no '>' after it on the same line.
    text = re.sub(r'http\S+|www\S+|<.*?>|#', ' ', text)

    # Non-letters (digits, symbols, punctuation) become spaces so words stay separated.
    text = re.sub(r'[^a-zA-Z\s]', ' ', text)

    text = text.lower()

    # Tokenization
    tokens = word_tokenize(text)

    # Normalize informal words; fillers become '' and are dropped by the length filter.
    tokens = [normalisasi_kata.get(word, word) for word in tokens]

    # len(word) > 2 already excludes the empty string, so no separate '' check is needed.
    tokens = [word for word in tokens if word not in stop_words and len(word) > 2]

    tokens = [stemmer.stem(word) for word in tokens]

    return ' '.join(tokens)


In [2]:
import pickle
# Load the pickled tokenizer object from disk; predict_text() calls its
# texts_to_sequences() method.
# NOTE(review): presumably this is the tokenizer fitted during training — confirm.
# NOTE(review): unpickling can execute arbitrary code, so only load a trusted file.
with open("sentiment_app/tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

In [5]:
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Maps the predicted class index to the sentiment label printed by predict_text().
# NOTE(review): the index order must match the label encoding used in training — confirm.
label_map = {0: "Netral", 1: "Positif", 2: "Negatif"}
# Model loaded once from disk and reused by every predict_text() call.
best_model = load_model("models/best_model_LSTM.h5")

# Sequence length that predict_text() pads or truncates every input to.
# NOTE(review): presumably the input length the model was trained with — confirm.
MAX_LEN = 36

def predict_text(text):
    """Classify a single raw text with ``best_model`` and print the result.

    The text is cleaned with ``preprocessing``, encoded by ``tokenizer``, padded or
    truncated at the end to ``MAX_LEN``, and passed to the model. The index of the
    highest score is looked up in ``label_map``.

    Args:
        text: Raw input text to classify.

    Returns:
        None. The original text and the predicted class index and label are printed.
    """
    # The tokenizer expects a batch, so the single cleaned text is wrapped in a list.
    padded_batch = pad_sequences(
        tokenizer.texts_to_sequences([preprocessing(text)]),
        maxlen=MAX_LEN,
        padding='post',
        truncating='post',
    )

    # np.argmax without an axis works on the flattened scores.
    # NOTE(review): assumes the model returns one row of class scores for a
    # one-text batch — confirm.
    predicted_class = np.argmax(best_model.predict(padded_batch))

    print(f"Teks: {text}")
    print(f"Prediksi Kelas: {predicted_class} → {label_map[predicted_class]}")

# Quick manual check: classify one sample sentence and print the predicted label.
text = 'prabowo kinerjanya bagus, ia akan menjadi presiden terbaik' 
predict_text(text)

Teks: prabowo kinerjanya bagus, ia akan menjadi presiden terbaik
Prediksi Kelas: 1 → Positif
