## Inference

In [None]:
# Load the review CSV into `df` and preview its first rows.
# NOTE(review): the path is absolute (presumably a Colab working directory) —
# confirm it before running this notebook in another environment.
df = pd.read_csv("/content/gojek_reviews_cleaned.csv")
df.head()

Unnamed: 0,cleaned_content,sentiment
0,tingkat inovasi promo yg tarik guna setia goje...,2
1,bantu,2
2,numeroo unoo,2
3,bantu,2
4,mantap,2


In [None]:
# Summarize columns, dtypes and non-null counts to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   cleaned_content  9731 non-null   object
 1   sentiment        10000 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 156.4+ KB


In [None]:
# Replace missing review text with an empty string so every row of
# `cleaned_content` is a str. The result is rebound to `df` instead of using
# inplace=True, which keeps the cell idempotent on re-run and avoids mutating
# a frame that an earlier cell already displayed.
df = df.fillna({"cleaned_content": ""})

In [None]:
import pickle
import joblib
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from gensim.models import Word2Vec
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.preprocessing.text import Tokenizer

# Reload the previously saved models and the vectorizer.
# NOTE(review): pickle/joblib files can execute code while loading — only load
# artifacts that come from a trusted source.

# TF-IDF vectorizer (stored as a plain pickle).
with open("tfidf_vectorizer.pkl", mode="rb") as file:
    tfidf_vectorizer = pickle.load(file)

# The two classifiers that consume TF-IDF features (stored with joblib),
# loaded in the same order as before: SVM first, then Random Forest.
svm_model, rf_model = (
    joblib.load(artifact_path)
    for artifact_path in ("tfidf_svm_model.pkl", "tfidf_rf_model.pkl")
)

# Word2Vec model and the Keras LSTM. compile=False skips restoring the
# training configuration; the LSTM is only used for prediction below.
word2vec_model = Word2Vec.load("word2vec_model.bin")
lstm_model = load_model("word2vec_lstm_model.h5", compile=False)


In [None]:
# Helpers that convert text into the vector/sequence format each model expects
def preprocess_text_tfidf(text):
    """Vectorize a single text with the module-level ``tfidf_vectorizer``.

    The text is wrapped in a one-element list, so the value returned by
    ``transform`` describes exactly one document.
    """
    single_document = [text]
    return tfidf_vectorizer.transform(single_document)

def preprocess_text_word2vec(text, model, vector_size=None):
    """Average the Word2Vec vectors of the known words in ``text``.

    Args:
        text: Text that is split on whitespace into words.
        model: Object exposing ``model.wv``, which must support
            ``word in model.wv`` and ``model.wv[word]``.
        vector_size: Length of the zero vector returned when no word of
            ``text`` is in the vocabulary. When omitted, the model's own
            ``wv.vector_size`` is used (falling back to 100 if the attribute
            is missing), so the fallback always has the same shape as a
            regular averaged vector.

    Returns:
        The element-wise mean of the vectors of all in-vocabulary words, or a
        zero vector when none of the words is known.
    """
    keyed_vectors = model.wv
    known_vectors = [
        keyed_vectors[word] for word in text.split() if word in keyed_vectors
    ]
    if known_vectors:
        return np.mean(known_vectors, axis=0)

    if vector_size is None:
        # Derive the dimension from the model instead of assuming 100, so an
        # all-unknown text yields a vector compatible with the known case.
        vector_size = getattr(keyed_vectors, "vector_size", 100)
    return np.zeros(vector_size)

def preprocess_text_lstm(text, tokenizer, max_len=50):
    """Encode one text as a padded sequence for the LSTM.

    ``tokenizer.texts_to_sequences`` is applied to a one-element batch and the
    result is handed to ``pad_sequences`` with ``maxlen=max_len``.
    """
    return pad_sequences(tokenizer.texts_to_sequences([text]), maxlen=max_len)

# Tokenizer that produces the integer sequences fed to the LSTM.
# NOTE(review): the vocabulary is re-fitted here on `df`; its word indices only
# match the ones the LSTM saw during training if training fitted an identical
# Tokenizer on the same texts — confirm, or load the tokenizer saved at
# training time instead.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df["cleaned_content"].tolist())

In [None]:
# Run every loaded model on one text
def predict_sentiment(text):
    """Predict the sentiment label of ``text`` with all three models.

    Args:
        text: The review text to classify.
            NOTE(review): the text is used exactly as given; presumably the
            models were trained on cleaned text — confirm whether the same
            cleaning should be applied before calling this function.

    Returns:
        A dict mapping each model's display name to its predicted label:
        ``"TF-IDF + SVM"``, ``"TF-IDF + Random Forest"`` and
        ``"Word2Vec + LSTM"``.
    """
    # The SVM and the Random Forest share the same TF-IDF features.
    tfidf_vector = preprocess_text_tfidf(text)
    pred_svm = svm_model.predict(tfidf_vector)[0]
    pred_rf = rf_model.predict(tfidf_vector)[0]

    # The LSTM consumes the padded token-id sequence. The averaged Word2Vec
    # vector that used to be computed here was never passed to any model, so
    # that dead computation has been removed.
    lstm_input = preprocess_text_lstm(text, tokenizer)
    # verbose=0 keeps the Keras progress bar out of the prediction output.
    class_scores = lstm_model.predict(lstm_input, verbose=0)
    # Highest-scoring class index of the single input row.
    pred_lstm = np.argmax(class_scores, axis=1)[0]

    return {
        "TF-IDF + SVM": pred_svm,
        "TF-IDF + Random Forest": pred_rf,
        "Word2Vec + LSTM": pred_lstm
    }

In [None]:
# Interactive prediction: keep asking for a review until the user types 'exit'.
while True:
    # Strip surrounding whitespace so inputs such as "exit " still quit.
    text_sample = input("Masukkan teks review (atau ketik 'exit' untuk keluar): ").strip()
    if text_sample.lower() == "exit":
        break
    if not text_sample:
        # Nothing to classify; prompt again instead of predicting on empty text.
        continue
    predictions = predict_sentiment(text_sample)
    print("\n Hasil Prediksi:")
    # `model_name` (not `model`) so the loop does not leave a string bound to a
    # global name that may refer to a real model elsewhere in the notebook.
    for model_name, pred in predictions.items():
        print(f"{model_name}: Sentimen {pred}")
    print("\n" + "-"*50 + "\n")

Masukkan teks review (atau ketik 'exit' untuk keluar): aplikasinya bagus banget
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 371ms/step

 Hasil Prediksi:
TF-IDF + SVM: Sentimen 2
TF-IDF + Random Forest: Sentimen 2
Word2Vec + LSTM: Sentimen 2

--------------------------------------------------

Masukkan teks review (atau ketik 'exit' untuk keluar): aplikasinya jelek banget
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step

 Hasil Prediksi:
TF-IDF + SVM: Sentimen 0
TF-IDF + Random Forest: Sentimen 0
Word2Vec + LSTM: Sentimen 0

--------------------------------------------------

Masukkan teks review (atau ketik 'exit' untuk keluar): tambahkan dong fitur nya
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step

 Hasil Prediksi:
TF-IDF + SVM: Sentimen 0
TF-IDF + Random Forest: Sentimen 2
Word2Vec + LSTM: Sentimen 2

--------------------------------------------------

Masukkan teks review (atau ketik 'exit' untuk keluar): exit
