In [1]:
import pandas as pd
import random
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

# 1. Load dataset
df = pd.read_csv('ulasan_gojek.csv')  # ganti dengan nama file CSV Anda

# 2. Label sentimen berdasarkan rating
def get_sentiment(rating):
    if rating <= 2:
        return 'negative'
    elif rating == 3:
        return 'neutral'
    else:
        return 'positive'

df['sentiment'] = df['rating'].apply(get_sentiment)

# 3. Preprocessing dengan Sastrawi
stop_factory = StopWordRemoverFactory()
stopword = stop_factory.create_stop_word_remover()

stem_factory = StemmerFactory()
stemmer = stem_factory.create_stemmer()

def preprocess(text):
    text = stopword.remove(text.lower())
    text = stemmer.stem(text)
    return text

df['clean_ulasan'] = df['ulasan'].astype(str).apply(preprocess)

# 4. Split data
X_train, X_test, y_train, y_test = train_test_split(df['clean_ulasan'], df['sentiment'], test_size=0.2, random_state=42)

# 5. Pipeline vectorizer + classifier
model = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('classifier', MultinomialNB())
])

# 6. Train
model.fit(X_train, y_train)

# 7. Prediksi pada 5 data random
random_data = df.sample(5, random_state=42).copy()
random_data['predicted_sentiment'] = model.predict(random_data['clean_ulasan'])

# 8. Tampilkan hasil
print(random_data[['ulasan', 'rating', 'sentiment', 'predicted_sentiment']])


                                                 ulasan  rating sentiment  \
1501                                         Banyak bug       2  negative   
2586                           tak e es es batu pahat s       3   neutral   
2653  Orderan sy dpt lawan arah koq bisaya  Sy buat ...       3   neutral   
1055                    Bug pada jaringan saat mengirim       2  negative   
705   Trauma pakek aplikasi ini ngasih pin salah say...       1  negative   

     predicted_sentiment  
1501            positive  
2586            positive  
2653            negative  
1055            negative  
705             negative  
