### Import Libraries

In [31]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import re
# Fetch the Indonesian stop-word corpus used by the cleaning step.
# quiet=True keeps the downloader's log (which prints the local nltk_data
# directory, including the OS user name) out of the saved notebook output.
nltk.download('stopwords', quiet=True)

[nltk_data] Downloading package stopwords to C:\Users\Wafiul
[nltk_data]     Achdi\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

### Data Loading


In [32]:

# Load the review dataset; the path is resolved relative to the notebook's
# working directory.
df = pd.read_csv('../data/dataset.csv')


# Preview the first rows to confirm the columns were parsed as expected.
print(df.head())

                                             content  score sentiment
0  Setelah update , kendala "petama: no yg sudah ...      2   negatif
1  Aku kasih bintang 2 ,semenjak wa update terus ...      2   negatif
2  Buat aplikasi whatsApp saya minta tolong untuk...      4   positif
3  Terima kasih Aplikasi ini sangat membantu untu...      4   positif
4  makin di update, makin parah sih kualitas apli...      1   negatif


In [33]:

# Indonesian stop words, stored as a set so membership checks in clean_text
# are constant-time.
stop_words = set(stopwords.words('indonesian'))
# NOTE(review): PorterStemmer implements English suffix-stripping rules, while
# the reviews are Indonesian; it appears to only clip English loanwords
# (e.g. "update" -> "updat"). Consider an Indonesian stemmer — TODO confirm.
stemmer = PorterStemmer()

def clean_text(text):
    """Normalize one raw review for TF-IDF vectorization.

    The text is lowercased, every character that is not an ASCII letter or
    whitespace is replaced by a space, tokens found in the module-level
    ``stop_words`` set are dropped, and the remaining tokens are passed
    through the module-level ``stemmer``.

    Args:
        text: Raw review text. Non-string values (for example NaN produced
            by an empty CSV cell) are treated as an empty review instead of
            raising ``AttributeError``.

    Returns:
        str: The surviving stemmed tokens joined by single spaces; may be
        an empty string.
    """
    if not isinstance(text, str):
        return ''

    # Replace with a space rather than deleting: words separated only by
    # punctuation or digits (e.g. "bagus,tapi") must stay separate tokens
    # instead of being fused into one unknown word.
    text = re.sub(r'[^a-z\s]', ' ', text.lower())

    # Filter stop words and stem in a single pass over the tokens.
    return ' '.join(
        stemmer.stem(word) for word in text.split() if word not in stop_words
    )


# `df` is already loaded by the data-loading cell, so the CSV is not read a
# second time here.
print("Data sebelum pemrosesan:")
# Hide any previously computed cleaned_content column so the "before" preview
# is the same whether this cell runs once or is re-run.
print(df.drop(columns='cleaned_content', errors='ignore').head())


# Bind a new frame instead of mutating the loaded one in place; re-running
# the cell simply recomputes the column.
df = df.assign(cleaned_content=df['content'].apply(clean_text))


print("\nData setelah pemrosesan:")
print(df[['content', 'cleaned_content']].head())


Data sebelum pemrosesan:
                                             content  score sentiment
0  Setelah update , kendala "petama: no yg sudah ...      2   negatif
1  Aku kasih bintang 2 ,semenjak wa update terus ...      2   negatif
2  Buat aplikasi whatsApp saya minta tolong untuk...      4   positif
3  Terima kasih Aplikasi ini sangat membantu untu...      4   positif
4  makin di update, makin parah sih kualitas apli...      1   negatif

Data setelah pemrosesan:
                                             content  \
0  Setelah update , kendala "petama: no yg sudah ...   
1  Aku kasih bintang 2 ,semenjak wa update terus ...   
2  Buat aplikasi whatsApp saya minta tolong untuk...   
3  Terima kasih Aplikasi ini sangat membantu untu...   
4  makin di update, makin parah sih kualitas apli...   

                                     cleaned_content  
0  updat kendala petama no yg disimpan ilang angk...  
1  kasih bintang semenjak wa updat kebanyakan nge...  
2  aplikasi whatsapp tolong

In [34]:
y = df['sentiment']

# Split the cleaned text BEFORE vectorizing so the TF-IDF vocabulary and IDF
# weights are learned from the training rows only. Fitting the vectorizer on
# the whole corpus leaks test-set statistics into the training features and
# inflates the test score.
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_content'], y, test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Keep the name `X` defined as the full-corpus feature matrix, now projected
# with the train-fitted vectorizer, for any cell that still refers to it.
X = vectorizer.transform(df['cleaned_content'])

In [35]:
# Fit a support-vector classifier with scikit-learn's default settings.
model = SVC()
model.fit(X_train, y_train)


# Accuracy on the rows the model was fitted on (optimistic by construction).
y_train_pred = model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f'Akurasi pada data pelatihan: {train_accuracy * 100:.2f}%')


# Accuracy on the held-out rows. The test predictions are computed once;
# the earlier duplicate model.predict(X_test) call was redundant work.
# NOTE(review): the recorded output reports 100% on both splits — verify the
# dataset size and that labels are not trivially recoverable from the text.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f'Akurasi pada data pengujian: {test_accuracy * 100:.2f}%')

Akurasi pada data pelatihan: 100.00%
Akurasi pada data pengujian: 100.00%


In [36]:
def predict_sentiment(comment):
    """Predict the sentiment label for a single raw comment.

    The comment goes through the same ``clean_text`` preprocessing as the
    training data, is vectorized with the already-fitted ``vectorizer``, and
    is classified by the trained ``model``.

    Args:
        comment: Raw comment text.

    Returns:
        The first (and only) element of the model's prediction array.
    """
    # transform() expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([clean_text(comment)])
    predictions = model.predict(features)
    return predictions[0]

In [37]:
# Smoke test with a positive-sounding sentence ("This app is very good!").
new_comment = "Aplikasi ini sangat bagus!"
predicted_sentiment = predict_sentiment(new_comment)
print(f'Sentimen: {predicted_sentiment}')

Sentimen: positif


In [38]:
# Smoke test with a negative-sounding sentence ("This app is very bad!").
new_comment = "Aplikasi ini sangat jelek!"
predicted_sentiment = predict_sentiment(new_comment)
print(f'Sentimen: {predicted_sentiment}')

Sentimen: negatif
