In [1]:
!pip install pandas vadersentiment nltk sastrawi

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import nltk
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

In [3]:
#1 using xlsx file, the text on Column A 'Text'
df = pd.read_excel('practice3.xlsx')

#2 Sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

#3 Stemmer from PySastrawi for Bahasa Indonesia languange
factory = StemmerFactory()
stemmer = factory.create_stemmer()

#4 Remove symbols/non-alphabetic, tokenize, stemming
def preprocess_text(text):
    # Remove non-alphabetic characters
    text = ''.join(c for c in text if c.isalpha() or c.isspace())
    # Lowercase text
    text = text.lower()
    # Tokenize text
    tokens = nltk.word_tokenize(text)
    # Stem text
    stemmed_tokens = [stemmer.stem(token) for token in tokens]
    # Join stemmed tokens back into a single string
    preprocessed_text = ' '.join(stemmed_tokens)
    return preprocessed_text

#5 Function to get sentiment scores
def get_sentiment_scores(row):
    if isinstance(row['Text'], str) and len(row['Text'].split()) > 1:
        preprocessed_text = preprocess_text(row['Text'])
        scores = analyzer.polarity_scores(preprocessed_text)
        row['Negative'] = scores['neg']
        row['Neutral'] = scores['neu']
        row['Positive'] = scores['pos']
        row['Compound'] = scores['compound']
    return row

#6 Sentiment analysis for each sentence on each row
df = df.apply(get_sentiment_scores, axis=1)

#7 Sentiment Classifying (NEEDS TO BE ADJUSTED)
df['Sentiment'] = ''
df.loc[df['Compound'] > 0.05, 'Sentiment'] = 'Positive'
df.loc[df['Compound'] < -0.05, 'Sentiment'] = 'Negative'
df.loc[(df['Compound'] >= -0.05) & (df['Compound'] <= 0.05), 'Sentiment'] = 'Neutral'

#8 Overall Sentiments in Percentage
sentiment_counts = df['Sentiment'].value_counts()
positive_pct = (sentiment_counts['Positive'] / len(df)) * 100
neutral_pct = (sentiment_counts['Neutral'] / len(df)) * 100
negative_pct = (sentiment_counts['Negative'] / len(df)) * 100

print(f"Overall Sentiment: \nPositive: {positive_pct:.2f}% \nNeutral: {neutral_pct:.2f}% \nNegative: {negative_pct:.2f}%\n")

#10 10 examples for eachs sentiments
print("Example Sentences:\n")
for sentiment in df['Sentiment'].unique():
    print(sentiment.upper() + ':')
    count = 0
    for sentence in df.loc[(df['Sentiment'] == sentiment) & (df['Text'].apply(lambda x: isinstance(x, str) and len(x.split()) > 1)), 'Text'].values:
        if count < 10:
            count += 1
            print(f"{count}. {sentence}")
    print('\n')


Overall Sentiment: 
Positive: 12.60% 
Neutral: 83.38% 
Negative: 1.22%

Example Sentences:

NEUTRAL:
1. Sesuai dengan hal diatas perlu ditunjang pengiriman yang aman, handal dan tepat waktu
2. TOLONG DIPERBANYAK VIA C.O.D NYA
3. Saya pejuang otomotif jadi saya ingin produk setiap motor harus ada
4. Belum ada saran
5. tdk ada
6. Perbanyak produk2 import branded
7. Pokoknya harga terjangkau
8. Buat lebih efisen lg
9. Bagus sekali
10. Bagi konsumen yg baik..kwalitas dan jaminan akan menjadi Penilaian yg terbaik


NEGATIVE:
1. Kalo bisa jangan terus2an pembaharuan soalnya kadang system jadi ada error harus instal ulang terus padahal sudah diperbaharui
2. Lebih baik kalau ada sorenya sendiri
3. Kasih penalty ke Toko kalau gagal kirim barang
4. Untuk melakukan pertanyan perlu juga no hp penjual. Dari segi pelangan saya melalui tokopedia setelah saya coba baru.saya selesaikan
5. Selalu update ketersediaan stok. Penjual harus memberikan alasan atas pembatalan otomatis karena batas waktu respon