# 1. Sentiment

## a. Analysis

In [2]:
import pandas as pd
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
dataset = pd.read_csv("google_reviews_no_index.csv")
reviews = list(dataset["Reviews"])
reviews

['enak banget tempatnya buat adain acara komunitas gitu, bersih, accessible dan luas juga tempatnya. Makasi cakap!',
 'kemarin abis ke cakap, tempatnya cozy dan nyaman banget dipake buat belajar buat meeting buat apapun',
 'Ikut kartu prakerja agak nyesel ngambil kelas di sini, waktu pengumpulan tugasnya ga pasti yang mana. Terbuang sia2 waktu yang saya gunakan☹. Waktu hitung mundur masih ada untuk mengumpul tugas, tetapi sudah tidak bisa. Tidak ada rasa empati kepada siswa dalam membantu menyelesaikan kelas. Pemberian informasi deadline tugas tidak lengkap. Semoga ke depannya waktu hitung mundur dan adminnya sama dalam memberikan waktu deadline tugas akhir.',
 'Tempatnya nyaman bikin betah dan cocok untuk belajar',
 'tempatnya bener bener nyamann bikin pengunjung betah belajar disanaa',
 'Tempat terbaik untuk mengasah kemampuan. Bekerja sambil belajar.',
 '26 kredit hangus tanpa pemberitahuan sebelumnya',
 'Belajarnya nyaman krn jadwal bisa fleksibel, tutor dan materinya mudah dimenge

In [4]:
def analyze_sentiment(text_to_analyze):
    # Specify the model and revision explicitly
    model_name = "w11wo/indonesian-roberta-base-sentiment-classifier"

    # Load the sentiment analysis pipeline with explicit model
    sentiment_analyzer = pipeline(
        "sentiment-analysis",
        model=model_name,
        tokenizer=model_name
    )

    # Perform sentiment analysis
    sentiment_result = sentiment_analyzer(text_to_analyze)

    return sentiment_result

In [5]:
sentiments = analyze_sentiment(reviews)
sentiments

[{'label': 'positive', 'score': 0.9988749623298645},
 {'label': 'positive', 'score': 0.9972100853919983},
 {'label': 'negative', 'score': 0.9989174604415894},
 {'label': 'positive', 'score': 0.9993093013763428},
 {'label': 'positive', 'score': 0.9993950128555298},
 {'label': 'positive', 'score': 0.9855474233627319},
 {'label': 'neutral', 'score': 0.9902029037475586},
 {'label': 'positive', 'score': 0.997343122959137},
 {'label': 'negative', 'score': 0.9993496537208557},
 {'label': 'positive', 'score': 0.9727933406829834},
 {'label': 'positive', 'score': 0.8009659051895142},
 {'label': 'positive', 'score': 0.9938482642173767},
 {'label': 'positive', 'score': 0.9906094074249268},
 {'label': 'positive', 'score': 0.9983508586883545},
 {'label': 'positive', 'score': 0.997636079788208},
 {'label': 'positive', 'score': 0.996038556098938},
 {'label': 'negative', 'score': 0.9980946183204651},
 {'label': 'positive', 'score': 0.9993799924850464},
 {'label': 'positive', 'score': 0.9030721187591553

In [6]:
def get_df(sentences, sentiments):
    sents = {
        'review': sentences,
        'label': [senti['label'].upper() for senti in sentiments],
        'score': [senti['score'] for senti in sentiments]
    }

    df = pd.DataFrame(sents)
    return df

In [7]:
senti_df = get_df(reviews, sentiments)
senti_df.head()

Unnamed: 0,review,label,score
0,enak banget tempatnya buat adain acara komunit...,POSITIVE,0.998875
1,"kemarin abis ke cakap, tempatnya cozy dan nyam...",POSITIVE,0.99721
2,Ikut kartu prakerja agak nyesel ngambil kelas ...,NEGATIVE,0.998917
3,Tempatnya nyaman bikin betah dan cocok untuk b...,POSITIVE,0.999309
4,tempatnya bener bener nyamann bikin pengunjung...,POSITIVE,0.999395


In [9]:
senti_df.to_csv("google_reviews_sentiment.csv", index=False)

## b. Visualization

In [12]:
import altair as alt

### 1) sentiments distribution

In [16]:
def pie_chart(poss, negs, neuts):
    total = poss + negs
    poss_perc = str(poss/total*100)+"%"
    negs_perc = str(negs/total*100)+"%"
    neuts_perc = str(neuts/total*100)+"%"

    source = pd.DataFrame({
        "Label": ["Positive", "Negative", "Neutral"],
        "Count": [poss, negs, neuts],
        "Percentage": [poss_perc, negs_perc, neuts_perc]
    })

    pie = alt.Chart(source).mark_arc().encode(
        theta="Count",
        color=alt.Color(
            "Label",
            scale=alt.Scale(
                domain=["Positive", "Negative", "Neutral"],
                range=["#19c2fa", "#fc3f3f", "#808080"]
            )),
        tooltip=["Label", "Count", "Percentage"]
    )

    return pie

In [17]:
label_count = senti_df['label'].value_counts()
poss = label_count.get('POSITIVE')
negs = label_count.get('NEGATIVE')
neuts = label_count.get("NEUTRAL")

pie = pie_chart(poss, negs, neuts)
pie.interactive()

### 2) rating prediction

In [20]:
# Removing neutral sentiments because they aren't affecting
# the positivity or negativity
pos_neg_df = senti_df[senti_df["label"] != "NEUTRAL"]
pos_neg_df

Unnamed: 0,review,label,score
0,enak banget tempatnya buat adain acara komunit...,POSITIVE,0.998875
1,"kemarin abis ke cakap, tempatnya cozy dan nyam...",POSITIVE,0.997210
2,Ikut kartu prakerja agak nyesel ngambil kelas ...,NEGATIVE,0.998917
3,Tempatnya nyaman bikin betah dan cocok untuk b...,POSITIVE,0.999309
4,tempatnya bener bener nyamann bikin pengunjung...,POSITIVE,0.999395
...,...,...,...
314,insightful!!,NEGATIVE,0.910059
315,Bagus,POSITIVE,0.930638
316,good,POSITIVE,0.999060
317,sangat membantu,POSITIVE,0.995917


In [21]:
pos_df = pd.DataFrame({
    "review": pos_neg_df["review"],
    "positivity": senti_df.apply(lambda row: 1 - row['score'] if row['label'] == 'NEGATIVE' else row['score'], axis=1)
})

pos_df.head()

Unnamed: 0,review,positivity
0,enak banget tempatnya buat adain acara komunit...,0.998875
1,"kemarin abis ke cakap, tempatnya cozy dan nyam...",0.99721
2,Ikut kartu prakerja agak nyesel ngambil kelas ...,0.001083
3,Tempatnya nyaman bikin betah dan cocok untuk b...,0.999309
4,tempatnya bener bener nyamann bikin pengunjung...,0.999395


In [22]:
positivity = pos_df["positivity"]

normalized_scores = [(score * 4) + 1 for score in (positivity)]
# average_score = sum(normalized_scores) / len(normalized_scores)

source = pd.DataFrame({
    "score": normalized_scores
})
alt.Chart(source).mark_bar().encode(
    alt.X("score:Q", title="Ratings", bin=True),
    y='count()',
)

# 2. Topic Modelling

In [23]:
from gensim import corpora
from gensim.models import LdaModel
from gensim.parsing.preprocessing import preprocess_string
import pyLDAvis.gensim_models as gensimvis
import pyLDAvis

In [24]:
documents = reviews

In [25]:
# Preprocess text
processed_docs = [preprocess_string(doc) for doc in documents]

# Create dictionary
dictionary = corpora.Dictionary(processed_docs)

# Create bag-of-words corpus
corpus = [dictionary.doc2bow(doc) for doc in processed_docs]

In [26]:
# Train LDA model
lda_model = LdaModel(corpus=corpus, id2word=dictionary, num_topics=3, passes=10)

In [27]:
# Visualize topics
vis_data = gensimvis.prepare(lda_model, corpus, dictionary)
pyLDAvis.display(vis_data)

# 3. Word Cloud