In [1]:
import pandas as pd
import numpy as np

#### Data

In [2]:
data = pd.read_csv('../data/data.csv')
data.head()

Unnamed: 0,kalimat,translated
0,Saya telah mengikuti perkembangan beberapa har...,I have been following developments in recent d...
1,Juga ada peristiwa di mana seorang petugas men...,There was also an incident where an officer hi...
2,Atas nama pribadi dan Pemerintah Republik Indo...,On behalf of myself and the Government of the ...
3,Saya sangat prihatin dan sangat sedih terjadi ...,I am very concerned and very sad that this inc...
4,Pemerintah akan menjamin kehidupan keluarganya...,The government will guarantee the family's liv...


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   kalimat     23 non-null     object
 1   translated  23 non-null     object
dtypes: object(2)
memory usage: 496.0+ bytes


In [4]:
data['kalimat'].iloc[0]

'Saya telah mengikuti perkembangan beberapa hari ini, terutama peristiwa tadi malam, di mana terjadi demonstrasi yang mengarah kepada tindakan anarkis.'

In [5]:
data['translated'].iloc[0]

"I have been following developments in recent days, especially last night's incident, where a demonstration led to anarchic actions."

#### Emotion (SenticNet English)

In [6]:
from senticnet.senticnet import SenticNet

In [7]:
sn = SenticNet()

In [8]:
# Function to analyze emotion and polarity
def analyze_emotion(text):
    words = text.lower().split()
    emotions = []
    polarity = []
    
    for word in words:
        try:
            # Get emotion from SenticNet
            polarity_value = float(sn.polarity_value(word))
            moodtags = sn.moodtags(word)
            emotions.extend(moodtags)
            polarity.append(polarity_value)
        except:
            continue
    
    if polarity:
        avg_polarity = sum(polarity) / len(polarity)
    else:
        avg_polarity = 0
    
    return {
        "emotions": list(set(emotions)),   # Unique emotions
        "avg_polarity": avg_polarity
    }

In [9]:
data["analysis"] = data["translated"].apply(analyze_emotion)

In [10]:
data['kalimat'].iloc[0]

'Saya telah mengikuti perkembangan beberapa hari ini, terutama peristiwa tadi malam, di mana terjadi demonstrasi yang mengarah kepada tindakan anarkis.'

In [11]:
data['analysis'].iloc[0]

{'emotions': ['#eagerness', '#sadness', '#pleasantness', '#joy'],
 'avg_polarity': 0.31}

In [12]:
# Extract emotions into a separate column
data["emotions"] = data["analysis"].apply(lambda x: x["emotions"])

# Drop less common emotions
data["emotions"] = data["emotions"].apply(lambda x: x[:2])

In [13]:
data['emotions'].iloc[0]

['#eagerness', '#sadness']

In [14]:
# Extract average polarity into a separate column
data["avg_polarity"] = data["analysis"].apply(lambda x: x["avg_polarity"])

In [15]:
data['avg_polarity'].iloc[0]

np.float64(0.31)

In [16]:
data.head()

Unnamed: 0,kalimat,translated,analysis,emotions,avg_polarity
0,Saya telah mengikuti perkembangan beberapa har...,I have been following developments in recent d...,"{'emotions': ['#eagerness', '#sadness', '#plea...","[#eagerness, #sadness]",0.31
1,Juga ada peristiwa di mana seorang petugas men...,There was also an incident where an officer hi...,"{'emotions': ['#sadness', '#joy', '#calmness',...","[#sadness, #joy]",0.416
2,Atas nama pribadi dan Pemerintah Republik Indo...,On behalf of myself and the Government of the ...,"{'emotions': ['#eagerness', '#pleasantness', '...","[#eagerness, #pleasantness]",0.85
3,Saya sangat prihatin dan sangat sedih terjadi ...,I am very concerned and very sad that this inc...,"{'emotions': ['#sadness', '#pleasantness'], 'a...","[#sadness, #pleasantness]",-0.69
4,Pemerintah akan menjamin kehidupan keluarganya...,The government will guarantee the family's liv...,"{'emotions': ['#calmness', '#eagerness', '#ple...","[#calmness, #eagerness]",0.7185


#### Indo SenticNet

In [17]:
import string

In [18]:
data = pd.read_csv('../data/data.csv')
data.head()

Unnamed: 0,kalimat,translated
0,Saya telah mengikuti perkembangan beberapa har...,I have been following developments in recent d...
1,Juga ada peristiwa di mana seorang petugas men...,There was also an incident where an officer hi...
2,Atas nama pribadi dan Pemerintah Republik Indo...,On behalf of myself and the Government of the ...
3,Saya sangat prihatin dan sangat sedih terjadi ...,I am very concerned and very sad that this inc...
4,Pemerintah akan menjamin kehidupan keluarganya...,The government will guarantee the family's liv...


In [19]:
def preprocess(text):
    # lowercase
    text = text.lower()
    # delete punctuation
    text = text.translate(str.maketrans("", "", string.punctuation))
    # tokenize
    tokens = text.split()
    return tokens

In [20]:
# load xlsx
df = pd.read_excel("../data/senticnet.xlsx")
df.head()

Unnamed: 0,CONCEPT,INTROSPECTION,TEMPER,ATTITUDE,SENSITIVITY,PRIMARY EMOTION,SECONDAY EMOTION,POLARITY VALUE,POLARITY INTENSITY,SEMANTICS,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,aah,-0.56,0.0,0.0,0.0,#sadness,,negative,-0.56,keawaman,barok,kecemasan_sebelumnya,kelas_komersial,tertutup
1,abadi,0.0,0.0,0.964,0.0,#delight,,positive,0.964,istabraq,meluncur_ke_bawah,dari_yang_diperlukan,serpina,kurricule
2,abdominoplasti,0.0,0.0,0.29,0.0,#acceptance,,positive,0.29,bersuku_kata_banyak,ketidakcocokan,jip,gaya_bebas,norelco
3,aberrate,0.0,0.0,-0.554,0.0,#disgust,,negative,-0.554,rompakan_permata,tripoint,formasi,walsh,perang_salib
4,abey,0.0,0.0,0.0,-0.329,#anxiety,,negative,-0.329,mengurangi,membasmi,loop_synth,pankreas,shabarisha


In [21]:
# Dictionary
polarity_dict = dict(zip(df["CONCEPT"], df["POLARITY INTENSITY"]))
emotion_dict  = dict(zip(df["CONCEPT"], df["PRIMARY EMOTION"]))

In [22]:
# Function to map tokens to sentiment and emotions
def map_sentiment(tokens):
    polarities = []
    emotions = []
    for t in tokens:
        if t in polarity_dict:
            polarities.append(polarity_dict[t])
        if t in emotion_dict:
            emotions.append(emotion_dict[t])
    # calculate average polarity
    avg_polarity = sum(polarities)/len(polarities) if polarities else 0
    
    # set sentiment based on avg_polarity
    if avg_polarity > 0.05:
        sentiment = "positive"
    elif avg_polarity < -0.05:
        sentiment = "negative"
    else:
        sentiment = "neutral"
    
    return avg_polarity, emotions, sentiment

In [23]:
data["tokens"] = data["kalimat"].apply(preprocess)
data.head()

Unnamed: 0,kalimat,translated,tokens
0,Saya telah mengikuti perkembangan beberapa har...,I have been following developments in recent d...,"[saya, telah, mengikuti, perkembangan, beberap..."
1,Juga ada peristiwa di mana seorang petugas men...,There was also an incident where an officer hi...,"[juga, ada, peristiwa, di, mana, seorang, petu..."
2,Atas nama pribadi dan Pemerintah Republik Indo...,On behalf of myself and the Government of the ...,"[atas, nama, pribadi, dan, pemerintah, republi..."
3,Saya sangat prihatin dan sangat sedih terjadi ...,I am very concerned and very sad that this inc...,"[saya, sangat, prihatin, dan, sangat, sedih, t..."
4,Pemerintah akan menjamin kehidupan keluarganya...,The government will guarantee the family's liv...,"[pemerintah, akan, menjamin, kehidupan, keluar..."


In [24]:
data['kalimat'].iloc[0]

'Saya telah mengikuti perkembangan beberapa hari ini, terutama peristiwa tadi malam, di mana terjadi demonstrasi yang mengarah kepada tindakan anarkis.'

In [25]:
data['tokens'].iloc[0]

['saya',
 'telah',
 'mengikuti',
 'perkembangan',
 'beberapa',
 'hari',
 'ini',
 'terutama',
 'peristiwa',
 'tadi',
 'malam',
 'di',
 'mana',
 'terjadi',
 'demonstrasi',
 'yang',
 'mengarah',
 'kepada',
 'tindakan',
 'anarkis']

In [26]:
# Apply the mapping function to get polarity, emotions and sentiment
data[["avg_polarity", "emotions", "sentiment"]] = data["tokens"].apply(lambda x: pd.Series(map_sentiment(x)))

# Drop less common emotions
data["emotions"] = data["emotions"].apply(lambda x: x[:2])

In [27]:
data.head()

Unnamed: 0,kalimat,translated,tokens,avg_polarity,emotions,sentiment
0,Saya telah mengikuti perkembangan beberapa har...,I have been following developments in recent d...,"[saya, telah, mengikuti, perkembangan, beberap...",0.058857,"[#dislike, #ecstasy]",positive
1,Juga ada peristiwa di mana seorang petugas men...,There was also an incident where an officer hi...,"[juga, ada, peristiwa, di, mana, seorang, petu...",0.317857,"[#pleasantness, #enthusiasm]",positive
2,Atas nama pribadi dan Pemerintah Republik Indo...,On behalf of myself and the Government of the ...,"[atas, nama, pribadi, dan, pemerintah, republi...",0.161,"[#ecstasy, #rage]",positive
3,Saya sangat prihatin dan sangat sedih terjadi ...,I am very concerned and very sad that this inc...,"[saya, sangat, prihatin, dan, sangat, sedih, t...",-0.494833,"[#dislike, #grief]",negative
4,Pemerintah akan menjamin kehidupan keluarganya...,The government will guarantee the family's liv...,"[pemerintah, akan, menjamin, kehidupan, keluar...",0.746167,"[#delight, #acceptance]",positive


In [28]:
data['emotions'].iloc[0]

['#dislike', '#ecstasy']

In [29]:
data['avg_polarity'].iloc[0]

np.float64(0.05885714285714288)

In [30]:
data['sentiment'].iloc[0]

'positive'

#### Compare

In the example sentence

“Saya telah mengikuti perkembangan beberapa hari ini, terutama peristiwa tadi malam, di mana terjadi demonstrasi yang mengarah kepada tindakan anarkis.”, 

the Indo-based SenticNet approach produced dominant emotions such as #ecstasy and #dislike, which are less appropriate to the context since an anarchic demonstration should evoke negative emotions.

In contrast, the English/translated SenticNet approach yielded emotions like #sadness and #eagerness, which better reflect the actual meaning of the sentence as they capture both the negative nuance and the emotional involvement in the event.