In [1]:
import pandas as pd
import numpy as np

#### Data

In [2]:
# read data
data = pd.read_csv('../data/comments_data.csv')

# ignore columns parent_id
data = data.drop(columns=['parent_id'])
data.head()

Unnamed: 0,link,comment_id,username,text,created_at,like_count,reply_count
0,https://www.instagram.com/p/DOV0Rn2DNf-/,17916292923173899,lorenzoabim,Anjir Dahnil kebagian kue nya :),1757333808,192,16
1,https://www.instagram.com/p/DOV0Rn2DNf-/,18095910739726736,fndnrz,@risqi_aguspianto hahaha bukan pendukung prabo...,1757372912,0,0
2,https://www.instagram.com/p/DOV0Rn2DNf-/,17980524434883939,muchlizdarma1,@lorenzoabim kerjanya setaun sekali,1757387323,0,0
3,https://www.instagram.com/p/DOV0Rn2DNf-/,18173137474359052,ikbarsinaga999,@al_hazhouse.land Kaesang hahahha,1757676329,0,0
4,https://www.instagram.com/p/DOV0Rn2DNf-/,18038689634666892,rahmadsejati,"harapan rakyat sederhana 🙏 kerja nyata, hasil ...",1757334122,757,2


In [3]:
# convert comment_time to datetime
data['created_at'] = pd.to_datetime(data['created_at'], unit='s')

# extract date from comment_time
data['date'] = data['created_at'].dt.date
data.head()

Unnamed: 0,link,comment_id,username,text,created_at,like_count,reply_count,date
0,https://www.instagram.com/p/DOV0Rn2DNf-/,17916292923173899,lorenzoabim,Anjir Dahnil kebagian kue nya :),2025-09-08 12:16:48,192,16,2025-09-08
1,https://www.instagram.com/p/DOV0Rn2DNf-/,18095910739726736,fndnrz,@risqi_aguspianto hahaha bukan pendukung prabo...,2025-09-08 23:08:32,0,0,2025-09-08
2,https://www.instagram.com/p/DOV0Rn2DNf-/,17980524434883939,muchlizdarma1,@lorenzoabim kerjanya setaun sekali,2025-09-09 03:08:43,0,0,2025-09-09
3,https://www.instagram.com/p/DOV0Rn2DNf-/,18173137474359052,ikbarsinaga999,@al_hazhouse.land Kaesang hahahha,2025-09-12 11:25:29,0,0,2025-09-12
4,https://www.instagram.com/p/DOV0Rn2DNf-/,18038689634666892,rahmadsejati,"harapan rakyat sederhana 🙏 kerja nyata, hasil ...",2025-09-08 12:22:02,757,2,2025-09-08


#### Mention Interaction

In [4]:
import re
# Get username in text
mention_pattern = re.compile(r'@(\w+)')

In [5]:
# calculate edge weights from mentions (author -> mentioned)
edge_weights = {}

for _, row in data.iterrows():
    author = row['username']
    text = str(row['text']) if pd.notna(row['text']) else ""
    mentions = mention_pattern.findall(text)

    for mentioned in mentions:
        if author != mentioned:  # Avoid self-loops
            edge_weights[(author, mentioned)] = edge_weights.get((author, mentioned), 0) + 1

In [6]:
# sort edge weights by value descending
edge_weights = dict(sorted(edge_weights.items(), key=lambda item: item[1], reverse=True))
edge_weights

{('rdt_prm', 'm'): 3,
 ('brooklyn_cheesee', 'fzi'): 3,
 ('denisarimbi06', 'prabowo'): 2,
 ('enia130901', 'plutonioneon'): 2,
 ('ranggaprimadasa', 'dinatauhida'): 2,
 ('prasetyo_emilarie', 'bahlillahadalia'): 2,
 ('prasetyo_emilarie', 'zul'): 2,
 ('prasetyo_emilarie', 'listyosigitprabowo'): 2,
 ('prasetyo_emilarie', 'erickthohir'): 2,
 ('rafly.wibowo240101', 'prabowo'): 2,
 ('ashurin.1010', 'prabowo'): 2,
 ('pandu_ozil11', 'hadicobruts'): 2,
 ('rhama.bpn.sgta', 'sumargodenny'): 2,
 ('chllprk', 'salmainstbrz'): 2,
 ('faaaaadiiil', 'heriant_international'): 2,
 ('taniasoktaviani', '_nsrimulyani11'): 2,
 ('_nsrimulyani11', 'taniasoktaviani'): 2,
 ('marjuniaaa', 'anp_studio13'): 2,
 ('ukmindo.id', 'prabowo'): 2,
 ('tomsuryapanji', 'prabowo'): 2,
 ('egiiiinn', 'sekretariat'): 2,
 ('mrs_susinisharin', 'xreal_2202'): 2,
 ('downhill_jaksquad', 'maria_quyya'): 2,
 ('rudi.gaol', 'smindrawati'): 2,
 ('fndnrz', 'risqi_aguspianto'): 1,
 ('muchlizdarma1', 'lorenzoabim'): 1,
 ('ikbarsinaga999', 'al_ha

#### User Info

In [7]:
# comment count and sort by value descending
comment_count = data['username'].value_counts().to_dict()
comment_count = dict(sorted(comment_count.items(), key=lambda item: item[1], reverse=True))
comment_count

{'octhavvia6': 9,
 'heyjoykuy': 8,
 'achmad.rehansyah.1': 5,
 'mustangid.mustangid': 5,
 'ttk.project': 5,
 'floflorav': 5,
 'deddy.gondrong.3910': 5,
 'khoirulfl': 5,
 'kebenaranpastiterjadi': 5,
 'dwanharun': 4,
 'scorgees': 4,
 'arkina_mei': 4,
 'wahyu_rajin_menabung_': 4,
 'lenirahardja': 4,
 'gbriieel': 4,
 'thea_dyah': 4,
 'io9713': 4,
 'andriawanarif24': 4,
 'alifikri632': 4,
 'wvictorym': 4,
 'brooklyn_cheesee': 4,
 'panduadiputrawijaya': 4,
 'ikbarsinaga999': 4,
 'ilhamelsoth': 3,
 'docturnal_': 3,
 'irtizaqdalahmas': 3,
 'foraes4': 3,
 'naeliatitik': 3,
 'hendyhedyawan': 3,
 'donal79': 3,
 'hideyuki.id': 3,
 'sonnydharmawan1707': 3,
 'bergkamp_p': 3,
 'aryosamuelhartanto': 3,
 'amy_jakarta': 3,
 'azissamruu': 3,
 'xtrememerch': 3,
 'marjuniaaa': 3,
 'bgsabdrrhmn_29': 3,
 'pp.imamsyafii_alhasan': 3,
 'rvvlnsyah12': 3,
 'indragnwn330': 3,
 'marselinop13': 3,
 'fadciks': 3,
 'damara18_': 3,
 'mrs_susinisharin': 3,
 'vian_osd': 3,
 'ekybong365': 3,
 'muhammad_noah_elsafik': 3,
 '

In [8]:
# like sum and sort by value descending
like_sum = data.groupby('username')['like_count'].sum().to_dict()
like_sum = dict(sorted(like_sum.items(), key=lambda item: item[1], reverse=True))
like_sum

{'junaydfloyd': 9825,
 'tikawngso': 6891,
 'difi_raw': 5275,
 'praharja.bangga': 4366,
 'jasaolahdata.co': 2719,
 'jessy_humirtap': 2415,
 'veren_hayon': 2351,
 'bataramanik': 2088,
 'bagusradhitya': 2023,
 'jasapindahanbali': 1587,
 'ikanterikiyowo': 1481,
 'dimsdwii': 1207,
 'thoniluthfi': 1033,
 'fhmilham': 966,
 'karunasankara01': 944,
 'heldums': 917,
 'dimassaktinugraha': 898,
 'bandung.gigs': 883,
 'tgk_ricky.a': 883,
 'ry.iyyd': 854,
 'nvioletas': 833,
 'rahmadsejati': 757,
 'mahendraprks': 755,
 'wnda_fp': 746,
 'rezabahroem': 693,
 'airfaanin': 677,
 '_rinadh': 662,
 'xreal_2202': 634,
 'adivnm': 633,
 'netfllixx_murah': 604,
 'youngdla_': 590,
 'geriorag': 561,
 'muhammad_hezra': 519,
 'arifback11': 515,
 'fzi.ibongg_': 506,
 'yehezkielde12': 481,
 'rimahanidar': 456,
 'wantose_': 455,
 'marindasari': 449,
 'susiloturjoyo': 447,
 'dirisendirri': 441,
 'ardianirwan__': 434,
 'septian.purnama': 424,
 'yeti_nurmayati': 419,
 'melaniera12': 413,
 'fikriadhiez': 411,
 'hengkiheng

In [9]:
# mention count
mention_count = {}
for (a,b), w in edge_weights.items():
    mention_count[a] = mention_count.get(a, 0) + w
    mention_count[b] = mention_count.get(b, 0) + w

# sort mention count by value descending
mention_count = dict(sorted(mention_count.items(), key=lambda item: item[1], reverse=True))
mention_count

{'prabowo': 236,
 'smindrawati': 111,
 'presidenrepublikindonesia': 24,
 'kemenpar': 22,
 'bahlillahadalia': 19,
 'official': 15,
 'gerindra': 15,
 'listyosigitprabowo': 14,
 'rajaantoni': 14,
 'prasetyo_emilarie': 9,
 'rafly.wibowo240101': 9,
 'rocky': 9,
 'heyjoykuy': 8,
 'zul': 7,
 'erickthohir': 7,
 'egiiiinn': 7,
 'fahria.zxcc': 7,
 'alamsyahsubakri': 7,
 'tomsuryapanji': 6,
 'sekretariat': 6,
 'mohmahfudmd': 6,
 'oensragil.id': 6,
 'octhavvia6': 6,
 'gibran_rakabuming': 5,
 'yuanitagayatri': 5,
 'bungfahmiali': 5,
 'achmad.rehansyah.1': 5,
 'luhut': 5,
 'javanoase': 5,
 'nestheaceae': 5,
 'khoirulfl': 5,
 'rhama.bpn.sgta': 4,
 'sumargodenny': 4,
 'taniasoktaviani': 4,
 '_nsrimulyani11': 4,
 'marjuniaaa': 4,
 'lenirahardja': 4,
 'syakurhilmy_': 4,
 'ketua_psms': 4,
 'narasinewsroom': 4,
 'dr': 4,
 'dewan_perwakilan_netizen_ri': 4,
 'hammmmmmmag09': 4,
 'mustangid.mustangid': 4,
 'jessy_humirtap': 4,
 'titokarnavian': 4,
 'ansrhdyn03': 4,
 'aeztrishavaniabi': 4,
 'tirexmadep': 4,
 

#### Build Graph

In [10]:
import networkx as nx

In [11]:
# filter users with at least 2 mentions
min_mentions = 2
active_users = {u for u, c in mention_count.items() if c >= min_mentions}

# create graph
G = nx.DiGraph()
for (a, b), w in edge_weights.items():
    if a in active_users and b in active_users:
        G.add_edge(a, b, weight=w)

print("Nodes:", G.number_of_nodes(), "Edges:", G.number_of_edges())

Nodes: 355 Edges: 431


In [12]:
from pyvis.network import Network
import os

In [13]:
# create results directory if not exists
os.makedirs('../results', exist_ok=True)

In [14]:
import math

In [15]:
# create pyvis network
net = Network(notebook=True, directed=True)

# add nodes with attributes
for node in G.nodes():
    mentions = mention_count.get(node, 0)
    size = math.sqrt(mentions)*5+5
    net.add_node(
        node,
        title=f"User: {node}<br>Comments: {comment_count.get(node, 0)}<br>Likes: {like_sum.get(node, 0)}<br>Mentions: {mention_count.get(node, 0)}",
        value=size
    )

# add edges with weights
for u, v, info in G.edges(data=True):
    net.add_edge(u, v, value=info['weight'], title=f"Mentions: {info['weight']}")

# save result
net.show('../results/mention_network.html', notebook=False)

../results/mention_network.html


#### Comunity Detection

In [16]:
import community as community_louvain

In [17]:
# using louvain
for node in G.nodes():
    G.nodes[node]['comment_count'] = comment_count.get(node, 0)
    G.nodes[node]['like_sum'] = like_sum.get(node, 0)
    G.nodes[node]['mention_count'] = mention_count.get(node, 0)

partition = community_louvain.best_partition(G.to_undirected())
partition

{'rdt_prm': 0,
 'm': 0,
 'brooklyn_cheesee': 1,
 'fzi': 1,
 'denisarimbi06': 2,
 'prabowo': 2,
 'enia130901': 3,
 'plutonioneon': 3,
 'ranggaprimadasa': 4,
 'dinatauhida': 4,
 'prasetyo_emilarie': 5,
 'bahlillahadalia': 5,
 'zul': 5,
 'listyosigitprabowo': 5,
 'erickthohir': 5,
 'rafly.wibowo240101': 6,
 'ashurin.1010': 2,
 'pandu_ozil11': 7,
 'hadicobruts': 7,
 'rhama.bpn.sgta': 6,
 'sumargodenny': 6,
 'chllprk': 8,
 'salmainstbrz': 8,
 'faaaaadiiil': 9,
 'heriant_international': 9,
 'taniasoktaviani': 10,
 '_nsrimulyani11': 10,
 'marjuniaaa': 11,
 'anp_studio13': 11,
 'ukmindo.id': 2,
 'tomsuryapanji': 5,
 'egiiiinn': 12,
 'sekretariat': 12,
 'mrs_susinisharin': 13,
 'xreal_2202': 13,
 'downhill_jaksquad': 14,
 'maria_quyya': 14,
 'rudi.gaol': 15,
 'smindrawati': 15,
 'sarden.ina': 16,
 'ardii_lukman': 16,
 'io9713': 16,
 'ikbarsinaga999': 17,
 'bang': 17,
 'wongtani_padi': 18,
 'rahayuindah_': 18,
 'eyesmaul': 12,
 'diah': 12,
 'gibran_rakabuming': 12,
 'i.m.reys': 12,
 'lenirahardj

In [18]:
# save user info with community
users_df = pd.DataFrame({'username': data['username'].dropna().unique()})
comm_df = pd.DataFrame(list(partition.items()), columns=['username', 'community'])
users_com = users_df.merge(comm_df, on='username', how='left')

# save to csv
users_com.to_csv('../results/users_community.csv', index=False)

In [19]:
# plot community graph
net = Network(notebook=True, directed=True)
net.from_nx(G)

# color nodes by community
for node in net.nodes:
    user = node['id']
    size = math.sqrt(mention_count.get(user, 0))*5+5
    node['group'] = partition[node['id']]
    node['value'] = size
    node['title'] = (
        f"User: {user}"
        f"<br>Comments: {comment_count.get(user, 0)}"
        f"<br>Likes: {like_sum.get(user, 0)}"
        f"<br>Mentions: {mention_count.get(user, 0)}"
        f"<br>Community: {partition[user]}"
    )

# save community graph
net.show('../results/mention_community_network.html', notebook=False)

../results/mention_community_network.html


#### Community Deep Dive

In [20]:
from collections import Counter

In [21]:
# create results sub folder if not exists
os.makedirs('../results/sub', exist_ok=True)

In [22]:
# calculate the edge sum
edge_communities = []
for u, v in G.edges():
    if u in partition and v in partition and partition[u] == partition[v]:
        edge_communities.append(partition[u])

edge_count_per_comm = Counter(edge_communities)

# get community with >= 10 edges
valid_comms = [comm for comm, count in edge_count_per_comm.items() if count >= 10]
valid_comms

[2, 5, 12, 15, 20]

In [23]:
# create and save subgraph for each valid/big community
for comm in valid_comms:
    
    # create subgraph
    comm_nodes = [n for n, c in partition.items() if c == comm]
    subG = G.subgraph(comm_nodes)

    # create pyvis network
    net = Network(notebook=True, directed=True)
    net.from_nx(subG)

    # color nodes by community
    for node in net.nodes:
        user = node['id']
        size = math.sqrt(mention_count.get(user, 0))*5+5
        node['group'] = comm
        node['value'] = size
        node['title'] = (
            f"User: {user}"
            f"<br>Comments: {comment_count.get(user, 0)}"
            f"<br>Likes: {like_sum.get(user, 0)}"
            f"<br>Mentions: {mention_count.get(user, 0)}"
        )

    # save subgraph
    out_path = f"../results/sub/community_{comm}.html"
    net.show(out_path, notebook=False)
    print(f"Saved {out_path} ({subG.number_of_nodes()} nodes, {subG.number_of_edges()} edges)")

../results/sub/community_2.html
Saved ../results/sub/community_2.html (55 nodes, 86 edges)
../results/sub/community_5.html
Saved ../results/sub/community_5.html (28 nodes, 45 edges)
../results/sub/community_12.html
Saved ../results/sub/community_12.html (29 nodes, 34 edges)
../results/sub/community_15.html
Saved ../results/sub/community_15.html (20 nodes, 21 edges)
../results/sub/community_20.html
Saved ../results/sub/community_20.html (13 nodes, 13 edges)


In [24]:
from collections import Counter

for comm in valid_comms:
    comm_nodes = {n for n, c in partition.items() if c == comm}

    sent_counter = Counter()
    received_counter = Counter()

    for u, v, info in G.edges(data=True):
        # only consider edges within the community
        if u in comm_nodes and v in comm_nodes:
            w = info.get('weight', 1)
            sent_counter[u] += w
            received_counter[v] += w

    print(f"Community {comm}")
    print("Top 3 mention (sender):", sent_counter.most_common(3))
    print("Top 3 get mention (receiver):", received_counter.most_common(3))
    print()

Community 2
Top 3 mention (sender): [('riowahyuda58', 3), ('daffi.azhari', 2), ('asnia_nanta', 2)]
Top 3 get mention (receiver): [('prabowo', 46), ('kemenpar', 21), ('gerindra', 10)]

Community 5
Top 3 mention (sender): [('fahria.zxcc', 6), ('alamsyahsubakri', 6), ('prasetyo_emilarie', 4)]
Top 3 get mention (receiver): [('bahlillahadalia', 12), ('listyosigitprabowo', 8), ('zul', 5)]

Community 12
Top 3 mention (sender): [('egiiiinn', 3), ('dewan_perwakilan_netizen_ri', 3), ('javanoase', 3)]
Top 3 get mention (receiver): [('presidenrepublikindonesia', 17), ('sekretariat', 4), ('kemensetneg', 4)]

Community 15
Top 3 mention (sender): [('hengkiheng98', 2), ('jibriaratna', 2), ('andriawanarif24', 2)]
Top 3 get mention (receiver): [('smindrawati', 14), ('zulhammuchtar', 1), ('hengkiheng98', 1)]

Community 20
Top 3 mention (sender): [('yuanitagayatri', 2), ('muhammadkahfi___', 2), ('edisaja1983', 2)]
Top 3 get mention (receiver): [('mohmahfudmd', 5), ('official', 4), ('kejaksaan', 2)]



#### Sentiment and Emotion Analysis

In [25]:
# read data
data = pd.read_csv('../data/comments_data.csv')

# ignore columns parent_id
data = data.drop(columns=['parent_id'])
data.head()

Unnamed: 0,link,comment_id,username,text,created_at,like_count,reply_count
0,https://www.instagram.com/p/DOV0Rn2DNf-/,17916292923173899,lorenzoabim,Anjir Dahnil kebagian kue nya :),1757333808,192,16
1,https://www.instagram.com/p/DOV0Rn2DNf-/,18095910739726736,fndnrz,@risqi_aguspianto hahaha bukan pendukung prabo...,1757372912,0,0
2,https://www.instagram.com/p/DOV0Rn2DNf-/,17980524434883939,muchlizdarma1,@lorenzoabim kerjanya setaun sekali,1757387323,0,0
3,https://www.instagram.com/p/DOV0Rn2DNf-/,18173137474359052,ikbarsinaga999,@al_hazhouse.land Kaesang hahahha,1757676329,0,0
4,https://www.instagram.com/p/DOV0Rn2DNf-/,18038689634666892,rahmadsejati,"harapan rakyat sederhana 🙏 kerja nyata, hasil ...",1757334122,757,2


In [26]:
# convert comment_time to datetime
data['created_at'] = pd.to_datetime(data['created_at'], unit='s')

# extract date from comment_time
data['date'] = data['created_at'].dt.date
data.head()

Unnamed: 0,link,comment_id,username,text,created_at,like_count,reply_count,date
0,https://www.instagram.com/p/DOV0Rn2DNf-/,17916292923173899,lorenzoabim,Anjir Dahnil kebagian kue nya :),2025-09-08 12:16:48,192,16,2025-09-08
1,https://www.instagram.com/p/DOV0Rn2DNf-/,18095910739726736,fndnrz,@risqi_aguspianto hahaha bukan pendukung prabo...,2025-09-08 23:08:32,0,0,2025-09-08
2,https://www.instagram.com/p/DOV0Rn2DNf-/,17980524434883939,muchlizdarma1,@lorenzoabim kerjanya setaun sekali,2025-09-09 03:08:43,0,0,2025-09-09
3,https://www.instagram.com/p/DOV0Rn2DNf-/,18173137474359052,ikbarsinaga999,@al_hazhouse.land Kaesang hahahha,2025-09-12 11:25:29,0,0,2025-09-12
4,https://www.instagram.com/p/DOV0Rn2DNf-/,18038689634666892,rahmadsejati,"harapan rakyat sederhana 🙏 kerja nyata, hasil ...",2025-09-08 12:22:02,757,2,2025-09-08


In [27]:
user_comm = pd.read_csv('../results/users_community.csv')
user_comm.head()

Unnamed: 0,username,community
0,lorenzoabim,
1,fndnrz,
2,muchlizdarma1,
3,ikbarsinaga999,17.0
4,rahmadsejati,


In [42]:
# merge
merged = data.merge(user_comm, on='username', how='left')
merged = merged.dropna(subset=['community'])
merged = merged.reset_index(drop=True)
merged.head()

Unnamed: 0,link,comment_id,username,text,created_at,like_count,reply_count,date,community
0,https://www.instagram.com/p/DOV0Rn2DNf-/,18173137474359052,ikbarsinaga999,@al_hazhouse.land Kaesang hahahha,2025-09-12 11:25:29,0,0,2025-09-12,17.0
1,https://www.instagram.com/p/DOV0Rn2DNf-/,18066568718464364,io9713,Sama aja😂😂😂,2025-09-08 22:38:37,0,0,2025-09-08,16.0
2,https://www.instagram.com/p/DOV0Rn2DNf-/,18519537625024458,lenirahardja,@ayurachanda bagi Tuhan gak ada yg mustahil ❤️,2025-09-09 03:25:31,0,0,2025-09-09,12.0
3,https://www.instagram.com/p/DOV0Rn2DNf-/,18103206322588474,denisarimbi06,"@prabowo betul itu, orang2 sisaa Mulyono tidak...",2025-09-10 04:57:51,0,0,2025-09-10,2.0
4,https://www.instagram.com/p/DOV0Rn2DNf-/,17921412669139143,ikbarsinaga999,"Lah rotasi udahan, PKB -> Golkar",2025-09-10 18:44:48,0,0,2025-09-10,17.0


In [43]:
# clean text
def clean_text(text):
    # delete mention
    text = re.sub(r'@\w+', '', text)

    # lowercase
    text = text.lower()
    
    # delete link
    text = re.sub(r'http\S+|www\S+', '', text)
    
    # delete non-printable characters except emoticons
    text = re.sub(r'[^\x00-\x7F]+', lambda m: m.group(0), text)
    
    # delete whitespace
    text = re.sub(r'\s+', ' ', text).strip()

    # delete emoji
    text = re.sub(r'[^\w\s,]', '', text)
    
    # tokenize
    tokens = text.split()
    return tokens

merged['clean_text'] = merged['text'].astype(str).apply(clean_text)
merged.head()

Unnamed: 0,link,comment_id,username,text,created_at,like_count,reply_count,date,community,clean_text
0,https://www.instagram.com/p/DOV0Rn2DNf-/,18173137474359052,ikbarsinaga999,@al_hazhouse.land Kaesang hahahha,2025-09-12 11:25:29,0,0,2025-09-12,17.0,"[land, kaesang, hahahha]"
1,https://www.instagram.com/p/DOV0Rn2DNf-/,18066568718464364,io9713,Sama aja😂😂😂,2025-09-08 22:38:37,0,0,2025-09-08,16.0,"[sama, aja]"
2,https://www.instagram.com/p/DOV0Rn2DNf-/,18519537625024458,lenirahardja,@ayurachanda bagi Tuhan gak ada yg mustahil ❤️,2025-09-09 03:25:31,0,0,2025-09-09,12.0,"[bagi, tuhan, gak, ada, yg, mustahil]"
3,https://www.instagram.com/p/DOV0Rn2DNf-/,18103206322588474,denisarimbi06,"@prabowo betul itu, orang2 sisaa Mulyono tidak...",2025-09-10 04:57:51,0,0,2025-09-10,2.0,"[betul, itu,, orang2, sisaa, mulyono, tidak, b..."
4,https://www.instagram.com/p/DOV0Rn2DNf-/,17921412669139143,ikbarsinaga999,"Lah rotasi udahan, PKB -> Golkar",2025-09-10 18:44:48,0,0,2025-09-10,17.0,"[lah, rotasi, udahan,, pkb, golkar]"


In [44]:
merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 479 entries, 0 to 478
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   link         479 non-null    object        
 1   comment_id   479 non-null    int64         
 2   username     479 non-null    object        
 3   text         478 non-null    object        
 4   created_at   479 non-null    datetime64[ns]
 5   like_count   479 non-null    int64         
 6   reply_count  479 non-null    int64         
 7   date         479 non-null    object        
 8   community    479 non-null    float64       
 9   clean_text   479 non-null    object        
dtypes: datetime64[ns](1), float64(1), int64(3), object(5)
memory usage: 37.5+ KB


In [45]:
# load xlsx SenticNet Indo
df = pd.read_excel("../data/senticnet.xlsx")
df.head()

Unnamed: 0,CONCEPT,INTROSPECTION,TEMPER,ATTITUDE,SENSITIVITY,PRIMARY EMOTION,SECONDAY EMOTION,POLARITY VALUE,POLARITY INTENSITY,SEMANTICS,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,aah,-0.56,0.0,0.0,0.0,#sadness,,negative,-0.56,keawaman,barok,kecemasan_sebelumnya,kelas_komersial,tertutup
1,abadi,0.0,0.0,0.964,0.0,#delight,,positive,0.964,istabraq,meluncur_ke_bawah,dari_yang_diperlukan,serpina,kurricule
2,abdominoplasti,0.0,0.0,0.29,0.0,#acceptance,,positive,0.29,bersuku_kata_banyak,ketidakcocokan,jip,gaya_bebas,norelco
3,aberrate,0.0,0.0,-0.554,0.0,#disgust,,negative,-0.554,rompakan_permata,tripoint,formasi,walsh,perang_salib
4,abey,0.0,0.0,0.0,-0.329,#anxiety,,negative,-0.329,mengurangi,membasmi,loop_synth,pankreas,shabarisha


In [46]:
# set dictionary
polarity_dict = dict(zip(df["CONCEPT"], df["POLARITY INTENSITY"]))
emotion_dict  = dict(zip(df["CONCEPT"], df["PRIMARY EMOTION"]))

In [47]:
# function to map tokens to sentiment and emotions
def map_sentiment(tokens):
    polarities = []
    emotions = []
    for t in tokens:
        if t in polarity_dict:
            polarities.append(polarity_dict[t])
        if t in emotion_dict:
            emotions.append(emotion_dict[t])
    # calculate average polarity
    avg_polarity = sum(polarities)/len(polarities) if polarities else 0
    
    # set sentiment based on avg_polarity
    if avg_polarity > 0.05:
        sentiment = "positive"
    elif avg_polarity < -0.05:
        sentiment = "negative"
    else:
        sentiment = "neutral"
    
    return avg_polarity, emotions, sentiment

In [50]:
# apply the mapping function to get polarity, emotions and sentiment
merged[["avg_polarity", "emotions", "sentiment"]] = merged["clean_text"].apply(lambda x: pd.Series(map_sentiment(x)))

# drop less common emotions
merged["emotions"] = merged["emotions"].apply(lambda x: x[:1])
merged.head()

Unnamed: 0,link,comment_id,username,text,created_at,like_count,reply_count,date,community,clean_text,avg_polarity,emotions,sentiment
0,https://www.instagram.com/p/DOV0Rn2DNf-/,18173137474359052,ikbarsinaga999,@al_hazhouse.land Kaesang hahahha,2025-09-12 11:25:29,0,0,2025-09-12,17.0,"[land, kaesang, hahahha]",0.0,[],neutral
1,https://www.instagram.com/p/DOV0Rn2DNf-/,18066568718464364,io9713,Sama aja😂😂😂,2025-09-08 22:38:37,0,0,2025-09-08,16.0,"[sama, aja]",0.0,[],neutral
2,https://www.instagram.com/p/DOV0Rn2DNf-/,18519537625024458,lenirahardja,@ayurachanda bagi Tuhan gak ada yg mustahil ❤️,2025-09-09 03:25:31,0,0,2025-09-09,12.0,"[bagi, tuhan, gak, ada, yg, mustahil]",0.370333,[#ecstasy],positive
3,https://www.instagram.com/p/DOV0Rn2DNf-/,18103206322588474,denisarimbi06,"@prabowo betul itu, orang2 sisaa Mulyono tidak...",2025-09-10 04:57:51,0,0,2025-09-10,2.0,"[betul, itu,, orang2, sisaa, mulyono, tidak, b...",-0.825,[#grief],negative
4,https://www.instagram.com/p/DOV0Rn2DNf-/,17921412669139143,ikbarsinaga999,"Lah rotasi udahan, PKB -> Golkar",2025-09-10 18:44:48,0,0,2025-09-10,17.0,"[lah, rotasi, udahan,, pkb, golkar]",0.881,[#ecstasy],positive


In [55]:
merged.to_csv('../results/senti_emo.csv', index=False)

In [52]:
merged["sentiment"].value_counts()

sentiment
positive    241
neutral     164
negative     74
Name: count, dtype: int64

In [51]:
merged["emotions"].explode().value_counts()

emotions
#ecstasy           89
#enthusiasm        44
#grief             35
#pleasantness      23
#serenity          22
#annoyance         17
#dislike           11
#responsiveness    11
#delight           10
#acceptance        10
#joy                8
#calmness           7
#eagerness          7
#sadness            7
#terror             5
#rage               5
#anxiety            5
#anger              4
#melancholy         4
#loathing           3
#fear               3
#disgust            2
#bliss              2
#contentment        1
Name: count, dtype: int64

In [54]:
# filter for valid communities
target_comms = [2, 5, 12, 15, 20]
df = merged[merged["community"].isin(target_comms)].copy()

# explode emotions
df_exploded = df.explode("emotions")

# calculate sentiment distribution per community
sentiment_dist = (
    df_exploded.groupby(["community", "sentiment"])
    .size()
    .unstack(fill_value=0)
)

# calculate emotion distribution per community
emotion_dist = (
    df_exploded.groupby(["community", "emotions"])
    .size()
    .unstack(fill_value=0)
)

# get results
print("Sentiment Distribution by Community:")
display(sentiment_dist)

print("\nEmotion Distribution by Community:")
display(emotion_dist)

Sentiment Distribution by Community:


sentiment,negative,neutral,positive
community,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2.0,8,17,35
5.0,1,11,15
12.0,3,11,13
15.0,9,6,20
20.0,2,6,4



Emotion Distribution by Community:


emotions,#acceptance,#anger,#annoyance,#bliss,#calmness,#delight,#disgust,#dislike,#eagerness,#ecstasy,#enthusiasm,#fear,#grief,#joy,#melancholy,#pleasantness,#rage,#responsiveness,#sadness,#serenity
community,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2.0,1,2,3,2,1,2,1,4,0,11,7,1,5,0,1,2,0,1,0,2
5.0,0,0,0,0,0,0,0,0,0,0,5,0,1,1,0,6,0,0,0,3
12.0,0,1,0,0,0,0,0,0,0,6,3,0,1,0,0,2,0,1,1,1
15.0,1,0,1,0,0,0,0,1,2,7,2,1,6,1,1,1,0,1,1,3
20.0,0,0,1,0,0,0,0,0,0,3,0,0,0,0,0,2,1,0,0,1
