# Import Libraries

In [1]:
import pandas as pd
import re

from IPython.display import clear_output, display, HTML
from tqdm import tqdm
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Load Dataset

In [2]:
dir_ = "dataset/"
file_path = dir_ + 'oshibe_spv_comments_2025-01-15.csv'
df = pd.read_csv(file_path)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22996 entries, 0 to 22995
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   ID          22996 non-null  object 
 1   ParentID    11079 non-null  object 
 2   Timestamp   22996 non-null  object 
 3   Username    22996 non-null  object 
 4   Comment     22993 non-null  object 
 5   LikeCount   22996 non-null  int64  
 6   ReplyCount  11917 non-null  float64
 7   Date        22996 non-null  object 
dtypes: float64(1), int64(1), object(6)
memory usage: 1.4+ MB


# Data Preprocessing

## Data Types

In [3]:
# Convert data type of 'ReplyCount' to integer
df['ReplyCount'] = pd.to_numeric(df['ReplyCount'], errors='coerce').astype('Int64')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22996 entries, 0 to 22995
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   ID          22996 non-null  object
 1   ParentID    11079 non-null  object
 2   Timestamp   22996 non-null  object
 3   Username    22996 non-null  object
 4   Comment     22993 non-null  object
 5   LikeCount   22996 non-null  int64 
 6   ReplyCount  11917 non-null  Int64 
 7   Date        22996 non-null  object
dtypes: Int64(1), int64(1), object(6)
memory usage: 1.4+ MB


## Missing Values

In [4]:
# Check Missing Values
df[df['Comment'].isnull()]

Unnamed: 0,ID,ParentID,Timestamp,Username,Comment,LikeCount,ReplyCount,Date
302,UgwUen0WTAZIqnC6hKJ4AaABAg,,2024-10-24T02:01:16Z,@ehasitijulaeha2522,,1,0,2024-10-24T02:01:16Z
584,UgwYhw0GzhZZSOTA8il4AaABAg,,2024-08-30T14:41:09Z,@stepme,,0,0,2024-08-30T14:41:09Z
18667,UgyQVu755DqSxX5PTIV4AaABAg,,2023-03-13T20:08:42Z,@ghandithesupremeleader9740,,4,0,2023-03-13T20:08:42Z


- Terdapat 3 baris dengan komentar kosong (kemungkinan hanya berisi karakter yang tidak berhasil di-encode di api call), ketiga baris ini bisa dihapus saja

In [5]:
df.dropna(subset=['Comment'], inplace=True)
df.shape

(22993, 8)

## Duplicated Data

In [6]:
# Drop Duplicates
df = df.drop_duplicates(subset=['Username', 'Comment']).reset_index(drop=True)
df.shape

(22160, 8)

- Komentar yang sama persis (duplikat) dari username yang sama juga cukup dipertahankan satu saja

## Top Level Comments

In [7]:
# Check Top Comments and retain the original 'ID' index
comments = df.copy()
comments = comments[comments['ParentID'].isnull()].sort_values(by=['LikeCount', 'ReplyCount'], ascending=False)
comments = comments[['ID', 'Username', 'Comment', 'LikeCount', 'ReplyCount', 'Date']]
comments

Unnamed: 0,ID,Username,Comment,LikeCount,ReplyCount,Date
13367,UgzWvu72I8m9-U8pq8F4AaABAg,@onthebluesky,"Guys, lagu ini bukan tentang LGBT, tapi tentan...",19405,751,2023-03-14T11:13:13Z
12665,Ugxb2yPnhvOFAaF_b2d4AaABAg,@driezkh,Performance Videonya kaya memberitahu kita ten...,2319,70,2023-03-20T17:56:27Z
20990,UgzCYP-5eQOScO828UZ4AaABAg,@adanjir1923,Satu persatu member diberikan kesempatan buat...,1885,63,2023-03-13T13:16:02Z
16159,UgwQ0xdL1_z3bGf9UM94AaABAg,@Jkt48990,"fiks, kalau kedepan jkt48 release single MVnya...",1863,80,2023-03-14T05:24:15Z
2359,UgyO_jkZ191_KXd7EUR4AaABAg,@ahmadfikri5186,Malam ini rahasia ya\nKamu tak boleh bilang si...,967,13,2023-10-21T18:45:55Z
...,...,...,...,...,...,...
22086,Ugyv4YBfwgcaua5rGux4AaABAg,@rizalfahri6435,"Apakah Shani jadi center lagi, ataukah dipanta...",0,0,2023-03-13T09:45:54Z
22098,UgwoO7UeC3qIc7KhmVd4AaABAg,@isnanyusuf3575,infokan,0,0,2023-03-13T09:36:15Z
22142,UgwJpMpkqqf_ABdjzHp4AaABAg,@johanafandi11,Nitip,0,0,2023-03-13T08:57:49Z
22150,UgwusAPX-itWdsA-SKh4AaABAg,@fahmiaditakurnia3734,ninggalin jejak,0,0,2023-03-13T08:51:32Z


## Comment Replies

In [8]:
# Check Replies
replies = df.copy()
replies = replies[replies['ParentID'].notnull()]
replies = replies[['ID', 'ParentID', 'Comment', 'LikeCount', 'Date']]
replies

Unnamed: 0,ID,ParentID,Comment,LikeCount,Date
11,UgxCK8DSLpRl2ZWP6pp4AaABAg.ACvSDxWXifuAD8OszKbyrr,UgxCK8DSLpRl2ZWP6pp4AaABAg,"Ini bukan lgbt, ini menceritakan tentang salah...",1,2025-01-11T03:26:35Z
12,UgxCK8DSLpRl2ZWP6pp4AaABAg.ACvSDxWXifuAD8PrrtD5HV,UgxCK8DSLpRl2ZWP6pp4AaABAg,@Christyyyy-bt5ps guru gembul aja bilang ini ...,0,2025-01-11T03:35:10Z
15,UgyO59JOKmUo-5QjmLB4AaABAg.ACuX5XgWREnACuijG8AjqW,UgyO59JOKmUo-5QjmLB4AaABAg,Jelas sekali bapak nya ga ngerti lagunya artin...,0,2025-01-05T10:40:09Z
16,UgyO59JOKmUo-5QjmLB4AaABAg.ACuX5XgWREnACukZrgQyyQ,UgyO59JOKmUo-5QjmLB4AaABAg,"@Melvinbryanchiri iya si ya, emang liriknya aj...",1,2025-01-05T10:56:12Z
23,Ugwb_ySgHtkmV4N2rzt4AaABAg.ACuQo4KFnXKACuRacBYOFR,Ugwb_ySgHtkmV4N2rzt4AaABAg,Kali ini bakal dari komunitas atau pengikut da...,0,2025-01-05T08:01:41Z
...,...,...,...,...,...
22130,UgxCXJhXOWtorqLHqCZ4AaABAg.9nBq1cHMtM79nBrWchLTdX,UgxCXJhXOWtorqLHqCZ4AaABAg,New song dalam keterangannya di Twitter,0,2023-03-13T09:35:37Z
22138,UgxRImDNvvZLHAQpMdN4AaABAg.9nBoDtxPza09nCGKl_liwE,UgxRImDNvvZLHAQpMdN4AaABAg,Tebakan yang sangat akurat,0,2023-03-13T13:21:11Z
22144,UgwbfBy7tSP4XWpmQjx4AaABAg.9nBn3nSxK9l9nBqKItxfvS,UgwbfBy7tSP4XWpmQjx4AaABAg,@@AbdulSalam-xe2cq kenapa woy 😑😑,0,2023-03-13T09:25:12Z
22148,Ugwxd5VGdiMxfkC4Ck14AaABAg.9nBmZqUHwKH9nBqClC7kHb,Ugwxd5VGdiMxfkC4Ck14AaABAg,Dh lewat ngav kwkw,0,2023-03-13T09:24:10Z


- Sentiment analysis hanya dilakukan pada top_level_comments (tidak termasuk replies/balasan komentar), karena top_level_comments inilah yang ditujukan untuk videonya

## Japanese Characters

In [9]:
# Function to check for Japanese characters in a string
def contains_japanese(text):
    return bool(re.search(r"[\u3040-\u30FF\u4E00-\u9FFF]", text))

japanese_rows = comments[comments['Comment'].apply(contains_japanese)]
japanese_rows

Unnamed: 0,ID,Username,Comment,LikeCount,ReplyCount,Date
12371,Ugxh_GTZZhdvvVeI_514AaABAg,@triadamas,電影攝影非常好，聲音和視覺效果都是傑作,67,3,2023-03-14T15:13:58Z
5477,UgweRdTljtSvfhA7xfB4AaABAg,@UnikUmbra,Fakta nya Lagu ini mempunyai Versi aslinya asa...,15,0,2023-03-23T04:31:39Z
12019,Ugx0-rG-HWFNxwY09Id4AaABAg,@kiami26202,オリジナル曲だと思ったらおしべ🦋でした,13,0,2023-03-14T19:15:53Z
5305,Ugwp2gvUDMIEqpuQ1y94AaABAg,@angieminipin3536,インドネシア人はアラビアンナイト好きなの？,11,4,2023-03-24T12:10:43Z
6566,UgzLCcGNWBDIa2kdoPt4AaABAg,@nasikamin,おしべとめしべと夜の蝶々,8,1,2023-03-18T14:18:25Z
19280,UgzpphrivqoxRuua3_J4AaABAg,@menshiro777,nice video !! 最高でしたー。　やっぱりjkt48は最高です。,7,0,2023-03-13T14:48:47Z
3840,UgwZ4z6P1V5R-ygfmQd4AaABAg,@halleyhuang5700,Pertama denger malah di SNH48 夜蝶 （kupu malam)\...,5,0,2023-04-26T23:21:44Z
9774,UgxSVoAshXQ3S1L_KF94AaABAg,@Ginojhisusei,すごい,5,0,2023-03-15T11:45:32Z
18541,UgwZISpkQIgciPvs8_d4AaABAg,@can-cn9vs,マーシャ可愛い❤センターおめでとう㊗️,5,0,2023-03-13T16:36:07Z
2582,UgyXvpM7_QrF5WJubeF4AaABAg,@wataru541604,アラビアン風素敵です。,2,1,2023-09-14T00:40:06Z


In [10]:
len(japanese_rows)

15

- Terdapat 15 komentar yang mengandung huruf Jepang, bagian komentar berhuruf Jepang ini akan dihapus

In [11]:
# Function to clean text by removing non-ASCII characters and reducing multiple spaces
def clean_text(text):
    # Remove non-ASCII characters
    text = re.sub(r"[^\x00-\x7F]+", '', text)
    # Remove Japanese characters (Hiragana, Katakana, Kanji)
    text = re.sub(r"[\u3040-\u30FF\u4E00-\u9FFF]+", '', text)
    # Replace multiple spaces with a single space
    text = re.sub(r'\s+', ' ', text)
    # Strip leading and trailing spaces
    return text.strip()

comments['Comment_clean'] = comments['Comment'].apply(clean_text)
comments[['Comment', 'Comment_clean']].head(15)

Unnamed: 0,Comment,Comment_clean
13367,"Guys, lagu ini bukan tentang LGBT, tapi tentan...","Guys, lagu ini bukan tentang LGBT, tapi tentan..."
12665,Performance Videonya kaya memberitahu kita ten...,Performance Videonya kaya memberitahu kita ten...
20990,Satu persatu member diberikan kesempatan buat...,Satu persatu member diberikan kesempatan buat ...
16159,"fiks, kalau kedepan jkt48 release single MVnya...","fiks, kalau kedepan jkt48 release single MVnya..."
2359,Malam ini rahasia ya\nKamu tak boleh bilang si...,Malam ini rahasia ya Kamu tak boleh bilang sia...
6792,"Terlepas dari kontroversi yang ada, sejujurnya...","Terlepas dari kontroversi yang ada, sejujurnya..."
21119,Terlepas dari hate comen 18+. Jujur ini suatu ...,Terlepas dari hate comen 18+. Jujur ini suatu ...
5083,"Gila konsep MV nya keren banget, good job JKT48","Gila konsep MV nya keren banget, good job JKT48"
20771,Congrats JKT48 NEW ERA atas mini albumnya. JKT...,Congrats JKT48 NEW ERA atas mini albumnya. JKT...
1882,"buay yg blg lesbi itu salah besar ya, ini tuh ...","buay yg blg lesbi itu salah besar ya, ini tuh ..."


In [12]:
# Display rows where the 'Comment_clean' column is empty (NaN or empty string)
empty_comments = comments[comments['Comment_clean'].isna() | (comments['Comment_clean'].str.strip() == '')]
empty_comments[['Comment', 'Comment_clean']]

Unnamed: 0,Comment,Comment_clean
12371,電影攝影非常好，聲音和視覺效果都是傑作,
12019,オリジナル曲だと思ったらおしべ🦋でした,
5305,インドネシア人はアラビアンナイト好きなの？,
7201,❤,
6566,おしべとめしべと夜の蝶々,
...,...,...
21748,😳,
21871,🔥🔥🔥,
21936,❤❤❤❤❤,
21944,🔥🔥,


- Komentar yang hanya menggunakan huruf Jepang atau emoticon akan dihapus

In [13]:
comments.shape

(11855, 7)

In [14]:
comments = comments[comments['Comment_clean'].notna() & (comments['Comment_clean'] != '')]
comments.shape

(11650, 7)

## Long Comments

In [15]:
# Calculate the size of each comment and create a new column 'Comment_size'
comments = comments.copy()
comments['Comment_size'] = comments['Comment_clean'].apply(len)
comments[['Comment_clean', 'Comment_size']]

Unnamed: 0,Comment_clean,Comment_size
13367,"Guys, lagu ini bukan tentang LGBT, tapi tentan...",579
12665,Performance Videonya kaya memberitahu kita ten...,3178
20990,Satu persatu member diberikan kesempatan buat ...,174
16159,"fiks, kalau kedepan jkt48 release single MVnya...",203
2359,Malam ini rahasia ya Kamu tak boleh bilang sia...,1319
...,...,...
22086,"Apakah Shani jadi center lagi, ataukah dipanta...",72
22098,infokan,7
22142,Nitip,5
22150,ninggalin jejak,15


In [16]:
# Filter for rows where Comment_size is greater than 512
long_comments = comments[comments['Comment_size'] > 512]
len(long_comments)

118

- Terdapat 118 komentar yang akan dilabeli secara manual atau dengan bantuan genAI karena size nya melebihi limit yang bisa dihandle pre-trained model yang akan digunakan dalam data labelling

# Data Labelling

## Labelling with pre-trained BERT Model

In [None]:
# Load the model and tokenizer
pretrained = "mdhugol/indonesia-bert-sentiment-classification"
model = AutoModelForSequenceClassification.from_pretrained(pretrained)
tokenizer = AutoTokenizer.from_pretrained(pretrained)
sentiment_analysis = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

# Label mapping
label_index = {'LABEL_0': 'positive', 'LABEL_1': 'neutral', 'LABEL_2': 'negative'}

# Function to analyze sentiment
def analyze_sentiment(text):
    # Skip analysis if text length is greater than 512 characters
    if len(text) > 512:
        return "", 0

    # Perform sentiment analysis for comments wit0
    # 0hin the limit
    result = sentiment_analysis(text)
    label = label_index[result[0]['label']]
    score = result[0]['score']
    
    return label, score

# Apply sentiment analysis with progress bar
tqdm.pandas(desc="Analyzing Sentiment")
comments[['Sentiment', 'Confidence']] = comments['Comment_clean'].progress_apply(analyze_sentiment).apply(pd.Series)


Device set to use cpu
Analyzing Sentiment: 100%|██████████| 11650/11650 [22:20<00:00,  8.69it/s]


In [18]:
comments[['Comment', 'Sentiment', 'Confidence']].head(10)

Unnamed: 0,Comment,Sentiment,Confidence
13367,"Guys, lagu ini bukan tentang LGBT, tapi tentan...",,0.0
12665,Performance Videonya kaya memberitahu kita ten...,,0.0
20990,Satu persatu member diberikan kesempatan buat...,positive,0.972226
16159,"fiks, kalau kedepan jkt48 release single MVnya...",positive,0.996596
2359,Malam ini rahasia ya\nKamu tak boleh bilang si...,,0.0
6792,"Terlepas dari kontroversi yang ada, sejujurnya...",,0.0
21119,Terlepas dari hate comen 18+. Jujur ini suatu ...,positive,0.994971
5083,"Gila konsep MV nya keren banget, good job JKT48",positive,0.996474
20771,Congrats JKT48 NEW ERA atas mini albumnya. JKT...,positive,0.574264
1882,"buay yg blg lesbi itu salah besar ya, ini tuh ...",,0.0


- Masih terdapat komentar dengan sentiment yang belum terisi (sengaja dilewati karena melebihi batas panjang karakter untuk pre-trained model yang digunakan)
- Sentiment yang masih kosong ini akan diisi secara manual

## Manual Labelling

In [19]:
# Function to label sentiment manually for long comments
def label_sentiment(long_comments):
    total_comments = len(long_comments)
    i = 1
    for index, row in long_comments.iterrows():
        text = row['Comment_clean']
        while True:
            # Clear the previous outputs
            clear_output(wait=False)
            
            # Display the current progress
            print(f"Labeling long comment {i}/{total_comments}:")
            display(HTML(f"<div style='white-space: pre-wrap;'>{text}</div>"))
            sentiment = input("Please enter 1 for positive, 0 for neutral, -1 for negative, then hit ENTER: ")
            
            # Convert input to integer and handle possible errors
            try:
                sentiment = int(sentiment)
                # Map input to label
                if sentiment == 1:
                    comments.at[index, 'Sentiment'] = "positive"
                    comments.at[index, 'Confidence'] = 0
                    i += 1
                    break
                elif sentiment == 0:
                    comments.at[index, 'Sentiment'] = "neutral"
                    comments.at[index, 'Confidence'] = 0
                    i += 1
                    break
                elif sentiment == -1:
                    comments.at[index, 'Sentiment'] = "negative"
                    comments.at[index, 'Confidence'] = 0
                    i += 1
                    break
                else:
                    print("Invalid input. Please enter 1, 0, or -1.")
            except ValueError:
                print("Invalid input. Please enter a numeric value (1, 0, or -1).")

# Call the labeling function for long comments
label_sentiment(long_comments)

Labeling long comment 118/118:


In [20]:
comments[['Comment', 'Sentiment', 'Confidence']].head(10)

Unnamed: 0,Comment,Sentiment,Confidence
13367,"Guys, lagu ini bukan tentang LGBT, tapi tentan...",neutral,0.0
12665,Performance Videonya kaya memberitahu kita ten...,neutral,0.0
20990,Satu persatu member diberikan kesempatan buat...,positive,0.972226
16159,"fiks, kalau kedepan jkt48 release single MVnya...",positive,0.996596
2359,Malam ini rahasia ya\nKamu tak boleh bilang si...,neutral,0.0
6792,"Terlepas dari kontroversi yang ada, sejujurnya...",neutral,0.0
21119,Terlepas dari hate comen 18+. Jujur ini suatu ...,positive,0.994971
5083,"Gila konsep MV nya keren banget, good job JKT48",positive,0.996474
20771,Congrats JKT48 NEW ERA atas mini albumnya. JKT...,positive,0.574264
1882,"buay yg blg lesbi itu salah besar ya, ini tuh ...",neutral,0.0


- Komentar dengan labelling manual ditandai dengan nilai confidence 0

In [21]:
comments[comments['Confidence']==0][['Comment', 'Sentiment']]

Unnamed: 0,Comment,Sentiment
13367,"Guys, lagu ini bukan tentang LGBT, tapi tentan...",neutral
12665,Performance Videonya kaya memberitahu kita ten...,neutral
2359,Malam ini rahasia ya\nKamu tak boleh bilang si...,neutral
6792,"Terlepas dari kontroversi yang ada, sejujurnya...",neutral
1882,"buay yg blg lesbi itu salah besar ya, ini tuh ...",neutral
...,...,...
216,Malam ini rahasia ya\nKamu tak boleh bilang si...,neutral
1040,Malam ini rahasia ya\nKamu tak boleh bilang si...,neutral
2012,Malam ini rahasia ya\nKamu tak boleh bilang si...,neutral
2033,Malam ini rahasia ya\nKamu tak boleh bilang si...,neutral


In [22]:
comments[comments['Sentiment'].isnull()]

Unnamed: 0,ID,Username,Comment,LikeCount,ReplyCount,Date,Comment_clean,Comment_size,Sentiment,Confidence


- Sudah tidak ada Sentiment yang kosong

## Check Labelling Result

In [23]:
# Check Comments with Positive Sentiment
positive_comments = comments[comments['Sentiment'] == 'neutral']
positive_comments[['Comment', 'Sentiment', 'Confidence']].head(10)

Unnamed: 0,Comment,Sentiment,Confidence
13367,"Guys, lagu ini bukan tentang LGBT, tapi tentan...",neutral,0.0
12665,Performance Videonya kaya memberitahu kita ten...,neutral,0.0
2359,Malam ini rahasia ya\nKamu tak boleh bilang si...,neutral,0.0
6792,"Terlepas dari kontroversi yang ada, sejujurnya...",neutral,0.0
1882,"buay yg blg lesbi itu salah besar ya, ini tuh ...",neutral,0.0
8562,11 tahun lebih mereka berkiprah di dunia musik...,neutral,0.0
15602,Ini lagu ttg pelajaran IPA. Benang sari (janta...,neutral,0.434662
19329,Three words for this new special performance i...,neutral,0.982986
8867,Si baju putih yang polos dan masih suci itu di...,neutral,0.0
21074,BRAVO JKT48 NEW ERA!!!,neutral,0.968161


In [24]:
# Check Comments with Neutral Sentiment
neutral_comments = comments[comments['Sentiment'] == 'neutral']
neutral_comments[['Comment', 'Sentiment', 'Confidence']].head(10)

Unnamed: 0,Comment,Sentiment,Confidence
13367,"Guys, lagu ini bukan tentang LGBT, tapi tentan...",neutral,0.0
12665,Performance Videonya kaya memberitahu kita ten...,neutral,0.0
2359,Malam ini rahasia ya\nKamu tak boleh bilang si...,neutral,0.0
6792,"Terlepas dari kontroversi yang ada, sejujurnya...",neutral,0.0
1882,"buay yg blg lesbi itu salah besar ya, ini tuh ...",neutral,0.0
8562,11 tahun lebih mereka berkiprah di dunia musik...,neutral,0.0
15602,Ini lagu ttg pelajaran IPA. Benang sari (janta...,neutral,0.434662
19329,Three words for this new special performance i...,neutral,0.982986
8867,Si baju putih yang polos dan masih suci itu di...,neutral,0.0
21074,BRAVO JKT48 NEW ERA!!!,neutral,0.968161


- Masih ada yang miss untuk sentiment neutral sperti komentar dengan index 19329, 21074, 12790, & 16379 seharusnya positive

In [25]:
# Check Comments with Negative Sentiment
negative_comments = comments[comments['Sentiment'] == 'negative']
negative_comments[['Comment', 'Sentiment', 'Confidence']].head(10)

Unnamed: 0,Comment,Sentiment,Confidence
15040,"Ini jujur, TERLALU INDAH. bagus banget JOT! Da...",negative,0.765288
11341,"Gila keren parah udah trending 1 aja, dan tren...",negative,0.981162
7176,Kalo dari judul dan lirik sendiri sebenarnya i...,negative,0.939781
17953,"Diluar semua kontroversi, big applause untuk a...",negative,0.775857
4627,Media bilang ini single baru padahal cuman spe...,negative,0.988137
5422,"Perbanyak mv masterpiece seperti ini , komplek...",negative,0.508472
7144,Makin lama makin di dengar makin bgus lebih masuk,negative,0.564185
18635,"Woaaahh kaget dong, keren banget ini mah. Sema...",negative,0.732071
6036,JKT48 KOK JADI KEREN GINI WOY???! DARI MANA SA...,negative,0.989798
9587,Ampunn JKT48 SEKARANG SEBAGUS INI 😍,negative,0.995161


- Masih banyak yang miss untuk sentiment negative sperti komentar dengan index 15040, 11341, 7176, 5422, 7144, 18635, 6036, & 9587 seharusnya positive

## Save to CSV

In [26]:
# Save the labeled dataset
output_path = file_path.replace(".csv", "_labeled.csv")
comments.to_csv(output_path, index=False)

print(f"Sentiment-labeled dataset saved to {output_path}")

Sentiment-labeled dataset saved to dataset/oshibe_spv_comments_2025-01-15_labeled.csv
