In [2]:
import pandas as pd

# Load the comments CSV (path is relative to the notebook's working directory).
# NOTE(review): the rendered frame further down shows an "Unnamed: 0" column,
# which looks like a previously saved index -- confirm, then consider index_col=0.
comments = pd.read_csv('mimiminu_comments.csv')

In [4]:
import re
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download the required NLTK resources.
# NOTE(review): the code visible in this notebook only uses the VADER analyzer;
# the other corpora may be unnecessary here -- confirm before removing.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('vader_lexicon')
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to
[nltk_data]     /data/ephemeral/home/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     /data/ephemeral/home/nltk_data...
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /data/ephemeral/home/nltk_data...
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /data/ephemeral/home/nltk_data...


True

In [6]:
# Helpers for the sentiment analysis.
def preprocess(data):
    """Return a copy of ``data`` with a cleaned ``pre_comments`` column.

    The text in ``data['textDisplay']`` is cleaned in this order:

    1. Missing values become empty strings, so the string operations below
       cannot fail on NaN.
    2. HTML tags (the data contains ``<a href=...>`` and ``<br>``) are removed
       so that markup and URLs do not survive as pseudo-words.
    3. Every character other than an ASCII letter or ``#`` becomes a space.
    4. Words of three characters or fewer are dropped.
    5. The remaining text is lower-cased.

    Step 3 discards all non-ASCII text, so a comment written entirely in
    another script is reduced to an empty string.

    Args:
        data: DataFrame with a ``textDisplay`` column of comment text.

    Returns:
        A new DataFrame with the extra ``pre_comments`` column; the caller's
        frame is left unmodified.
    """
    text = data['textDisplay'].fillna('').astype(str)
    # Strip tags before the letter filter, otherwise "href"/"https" remain as words.
    text = text.str.replace(r"<[^>]+>", " ", regex=True)
    text = text.str.replace("[^a-zA-Z#]", " ", regex=True)
    text = text.map(lambda x: ' '.join(w for w in x.split() if len(w) > 3))
    return data.assign(pre_comments=text.str.lower())

def sentiment_helper(data):
    """Score each preprocessed comment with VADER and label its polarity.

    Args:
        data: DataFrame holding a ``pre_comments`` text column (as produced by
            ``preprocess``).

    Returns:
        A new DataFrame without ``pre_comments`` and with two columns appended:
        ``sentiment score`` (the VADER compound score) and
        ``sentiment category`` (``'Positive'``, ``'Negative'`` or
        ``'Neutral'``). The input frame is not modified, so the call can be
        repeated on the same input without a ``KeyError``.
    """
    def classify(score):
        # The sign of the compound score picks the label; exactly 0 is neutral.
        if score > 0:
            return 'Positive'
        if score < 0:
            return 'Negative'
        return 'Neutral'

    sia = SentimentIntensityAnalyzer()
    scores = data['pre_comments'].map(lambda x: sia.polarity_scores(x)['compound'])

    # drop() returns a new frame, so the caller's DataFrame keeps its columns.
    scored = data.drop(columns='pre_comments')
    scored['sentiment score'] = scores
    scored['sentiment category'] = scores.map(classify)
    return scored

# Run the preprocessing step and then the sentiment analysis on the loaded comments.
processed_data = preprocess(comments)
sentiment_data = sentiment_helper(processed_data)

In [7]:
# Display the scored comments (the notebook elides the middle rows of the table).
sentiment_data

Unnamed: 0,vId,cId,textDisplay,authorDisplayName,likeCount,commentPublishedAt,commentUpdatedAt,replyCount,replies,sentiment score,sentiment category
0,7O2zu7Ec924,UgwWeht33z-i43p7HRF4AaABAg,한글날을 경시하며 투바투 팬클럽의 존재 자체를 부정하고 지금도 어렵게 살아가시는 취...,@vincero9706,63616,2023-09-30T10:44:33Z,2023-09-30T11:34:42Z,240,"[['ㅅㅂ ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ', '@nelllloom'], ['ㅋㅋㅋㅋㅋㅋㅋㅋ...",0.0,Neutral
1,7O2zu7Ec924,Ugwe843HfPFJH8rlr0N4AaABAg,"<a href=""https://www.youtube.com/watch?v=7O2zu...",@미미미누,31345,2023-09-30T13:45:16Z,2023-09-30T13:45:16Z,118,"[['개웃기네 ㅋㅋㅋㅋㅋㅋ', '@쮸마'], ['그렇지않습니다', '@이일-v3c'...",0.0,Neutral
2,7O2zu7Ec924,UgxA3lFktxhVpD_Ez4N4AaABAg,6수해서 피식대학이면 잘갔다 형,@곽민준-e6q,26435,2023-09-30T10:02:36Z,2023-09-30T10:02:36Z,41,"[['ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅅㅂ 개웃기네', '@김지훈-f6u'], ['10점.. 1...",0.0,Neutral
3,7O2zu7Ec924,Ugzq3iKGhYE4RZ024Rt4AaABAg,대학은 수능을 5번이나 보시고 가셨지만<br>나락은 한 번에 가겠다는 미누님의 의지...,@alphamanager,23472,2023-09-30T10:11:12Z,2023-09-30T10:11:12Z,42,"[['나나나나나락', '@parkssword'], ['너무해 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ...",0.0,Neutral
4,7O2zu7Ec924,UgxHW3XmSa9K-6Gd7ud4AaABAg,아 나락퀴즈쇼가 개웃긴 게 피식대학 멤버들이 문제 틀려도 안타깝다는 기색 1도 없이...,@user-td7pl4zy2z,13396,2023-10-04T19:55:30Z,2023-10-04T19:55:30Z,6,[['ㅋㅋㅋ ㅋ ㅋ ㅋ ㅋㅋㅋ ㅋ ㅋㅋ ㅌ ㅌ ㅌ ㅌ ㅌ ㅌ ㅌ ㅌㅌ ㅇㅇ ㅌㅌ...,0.0,Neutral
...,...,...,...,...,...,...,...,...,...,...,...
295,7O2zu7Ec924,Ugx-q93VBueGB_4cSX94AaABAg,이사람 반응이 제일 재밌네 연기좋다,@sjshdheixhakaj18e7,3,2024-03-21T22:21:04Z,2024-03-21T22:21:04Z,0,,0.0,Neutral
296,7O2zu7Ec924,Ugw-UTULsRA_rRyEYr94AaABAg,안경을 쓰고 지식을 쌓은 사람은 독립 운동보다는 친일을 택한다는 미3누님 항상 응원...,@HAD-HaveADrink,3,2023-11-18T18:35:34Z,2023-11-18T18:35:34Z,0,,0.0,Neutral
297,7O2zu7Ec924,Ugx61tNf7gimca32VDh4AaABAg,"<a href=""https://www.youtube.com/watch?v=7O2zu...",@lets_toast,2,2023-10-07T02:34:32Z,2023-10-07T02:34:42Z,0,,0.0,Neutral
298,7O2zu7Ec924,UgwLl4crZ7s4BqH7rsJ4AaABAg,그동안 구독 안했었는데 나락퀴즈쇼에 GG 치고 구독 박았다 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ...,@eternalwinner911,2,2023-10-10T10:36:53Z,2023-10-10T10:36:53Z,0,,0.0,Neutral


In [12]:
# Show only the rows whose sentiment category is 'Neutral'.
sentiment_data.loc[sentiment_data['sentiment category'].eq('Neutral')]

Unnamed: 0,vId,cId,textDisplay,authorDisplayName,likeCount,commentPublishedAt,commentUpdatedAt,replyCount,replies,sentiment score,sentiment category
0,7O2zu7Ec924,UgwWeht33z-i43p7HRF4AaABAg,한글날을 경시하며 투바투 팬클럽의 존재 자체를 부정하고 지금도 어렵게 살아가시는 취...,@vincero9706,63616,2023-09-30T10:44:33Z,2023-09-30T11:34:42Z,240,"[['ㅅㅂ ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ', '@nelllloom'], ['ㅋㅋㅋㅋㅋㅋㅋㅋ...",0.0,Neutral
1,7O2zu7Ec924,Ugwe843HfPFJH8rlr0N4AaABAg,"<a href=""https://www.youtube.com/watch?v=7O2zu...",@미미미누,31345,2023-09-30T13:45:16Z,2023-09-30T13:45:16Z,118,"[['개웃기네 ㅋㅋㅋㅋㅋㅋ', '@쮸마'], ['그렇지않습니다', '@이일-v3c'...",0.0,Neutral
2,7O2zu7Ec924,UgxA3lFktxhVpD_Ez4N4AaABAg,6수해서 피식대학이면 잘갔다 형,@곽민준-e6q,26435,2023-09-30T10:02:36Z,2023-09-30T10:02:36Z,41,"[['ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅅㅂ 개웃기네', '@김지훈-f6u'], ['10점.. 1...",0.0,Neutral
3,7O2zu7Ec924,Ugzq3iKGhYE4RZ024Rt4AaABAg,대학은 수능을 5번이나 보시고 가셨지만<br>나락은 한 번에 가겠다는 미누님의 의지...,@alphamanager,23472,2023-09-30T10:11:12Z,2023-09-30T10:11:12Z,42,"[['나나나나나락', '@parkssword'], ['너무해 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ...",0.0,Neutral
4,7O2zu7Ec924,UgxHW3XmSa9K-6Gd7ud4AaABAg,아 나락퀴즈쇼가 개웃긴 게 피식대학 멤버들이 문제 틀려도 안타깝다는 기색 1도 없이...,@user-td7pl4zy2z,13396,2023-10-04T19:55:30Z,2023-10-04T19:55:30Z,6,[['ㅋㅋㅋ ㅋ ㅋ ㅋ ㅋㅋㅋ ㅋ ㅋㅋ ㅌ ㅌ ㅌ ㅌ ㅌ ㅌ ㅌ ㅌㅌ ㅇㅇ ㅌㅌ...,0.0,Neutral
...,...,...,...,...,...,...,...,...,...,...,...
295,7O2zu7Ec924,Ugx-q93VBueGB_4cSX94AaABAg,이사람 반응이 제일 재밌네 연기좋다,@sjshdheixhakaj18e7,3,2024-03-21T22:21:04Z,2024-03-21T22:21:04Z,0,,0.0,Neutral
296,7O2zu7Ec924,Ugw-UTULsRA_rRyEYr94AaABAg,안경을 쓰고 지식을 쌓은 사람은 독립 운동보다는 친일을 택한다는 미3누님 항상 응원...,@HAD-HaveADrink,3,2023-11-18T18:35:34Z,2023-11-18T18:35:34Z,0,,0.0,Neutral
297,7O2zu7Ec924,Ugx61tNf7gimca32VDh4AaABAg,"<a href=""https://www.youtube.com/watch?v=7O2zu...",@lets_toast,2,2023-10-07T02:34:32Z,2023-10-07T02:34:42Z,0,,0.0,Neutral
298,7O2zu7Ec924,UgwLl4crZ7s4BqH7rsJ4AaABAg,그동안 구독 안했었는데 나락퀴즈쇼에 GG 치고 구독 박았다 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ...,@eternalwinner911,2,2023-10-10T10:36:53Z,2023-10-10T10:36:53Z,0,,0.0,Neutral


In [8]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import torch.nn.functional as F

# Load the Korean BERT checkpoint with a 2-label sequence-classification head,
# together with the tokenizer from the same checkpoint.
# NOTE(review): the load warning below reports that 'classifier.weight' and
# 'classifier.bias' are newly initialized, i.e. the head is untrained. The
# predictions are not meaningful until the model is fine-tuned on a labelled task.
model_name = "kykim/bert-kor-base"
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = BertTokenizer.from_pretrained(model_name)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at kykim/bert-kor-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
def classify_emotion(text):
    """Predict a class index for ``text`` with the module-level BERT classifier.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        text: A single comment. ``None`` and NaN (how pandas represents an
            empty CSV cell) are treated as an empty string so the tokenizer
            does not raise on missing data.

    Returns:
        int: Index of the highest-scoring class.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        text = ""

    # Tokenize; cap the length explicitly at what the model's position
    # embeddings support instead of relying on the tokenizer's implicit limit.
    tokens = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=model.config.max_position_embeddings,
        return_tensors="pt",
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**tokens).logits

    # Softmax is monotonic, so the argmax of the logits is the argmax of the
    # probabilities; the extra softmax pass is skipped.
    return logits.argmax(dim=1).item()

def process_comments(file_path, output_path='updated_file.csv'):
    """Label every comment in a CSV file and write the labelled table to disk.

    Args:
        file_path: CSV file to read; it must contain a ``textDisplay`` column.
        output_path: Destination CSV file. Defaults to ``'updated_file.csv'``,
            the location this function always wrote to previously.

    Returns:
        None. The result is written to ``output_path`` with an added
        ``emotion`` column, and a completion message is printed.
    """
    # Read the CSV file.
    data = pd.read_csv(file_path)

    # Index 0 is rendered as the negative label, index 1 as the positive label.
    labels = ["부정적", "긍정적"]
    # Empty cells are read as NaN; classify them as empty strings instead of crashing.
    texts = data['textDisplay'].fillna('').astype(str)
    data['emotion'] = texts.map(lambda x: labels[classify_emotion(x)])

    # Save the result.
    data.to_csv(output_path, index=False)
    print(f"분석 완료: '{output_path}'에 결과가 저장되었습니다.")

# Input file path
file_path = 'mimiminu_comments.csv'  # change this to the actual file path
process_comments(file_path)

분석 완료: 'updated_file.csv'에 결과가 저장되었습니다.


In [16]:
# Reload the labelled comments from disk and show the rows labelled positive.
mmmn = pd.read_csv('updated_file.csv')
mmmn.loc[mmmn['emotion'].eq('긍정적')]

Unnamed: 0,vId,cId,textDisplay,authorDisplayName,likeCount,commentPublishedAt,commentUpdatedAt,replyCount,replies,emotion
0,7O2zu7Ec924,UgwWeht33z-i43p7HRF4AaABAg,한글날을 경시하며 투바투 팬클럽의 존재 자체를 부정하고 지금도 어렵게 살아가시는 취...,@vincero9706,63616,2023-09-30T10:44:33Z,2023-09-30T11:34:42Z,240,"[['ㅅㅂ ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ', '@nelllloom'], ['ㅋㅋㅋㅋㅋㅋㅋㅋ...",긍정적
1,7O2zu7Ec924,Ugwe843HfPFJH8rlr0N4AaABAg,"<a href=""https://www.youtube.com/watch?v=7O2zu...",@미미미누,31345,2023-09-30T13:45:16Z,2023-09-30T13:45:16Z,118,"[['개웃기네 ㅋㅋㅋㅋㅋㅋ', '@쮸마'], ['그렇지않습니다', '@이일-v3c'...",긍정적
2,7O2zu7Ec924,UgxA3lFktxhVpD_Ez4N4AaABAg,6수해서 피식대학이면 잘갔다 형,@곽민준-e6q,26435,2023-09-30T10:02:36Z,2023-09-30T10:02:36Z,41,"[['ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅅㅂ 개웃기네', '@김지훈-f6u'], ['10점.. 1...",긍정적
3,7O2zu7Ec924,Ugzq3iKGhYE4RZ024Rt4AaABAg,대학은 수능을 5번이나 보시고 가셨지만<br>나락은 한 번에 가겠다는 미누님의 의지...,@alphamanager,23472,2023-09-30T10:11:12Z,2023-09-30T10:11:12Z,42,"[['나나나나나락', '@parkssword'], ['너무해 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ...",긍정적
4,7O2zu7Ec924,UgxHW3XmSa9K-6Gd7ud4AaABAg,아 나락퀴즈쇼가 개웃긴 게 피식대학 멤버들이 문제 틀려도 안타깝다는 기색 1도 없이...,@user-td7pl4zy2z,13396,2023-10-04T19:55:30Z,2023-10-04T19:55:30Z,6,[['ㅋㅋㅋ ㅋ ㅋ ㅋ ㅋㅋㅋ ㅋ ㅋㅋ ㅌ ㅌ ㅌ ㅌ ㅌ ㅌ ㅌ ㅌㅌ ㅇㅇ ㅌㅌ...,긍정적
...,...,...,...,...,...,...,...,...,...,...
294,7O2zu7Ec924,Ugy4ZaiNLm2_odpkXqR4AaABAg,나락쿠ㅏ즈쇼 궤도 햐주세요 ㅜㅜ 개재밋을듯,@singaloneppoya,4,2023-10-22T00:56:25Z,2023-10-22T00:56:25Z,0,,긍정적
296,7O2zu7Ec924,Ugw-UTULsRA_rRyEYr94AaABAg,안경을 쓰고 지식을 쌓은 사람은 독립 운동보다는 친일을 택한다는 미3누님 항상 응원...,@HAD-HaveADrink,3,2023-11-18T18:35:34Z,2023-11-18T18:35:34Z,0,,긍정적
297,7O2zu7Ec924,Ugx61tNf7gimca32VDh4AaABAg,"<a href=""https://www.youtube.com/watch?v=7O2zu...",@lets_toast,2,2023-10-07T02:34:32Z,2023-10-07T02:34:42Z,0,,긍정적
298,7O2zu7Ec924,UgwLl4crZ7s4BqH7rsJ4AaABAg,그동안 구독 안했었는데 나락퀴즈쇼에 GG 치고 구독 박았다 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ...,@eternalwinner911,2,2023-10-10T10:36:53Z,2023-10-10T10:36:53Z,0,,긍정적


In [17]:
# Show the rows labelled negative.
mmmn.loc[mmmn['emotion'].eq('부정적')]

Unnamed: 0,vId,cId,textDisplay,authorDisplayName,likeCount,commentPublishedAt,commentUpdatedAt,replyCount,replies,emotion
6,7O2zu7Ec924,Ugw92YkXw2mjcqo9NRx4AaABAg,고려대 잘봤습니다.,@kwakyoongy,9744,2023-09-30T11:30:24Z,2023-09-30T11:30:24Z,45,"[['미쳤네,,,,', '@김재영-q9e7r'], ['찐이네?', '@준-h3w']...",부정적
13,7O2zu7Ec924,UgyYyNTM2KSjRGc1MoV4AaABAg,전한길 선생님 모시고 진행해도 재밌을 것 같네요. 한국사 관련 문제는 다 맞추고 안...,@sbd1000,6702,2023-10-31T02:44:57Z,2023-10-31T02:44:57Z,16,"[['그분은 리스크가 너무크다', '@활발하고계산적'], ['@@활발하고계산적ㅅㅂ ...",부정적
28,7O2zu7Ec924,Ugy2d3-a7Ttq1UDERcB4AaABAg,"<a href=""https://www.youtube.com/watch?v=7O2zu...",@Bana0._.,2968,2023-10-04T02:42:33Z,2023-10-04T02:43:35Z,11,"[['채널매니저 반응 중간중간 넣어놓은거 개웃기넼ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ', '@d...",부정적
34,7O2zu7Ec924,Ugw-hKCtEnXvi9XxjEt4AaABAg,아니 진짜 이컨텐츠 기획한사람 상줘야된다. 어떻게 이런 기발한발상을 할 수가 있지...,@안노옹-t2w,2344,2023-10-01T02:40:48Z,2023-10-01T02:40:48Z,2,"[['개콘에서 하긴했었음 근데 너무 좌편향적이라 욕을 많이 먹었지', '@chaps...",부정적
36,7O2zu7Ec924,UgzbIBeiECy_oH74gfJ4AaABAg,"<a href=""https://www.youtube.com/watch?v=7O2zu...",@handsomeajaekiller,2125,2023-09-30T12:09:58Z,2023-10-02T13:22:57Z,6,"[['빵ㅡ긋', '@퓰레-x7h'], ['표정 ㄹㅇ 아기 원숭이같음', '@은채-y...",부정적
44,7O2zu7Ec924,Ugw6lBWdxfJZkPw8RHd4AaABAg,보너스 문제 보고 나도 같이 뿜음ㅋㅋㅋ 창의력 대단하다 피식대학,@sayingE,961,2023-10-01T12:19:38Z,2023-10-01T12:19:38Z,0,,부정적
50,7O2zu7Ec924,Ugy8_vasPPXzKeoo2OF4AaABAg,잼버리 문제는 진짜 개미쳤닼ㅋㅋㅋㅋ,@star._____.,788,2023-09-30T10:33:56Z,2023-09-30T10:33:56Z,0,,부정적
60,7O2zu7Ec924,UgxxEoFlwpzbKw4nqBZ4AaABAg,잠깐.. 내가 뭘 본거지?,@Dragonstone0612,580,2023-09-30T09:07:30Z,2023-09-30T09:07:30Z,0,,부정적
63,7O2zu7Ec924,Ugx9EicIy3iqudk5UcN4AaABAg,미미미누 편입 폼 미쳤다이~,@관계자입니다,472,2023-09-30T10:03:49Z,2023-09-30T10:03:49Z,2,"[['@FreemanJohnsonbecker 너검마임?', '@Breadcan_']...",부정적
69,7O2zu7Ec924,UgzkevGhJ4k_5R7XUzJ4AaABAg,주입식 교육의 폐해를 보여주신 미3누님 감사합니다,@Dododohh__,374,2023-10-02T05:52:28Z,2023-10-02T05:52:28Z,0,,부정적
