In [4]:
import requests
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Target board list URL (example: the "chartanalysis" minor gallery).
# get_posts() appends "&page=N" to it, so it must already carry a query string.
BASE_URL = "https://gall.dcinside.com/mgallery/board/lists/?id=chartanalysis"

def get_posts(url, pages=2, timeout=10, headers=None):
    """Collect post titles from the first ``pages`` list pages of a board.

    Args:
        url: Board list URL that already contains a query string; the page
            number is appended to it as ``&page=N``.
        pages: Number of list pages to fetch, starting from page 1.
        timeout: Seconds to wait for each HTTP response. Without a timeout
            ``requests`` may block indefinitely on an unresponsive server.
        headers: Optional HTTP headers forwarded to ``requests.get``.
            NOTE(review): some sites reject the default python-requests
            User-Agent; pass a browser-like one here if pages come back
            empty -- verify against the target site.

    Returns:
        A list of non-empty, whitespace-stripped title strings, in the order
        they appear on the fetched pages.

    Raises:
        requests.HTTPError: If a page responds with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    texts = []
    for page in range(1, pages + 1):
        res = requests.get(f"{url}&page={page}", headers=headers, timeout=timeout)
        # Fail loudly instead of silently parsing an error/block page,
        # which would otherwise just yield zero titles.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')
        for anchor in soup.select("td.gall_tit.ub-word a"):
            title = anchor.text.strip()
            if title:
                texts.append(title)
    return texts

# Load the tokenizer and a 3-label sequence-classification model, then wrap
# them in a text-classification pipeline.
# NOTE(review): "klue/roberta-base" looks like a base pretrained checkpoint
# rather than one fine-tuned for sentiment. If so, the 3-way classification
# head created here is randomly initialised and the predicted labels carry no
# meaning until the model is fine-tuned (or a sentiment checkpoint is used)
# -- TODO confirm.
# NOTE(review): the recorded run failed with ImportError because PyTorch was
# not installed; this cell needs a PyTorch install to execute.
model_name = "klue/roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)  # e.g. positive / neutral / negative
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

# Collect post titles from the three most recent list pages.
posts = get_posts(BASE_URL, pages=3)

# Classify every title in one pipeline call. Truncation is delegated to the
# tokenizer (max 512 tokens): slicing the string to 512 characters limits
# characters, not tokens, so it does not guarantee the model's input limit.
# The pipeline is skipped entirely when nothing was scraped.
results = classifier(posts, truncation=True, max_length=512) if posts else []

# Explicit columns keep 'label' and 'score' present even when `results` is
# empty, so downstream cells can reference df['label'] without a KeyError.
df = pd.DataFrame(results, columns=["label", "score"])

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


ImportError: 
AutoModelForSequenceClassification requires the PyTorch library but it was not found in your environment. Check out the instructions on the
installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment.
Please note that you may need to restart your runtime after installation.


In [None]:
# Visualise how many posts were assigned to each predicted label.
# NOTE(review): the title and axis labels are Korean; matplotlib's default
# font may not contain Hangul glyphs, in which case they render as empty
# boxes -- configure a Korean-capable font if that happens (verify locally).
if df.empty:
    # Nothing was scraped or classified; a bar plot of an empty frame would
    # raise instead of producing a figure.
    print("No classification results to plot.")
else:
    fig, ax = plt.subplots()
    df['label'].value_counts().plot(kind='bar', color='lightgreen', ax=ax)
    ax.set_title("DCInside 차트 갤러리 감정 분포")
    ax.set_xlabel("감정")
    ax.set_ylabel("게시글 수")
    fig.tight_layout()
    plt.show()