# In[1]:
import requests
import json
from bs4 import BeautifulSoup
import os
# Request headers sent with the scraping requests (browser-like user agent
# plus a referer pointing at a Naver news comment page).
# NOTE(review): the referer's article id (0003312684) differs from the article
# id used in the URLs further down (0003312694) — confirm this is intentional.
HEADERS = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/127.0.0.0 Safari/537.36'
    ),
    'referer': 'https://n.news.naver.com/mnews/article/comment/032/0003312684',
}
def fetch_article_data(article_url, timeout=10):
    """Download an article page and extract its title and body text.

    Args:
        article_url: URL of the article page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(title, body)`` tuple of whitespace-stripped strings taken from
        the ``#title_area span`` and ``article#dic_area`` elements.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        ValueError: If either expected element is missing from the page.
    """
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(article_url, headers=HEADERS, timeout=timeout)
    # Fail on 4xx/5xx here instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    title_tag = soup.select_one('#title_area span')
    body_tag = soup.select_one('article#dic_area')
    # select_one returns None when nothing matches; report that clearly
    # rather than failing with "'NoneType' object has no attribute 'text'".
    if title_tag is None or body_tag is None:
        raise ValueError(
            'Could not locate the article title/body in the page: '
            + str(article_url)
        )
    return title_tag.text.strip(), body_tag.text.strip()
def fetch_reactions(reaction_url, timeout=10):
    """Fetch the reaction counts for an article from the reaction endpoint.

    Args:
        reaction_url: URL of the reaction (like) API for the article.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping each ``reactionType`` to its ``count``, read from the
        first entry of the response's ``contents`` list. An empty dict is
        returned when the response carries no contents or no reactions.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    # Send the same headers as the other fetchers so all requests look alike,
    # and bound the wait so a stalled connection cannot hang the script.
    response = requests.get(reaction_url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()

    contents = response.json().get('contents') or []
    if not contents:
        # No reaction data for this article; avoid IndexError on contents[0].
        return {}
    reactions = contents[0].get('reactions') or []
    return {react['reactionType']: react['count'] for react in reactions}
def fetch_comments(comment_url, timeout=10):
    """Fetch the comment texts for an article from the JSONP comment endpoint.

    Args:
        comment_url: URL of the comment list API.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the ``contents`` string of every comment in
        ``result.commentList`` whose contents are non-empty.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        ValueError: If the response is neither JSON nor a JSONP wrapper, or
            the wrapped payload is not valid JSON.
        KeyError: If the payload lacks ``result`` or ``commentList``.
    """
    response = requests.get(comment_url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()

    text = response.text.strip()
    if not text.startswith(('{', '[')):
        # JSONP: the JSON sits between the first '(' and the last ')'.
        # Slicing on those positions leaves comment text containing
        # '_callback(' untouched and tolerates a missing ';' or a trailing
        # newline after the closing parenthesis.
        start = text.find('(')
        end = text.rfind(')')
        if start == -1 or end < start:
            raise ValueError('Unexpected comment API response: no JSONP wrapper found')
        text = text[start + 1:end]

    comments = json.loads(text)['result']['commentList']
    return [comment['contents'] for comment in comments if comment.get('contents')]
def save_to_json(data, filename):
    """Write ``data`` as pretty-printed UTF-8 JSON to ``filename``.

    Args:
        data: JSON-serialisable object to store.
        filename: Target file name; it is joined onto the current working
            directory (an absolute path is therefore used unchanged).
    """
    # Resolve against the current working directory (the approach chosen for
    # running inside Jupyter Notebook).
    target = os.path.join(os.getcwd(), filename)
    with open(target, mode='w', encoding='utf-8') as handle:
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        json.dump(data, handle, indent=4, ensure_ascii=False)
# URL configuration.
# The press (office) id and article id appear in all three endpoints; keeping
# them in one place prevents the URLs from drifting apart when the target
# article changes.
OFFICE_ID = '032'
ARTICLE_ID = '0003312694'

# Article page.
ARTICLE_URL = f'https://n.news.naver.com/mnews/article/{OFFICE_ID}/{ARTICLE_ID}'

# Reaction (like) API for the article.
REACTION_URL = (
    'https://news.like.naver.com/v1/search/contents'
    '?suppress_response_codes=true'
    f'&q=NEWS%5Bne_{OFFICE_ID}_{ARTICLE_ID}%5D'
    '&isDuplication=false'
    '&cssIds=MULTI_MOBILE%2CNEWS_MOBILE'
    '&_=1722702074749'
)

# Comment list API (JSONP), first page sorted by FAVORITE.
COMMENT_URL = (
    'https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json'
    '?ticket=news'
    '&templateId=default_world'
    '&pool=cbox5'
    '&_cv=20241217130331'
    '&lang=ko'
    '&country=KR'
    f'&objectId=news{OFFICE_ID}%2C{ARTICLE_ID}'
    '&categoryId='
    '&pageSize=20'
    '&indexSize=10'
    '&groupId='
    '&listType=OBJECT'
    '&pageType=more'
    '&page=1'
    '&initialize=true'
    '&followSize=5'
    '&userType='
    '&useAltSort=true'
    '&replyPageSize=20'
    '&sort=FAVORITE'
    '&_=1735135173645'
)
# Collect the data: article text, then reaction counts, then comments.
title, body = fetch_article_data(ARTICLE_URL)
reactions = fetch_reactions(REACTION_URL)
comments = fetch_comments(COMMENT_URL)

# Bundle everything into a single record for the article.
news_data = dict(
    title=title,
    body=body,
    reactions=reactions,
    comments=comments,
)

# Persist the record as news_data.json in the current working directory.
save_to_json(news_data, 'news_data.json')