In [2]:
import re
import warnings
from datetime import datetime, timedelta
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import openpyxl  # openpyxl module added
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup

# Global plotting defaults: seaborn "darkgrid" style with the "Set3" palette.
sns.set_style('darkgrid')
sns.set_palette('Set3')
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
# Font used for all matplotlib text (presumably chosen so Korean labels render).
# NOTE(review): kept after sns.set_style() on purpose — the seaborn style call
# may reset font-related rcParams; confirm before reordering.
plt.rcParams["font.family"] = "NanumBarunGothic"


In [3]:

# --- Crawl configuration and output workbook -------------------------------

# Browser-like User-Agent header sent with every HTTP request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
}

# First and last day of the period to collect (both inclusive).
start_day = datetime(year=2024, month=4, day=29)
end_day = datetime(year=2024, month=5, day=10)

# Number of weekly charts that fall inside the period: whole weeks elapsed
# between the two dates, plus the week that contains the start day.
total_weeks = 1 + (end_day - start_day) // timedelta(weeks=1)

# New workbook; the active sheet gets the column header as its first row.
wb = openpyxl.Workbook()
ws = wb.active
ws.append((
    "Day",
    "Rank",
    "Title",
    "Album_img_urls",
    "album_Code",
    "Singer",
    "Release_date",
    "Genre_Text",
    "view_count",
    "click_Url",
))

# Loop state for the week-by-week crawl: progress counter and date cursor.
Week_Count = 0
current_day = start_day
print(f"시작일 : {start_day}, 종료일 : {end_day}")

# --- Weekly chart crawl ------------------------------------------------------
# For every week between start_day and end_day: download the Melon weekly
# chart, read each ranked row, fetch that song's detail page for release date,
# genre and singer, and append one row per song to the worksheet `ws`.

CHART_URL = "https://www.melon.com/chart/week/index.htm?classCd=GN0000&moved=Y&startDay={0}&endDay={1}"
DETAIL_URL = "https://www.melon.com/song/detail.htm?songId={0}"
META_SELECTOR = "#downloadfrm > div > div > div.entry > div.meta > dl > dd:nth-child({0})"
SINGER_SELECTOR = "#downloadfrm > div > div > div.entry > div.info > div.artist > a > span:nth-child(1)"
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server blocks forever


def _first_text(node, selector):
    """Return the stripped text of the first match of *selector*, or ""."""
    match = node.select_one(selector)
    return match.text.strip() if match is not None else ""


# A single session reuses the underlying connection for the chart page and the
# many per-song detail requests instead of reconnecting every time.
with requests.Session() as session:
    session.headers.update(headers)

    while current_day <= end_day:
        # Start and end date (inclusive, 7 days) of the week being fetched.
        week_start = current_day.strftime("%Y%m%d")
        week_end = (current_day + timedelta(days=6)).strftime("%Y%m%d")
        current_day += timedelta(weeks=1)

        # Build the URL once so the printed URL is exactly the one requested.
        URL = CHART_URL.format(week_start, week_end)
        print(URL)
        response = session.get(URL, timeout=REQUEST_TIMEOUT)

        # The markup is decoded here, so BeautifulSoup receives a str and needs
        # no encoding hint (from_encoding is ignored for str input).
        html = response.content.decode('utf-8', 'replace')
        soup = BeautifulSoup(html, 'html.parser')

        for stock_name in soup.select('#frm > div > table > tbody'):
            # Read every field from its own <tr> so that a row with a missing
            # cell cannot shift the columns of the rows that follow it.
            for row in stock_name.select('tr.lst50, tr.lst100'):
                album_code = row.get('data-song-no')
                rank_tag = row.select_one("td:nth-child(2) > div > span.rank")
                if album_code is None or rank_tag is None:
                    # Not a usable chart entry: no song id or no rank.
                    continue
                rank = int(rank_tag.text)

                title_tag = row.select_one(
                    "td:nth-child(6) > div > div > div.ellipsis.rank01 > span > a")
                title_Text = title_tag.text if title_tag is not None else ""

                img_tag = row.select_one("td:nth-child(4) > div > a > img")
                album_img_url = ""
                if img_tag is not None and img_tag.get('src'):
                    # Rewrite the thumbnail URL to request the larger rendition.
                    album_img_url = img_tag['src'].replace(
                        "/resize/120/quality/80/", "/resize/240/quality/160/")

                # Song detail page: release date, genre and singer.
                detail_url = DETAIL_URL.format(album_code)
                response_detail = session.get(detail_url, timeout=REQUEST_TIMEOUT)
                html_detail = response_detail.content.decode('utf-8', 'replace')
                soup_detail = BeautifulSoup(html_detail, 'html.parser')

                Release_date = _first_text(soup_detail, META_SELECTOR.format(4))
                Genre_Text = _first_text(soup_detail, META_SELECTOR.format(6))
                Singer = _first_text(soup_detail, SINGER_SELECTOR)

                # Column order matches the header row; view_count is left blank.
                ws.append([week_start, rank, title_Text, album_img_url, album_code,
                           Singer, Release_date, Genre_Text, "", detail_url])

        # Progress is counted once per week, independent of how many <tbody>
        # elements the chart page happened to contain.
        Week_Count += 1
        print(f"Day_Count : {Week_Count}, Total_Count : {total_weeks}")

# Save the workbook. The target folder is created first so that a missing
# directory cannot discard the collected data at the very last step.
file_path = r"C:\P_Project\1.Project\5-Project_data\1.Melon_Total.xlsx"
Path(file_path).parent.mkdir(parents=True, exist_ok=True)
wb.save(file_path)
print("Excel 파일이 저장되었습니다:", file_path)


시작일 : 2024-04-29 00:00:00, 종료일 : 2024-05-10 00:00:00
https://www.melon.com/chart/week/index.htm?classCd=GN0000&moved=Y&startDay=20240429&endDay=20240505
Day_Count : 1, Total_Count : 2
https://www.melon.com/chart/week/index.htm?classCd=GN0000&moved=Y&startDay=20240506&endDay=20240512
Day_Count : 2, Total_Count : 2
Excel 파일이 저장되었습니다: C:\P_Project\1.Project\5-Project_data\1.Melon_Total.xlsx
