In [1]:
import json
import os
import re
import time
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import openpyxl  # openpyxl module added
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Seaborn appearance: dark grid background with the 'Set3' colour palette.
sns.set_style('darkgrid')
sns.set_palette('Set3')

# Suppress every warning for the rest of the session.
warnings.simplefilter('ignore')

# Font used by matplotlib; assigned after the seaborn style call, as before.
plt.rcParams.update({"font.family": "NanumBarunGothic"})


In [2]:
# Crawl window: first and last day, plus the last day formatted as YYYYMMDD
# (used only in the progress message printed for each week).
start_day = datetime(2024, 4, 22)
end_day = datetime(2024, 4, 29)
Count_end_day = f"{end_day:%Y%m%d}"

# Number of whole weeks between the start day and the end day.
total_weeks = (end_day - start_day) // timedelta(weeks=1)

# Create the Excel workbook and write the header row to its active sheet.
wb = openpyxl.Workbook()
ws = wb.active
header_row = ["Day", "Rank", "Title_Name", "Cover_Img", "Singer","View", "video_url"]
ws.append(header_row)

# Announce the window, then start the weekly cursor at the first day.
print(f"시작일 : {start_day}, 종료일 : {end_day}")
current_day = start_day
Week_Count = 0

# CSS selector used to pick each chart entry out of the rendered page.
ROW_SELECTOR = '.data-table-container.style-scope.ytmc-entry-row'


def _text_or_blank(parent, selector):
    """Return the stripped text of the first node under `parent` matching `selector`.

    Returns "" when nothing matches, so an entry with a missing field is still
    recorded instead of aborting the whole crawl with an AttributeError.
    """
    node = parent.select_one(selector)
    return node.text.strip() if node else ""


def _video_url_from_endpoint(endpoint_attr):
    """Extract the video URL from a thumbnail's `endpoint` attribute.

    The attribute is read as newline-separated JSON documents; the URL is taken
    from ``['urlEndpoint']['url']`` and the last line that parses wins.
    Returns "" when the attribute is missing or no line has that shape, so a
    URL from a previous entry can never leak into the current one.
    """
    video_url = ""
    for line in (endpoint_attr or "").split('\n'):
        if not line.strip():
            continue
        try:
            # Convert the JSON string into a Python object and read the URL.
            video_url = json.loads(line)['urlEndpoint']['url']
        except (ValueError, KeyError, TypeError):
            # Not JSON, or JSON without the expected keys: ignore this line.
            continue
    return video_url


# Run Chrome without showing a browser window. The options are identical for
# every week, so they are built once instead of once per iteration.
chrome_options = Options()
chrome_options.add_argument("--headless")

# Walk the window one week at a time.
# NOTE(review): because the condition is `<=` and the cursor is advanced before
# the URL date is computed, the final pass requests a chart date later than
# end_day (2024-04-22..2024-04-29 yields 20240425 and then 20240502).
# Confirm that this is intended.
while current_day <= end_day:
    # Not used by the scrape itself; kept for inspecting the week window.
    week_start = current_day.strftime("%Y%m%d")
    week_end = (current_day + timedelta(days=6)).strftime("%Y%m%d")
    current_day += timedelta(weeks=1)

    # Date appended to the chart URL (the cursor has already moved one week on).
    date = (current_day + timedelta(days=-4)).strftime("%Y%m%d")
    # Date stamped on every row collected for this week.
    row_day = (current_day + timedelta(days=-5)).strftime("%Y%m%d")

    # Create the Chrome driver for this week.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        url = "https://charts.youtube.com/charts/TopSongs/kr/weekly/{0}".format(date)
        driver.get(url)
        print(url)
        print(f"Count : {date} / End_Count : {Count_end_day}")

        # Wait a moment for the page to load before reading it.
        time.sleep(5)

        # Grab the page source; it is a plain string, so the browser can be
        # closed before parsing.
        html = driver.page_source
    finally:
        # Always close the browser, even when loading the page failed.
        driver.quit()

    # Parse the HTML with BeautifulSoup.
    soup = BeautifulSoup(html, 'html.parser')

    # Rows collected for this week.
    data_list = []

    # Collect the fields of every chart entry.
    for item in soup.select(ROW_SELECTOR):
        Ranking = _text_or_blank(item, '.rank-container.center.style-scope.ytmc-entry-row #rank')
        Title = _text_or_blank(item, '.title.style-scope.ytmc-entry-row')
        Artist_name = _text_or_blank(item, '.subtitle.style-scope.ytmc-entry-row span.style-scope.ytmc-entry-row')

        # The view count is the second of the matching metric cells, when present.
        views_elements = item.select('.metric.content.center.tablet-non-displayed-metric.style-scope.ytmc-entry-row')
        Views = views_elements[1].text.strip() if len(views_elements) >= 2 else ""

        # The thumbnail <img> carries both the cover image and the video endpoint.
        thumbnail = item.select_one('.thumbnail-container.center.style-scope.ytmc-entry-row img')
        Cover_img = thumbnail.get('src') if thumbnail else None
        Video_URL = _video_url_from_endpoint(thumbnail.get('endpoint') if thumbnail else None)

        data_list.append({
            "날짜 ": row_day,
            "랭킹 ": Ranking,
            "제목 ": Title,
            "Cover_Img": Cover_img,
            "가수 ": Artist_name,
            "조회수 ": Views,
            "영상URL" : Video_URL
        })
        # Exactly one Excel row per chart entry. (Zipping the field strings
        # iterated them character by character, which duplicated rows for
        # multi-digit ranks and dropped rows that had an empty field.)
        ws.append([row_day, Ranking, Title, Cover_img, Artist_name, Views, Video_URL])

    # Print the rows collected for this week.
    for data in data_list:
        print(data)



# Save the Excel file.
file_path = r"C:\P_Project\1.Project\5-Project_data\6.Youtube_Crawling_Final4.29.xlsx"

# Make sure the target folder exists first; otherwise wb.save() raises
# FileNotFoundError and the whole crawl is lost at the very last step.
# dirname() is empty when the path has no folder part for this OS, in which
# case there is nothing to create.
output_dir = os.path.dirname(file_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

wb.save(file_path)
print("Excel 파일이 저장되었습니다:", file_path)

시작일 : 2024-04-22 00:00:00, 종료일 : 2024-04-29 00:00:00
https://charts.youtube.com/charts/TopSongs/kr/weekly/20240425
Count : 20240425 / End_Count : 20240429
{'날짜 ': '20240424', '랭킹 ': '1', '제목 ': 'Magnetic', 'Cover_Img': 'https://lh3.googleusercontent.com/vSYEILsfvGwSZU3PuZPU15DX0FRHcWbDnei-30-ZKQmg5XM9QKlL2BpwF9Obuvls9H3YEAMaqcusetE=w180-h180-l90-rj', '가수 ': 'ILLIT', '조회수 ': '4,977,916', '영상URL': 'https://www.youtube.com/watch?v=Vk5-c_v4gMU'}
{'날짜 ': '20240424', '랭킹 ': '2', '제목 ': 'T.B.H', 'Cover_Img': 'https://lh3.googleusercontent.com/CSoziUkkEKh5-wM2PFXXhJJzpalvskD2Lrv9GZ9PWFbePzvEGBqX1gjntXCnbHem0F4OHEbs9LY2d2He-A=w180-h180-l90-rj', '가수 ': 'QWER', '조회수 ': '4,437,327', '영상URL': 'https://www.youtube.com/watch?v=ImuWa3SJulY'}
{'날짜 ': '20240424', '랭킹 ': '3', '제목 ': 'Fate', 'Cover_Img': 'https://lh3.googleusercontent.com/-d5I1nv7x56NNuD3nbMZDRG874qNwua2usmDnVM35VGwHbLlLw8ozDMUrClz7f0lnSbG40L_dAF75EU8=w180-h180-l90-rj', '가수 ': '(여자)아이들', '조회수 ': '3,579,715', '영상URL': 'https://www.youtube.