In [None]:
from pytube import Playlist
import csv
from datetime import datetime
import time
import logging

# Logging setup: INFO level, each record rendered as "timestamp - LEVEL - message".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Mapping of year -> YouTube URL (each non-empty URL carries a `list=` playlist id).
# Presumably these are the PyCon Korea session playlists -- confirm against the channel.
# The mapping is iterated below and every URL is passed to get_playlist_videos().
pyconkr_playlist = {
    2023: "https://www.youtube.com/watch?v=dJlX0i-q4ck&list=PLZPhyNeJvHRllQiXsJAryqWmqWrwFxY8I",
    2022: "https://www.youtube.com/watch?v=5NjMaxYQuIc&list=PLZPhyNeJvHRnlqQwMj-WNlrsac7yTiVhk",
    # No URL recorded for 2021; get_playlist_videos() returns [] for an empty string,
    # so this year contributes no rows. TODO(review): fill in if a playlist exists.
    2021: "",
    2020: "https://www.youtube.com/watch?v=xs66jubM88k&list=PLZPhyNeJvHRk9wIL9rZekFLIfT3aVcHT7",
    2019: "https://www.youtube.com/watch?v=b2BFxbkXkKY&list=PLZPhyNeJvHRlECdmkJ7M8konKB0NhBfve",
    2018: "https://www.youtube.com/watch?v=9lNN9wMD-60&list=PLZPhyNeJvHRmnMr5yucZ9Eu-yVhjRRsOM",
    2017: "https://www.youtube.com/watch?v=MmIxahj9vnY&list=PLZPhyNeJvHRmvCnWMBZJiFXu9kDUcn5FG",
    2016: "https://www.youtube.com/watch?v=UWDRX4z4-k0&list=PLZPhyNeJvHRnSJ2sAnqCGFnVRKo98EgCp",
    2015: "https://www.youtube.com/watch?v=0abmVNlkxRo&list=PLZPhyNeJvHRnoO_m1hH78j0JRj8LgUICN",
    2014: "https://www.youtube.com/watch?v=JGkfzWhVvKk&list=PLZPhyNeJvHRnchPDpnFV1uUmLhR_JG3A8",
}

def get_playlist_videos(url, max_retries=3, retry_delay=5):
    """Collect the video URLs of a YouTube playlist, retrying on failure.

    Args:
        url: Playlist URL handed to ``pytube.Playlist``. A falsy value
            (``""`` or ``None``) yields an empty list without any lookup.
        max_retries: Maximum number of fetch attempts.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        A list of ``{"title": <playlist title>, "url": <video URL>}`` dicts,
        one per video in the playlist. An empty list is returned when the URL
        is falsy or when every attempt failed (the failure is logged).
    """
    if not url:
        return []

    for attempt in range(1, max_retries + 1):
        try:
            playlist = Playlist(url)
            # The title belongs to the playlist, so read it once per attempt
            # instead of once per video.
            playlist_title = playlist.title
            # Build a fresh list on every attempt: if iterating video_urls
            # fails part-way, the partial results must not carry over into
            # the retry and show up as duplicate rows.
            return [
                {"title": playlist_title, "url": video_url}
                for video_url in playlist.video_urls
            ]
        except Exception as e:
            logging.warning(f"Error fetching playlist {url}: {str(e)}")
            # Only wait when another attempt will actually follow.
            if attempt < max_retries:
                time.sleep(retry_delay)

    logging.error(f"Failed to fetch playlist {url} after {max_retries} attempts")
    return []

# Flatten every year's playlist into one list of CSV-ready rows.
all_videos = []

for year, url in pyconkr_playlist.items():
    logging.info(f"Collecting videos for {year}...")
    videos = get_playlist_videos(url)
    # One row per video, tagged with the year its playlist is filed under.
    all_videos.extend(
        {"year": year, "playlist_title": video["title"], "video_url": video["url"]}
        for video in videos
    )

# Save the collected rows to a CSV file (header row first, then one line per video).
try:
    with open("pyconkr_videos.csv", "w", newline='', encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["year", "playlist_title", "video_url"])
        writer.writeheader()
        writer.writerows(all_videos)
    logging.info("영상 목록 수집 완료. 'pyconkr_videos.csv' 파일에 저장되었습니다.")
except OSError as e:  # IOError is an alias of OSError in Python 3
    logging.error(f"파일 저장 중 오류 발생: {str(e)}")

# Report run metadata: row count, distinct years collected, and the collection timestamp.
total_videos = len(all_videos)
years_covered = sorted({video["year"] for video in all_videos})
collection_date = format(datetime.now(), "%Y-%m-%d %H:%M:%S")

logging.info(f"총 영상 수: {total_videos}")
logging.info(f"수집된 연도: {years_covered}")
logging.info(f"수집 날짜: {collection_date}")

In [None]:
# pandas is imported here because no earlier cell brings `pd` into scope; this also
# lets the cell run on its own against an existing CSV without re-running the scrape.
import pandas as pd

# Load the CSV written by the collection cell and display it.
df = pd.read_csv("pyconkr_videos.csv")
df