In [1]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import keyring


# Read the YouTube Data API key from the OS keyring (service 'youtube_data_api', entry 'api_key')
# so that the key never appears in the notebook itself.
# NOTE(review): presumably get_password() returns None when no such entry is stored — confirm.
api_key = keyring.get_password('youtube_data_api', 'api_key')
# Module-level YouTube Data API v3 client; later cells reference this `youtube` name.
youtube = build('youtube', 'v3', developerKey=api_key)
# ID of the channel whose videos are searched by later cells.
channel_id = 'UCqJ7MmAjzuQvKWcHe_0XGsA'
# https://www.youtube.com/@user-nn8eh3nh2g
# How to obtain a channel ID: take the string after watch?v= in a video URL (the videoId) and pass it to the YouTube Data API's videos().list(snippet).

In [2]:
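# Search by either a keyword or a channel_id to collect video_ids.
# Then use each video_id to check whether that video is a live broadcast.
# A channel_id can be obtained from a video_id returned by a keyword search.
# For now the API quota is too small to poll in an endless loop; this could be improved by crawling without the API (e.g. with puppeteer).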

def search_and_fetch(channel_id=None, keyword=None, max_results=500) -> list[str]:
    """Collect video IDs from the YouTube Data API search endpoint.

    Exactly one of ``channel_id`` and ``keyword`` must be supplied. Results are
    requested 50 per page through the module-level ``youtube`` client until
    there is no next page, an ``HttpError`` is raised, or at least
    ``max_results`` IDs have been collected.

    Args:
        channel_id: Channel to search in, or None when searching by keyword.
        keyword: Search query, or None when searching by channel.
        max_results: Paging stops once at least this many IDs have been
            collected. The result is not truncated, so it may be slightly
            longer than this value.

    Returns:
        The collected video IDs in the order the API returned them. An empty
        list is returned (after printing a message) when neither or both of
        ``channel_id`` and ``keyword`` are given. IDs collected before an
        ``HttpError`` are still returned.
    """
    video_ids = []

    # Reject "neither" and "both"; exactly one search criterion is allowed.
    # (The previous XOR test was inverted and rejected every valid call.)
    if (channel_id is None) == (keyword is None):
        print('channel_id와 keyword 중 하나만 입력하세요.')
        return video_ids

    page_token = None
    while True:
        try:
            response = youtube.search().list(
                part='id',
                type='video',
                channelId=channel_id,
                q=keyword,
                maxResults=50,
                pageToken=page_token
            ).execute()
        except HttpError as e:
            # Best effort: report the failure and keep whatever was collected.
            print('HTTP error occurred:')
            print(e)
            break

        for item in response.get('items') or []:
            # Tolerate items without an `id` object or without a `videoId`.
            video_id = (item.get('id') or {}).get('videoId')
            if video_id:
                video_ids.append(video_id)

        page_token = response.get('nextPageToken')
        if page_token is None or len(video_ids) >= max_results:
            break

    return video_ids


def check_live(video_id: str) -> bool:
    """Report whether the API returns ``liveStreamingDetails`` for a video.

    Queries ``videos().list`` on the module-level ``youtube`` client with
    ``part='liveStreamingDetails'`` for the given ID.

    Args:
        video_id: ID of the video to look up.

    Returns:
        True when the first returned item carries a non-empty
        ``liveStreamingDetails`` object; False otherwise, including when the
        response has no items at all.
    """
    response = youtube.videos().list(
        part='liveStreamingDetails',
        id=video_id
    ).execute()

    # `items` may be empty or missing (presumably when the video no longer
    # exists or is not accessible); treat that as "not live" instead of
    # failing on the [0] lookup.
    items = response.get('items') or []
    if not items:
        return False

    return bool(items[0].get('liveStreamingDetails'))

In [None]:
if __name__ == '__main__':
    # Search the configured channel, then keep only the IDs check_live() accepts.
    video_ids = search_and_fetch(channel_id=channel_id)
    video_ids = list(filter(check_live, video_ids))

    # Append mode: IDs written by earlier runs stay in the file.
    with open('video_ids.txt', 'a') as f:
        for video_id in video_ids:
            f.write(f'{video_id}\n')

In [4]:
# YT-DLP 이용 일괄 다운로드, VPN 등 이용 개선 필요

with open('video_ids.txt', 'r') as f:
    video_ids = f.read().splitlines()
    video_ids = list(set(video_ids))

for video_id in video_ids[:2]:
    yt_commandline = f'yt-dlp https://www.youtube.com/watch?v={video_id} -f "bv" --downloader ffmpeg --downloader-args "ffmpeg_i:-ss 0 -to 10"'
    yt_commandline = f'conda activate youtube&{yt_commandline}'
    !powershell {yt_commandline}
    break

[youtube] Extracting URL: https://www.youtube.com/watch?v=0Nh96W32HhM
[youtube] 0Nh96W32HhM: Downloading webpage
[youtube] 0Nh96W32HhM: Downloading ios player API JSON
[youtube] 0Nh96W32HhM: Downloading android player API JSON
[youtube] 0Nh96W32HhM: Downloading m3u8 information
[info] 0Nh96W32HhM: Downloading 1 format(s): 299
[download] Destination: 후니상 배틀그라운드 [0Nh96W32HhM].mp4

[download] 100% of    1.54MiB in 00:00:02 at 755.48KiB/s


Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'https://rr3---sn-n3cgv5qc5oq-bh2sz.googlevideo.com/videoplayback?expire=1689455564&ei=bLeyZLKqGsWyvcAP5MmSyAs&ip=222.251.220.68&id=o-ABDkmNc4y2haE4-txKM8lFp0URlyb7DEmA2jsDYWYODJ&itag=299&source=youtube&requiressl=yes&mh=85&mm=31%2C26&mn=sn-n3cgv5qc5oq-bh2sz%2Csn-nx57ynsl&ms=au%2Conr&mv=m&mvi=3&pcm2cms=yes&pl=17&initcwndbps=947500&vprv=1&svpuc=1&mime=video%2Fmp4&gir=yes&clen=3231733466&dur=6706.600&lmt=1689064839492039&mt=1689433503&fvip=5&keepalive=yes&fexp=24007246%2C24362686%2C24363391&beids=24350018&c=IOS&txp=7209224&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cvprv%2Csvpuc%2Cmime%2Cgir%2Cclen%2Cdur%2Clmt&sig=AOq0QJ8wRQIhAJMgkwowvJs7-pGskvBC3pumdlE3Ipk-Q_jeOiPc6NKRAiAd9ozEshU42t6tAqXEfBQsWDkVKH-j3F-jUXeImlSwgQ%3D%3D&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpcm2cms%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRAIgMyKnxx_JJMOP5zEL9q1tpl6FrKHuaGLHAhxQ5hCLPm0CIFnHataE4ryoVLPWDTwuv2a5KTT6Qz60JE35jGW_MEhl':
  Metadata:
    major_brand     : dash
