In [4]:
import os
import json
import requests

In [82]:
class YtApiUtil:
    def __init__(self):
        current_path = current_path = os.getcwd()
        youtube_key_name = "youtube_key.json"
        data_path = current_path + "/" + youtube_key_name
        file = open(data_path)
        file_dict = json.load(file)
        self.API_key = file_dict["API_key"]
        
    def get_all_playlists(self, channel_id, page_token=""):
        API_key = self.API_key
        url = f"https://youtube.googleapis.com/youtube/v3/playlists?part=contentDetails&part=snippet&part=id&channelId={channel_id}&maxResults=50&key={API_key}&pageToken={page_token}"
        resp = requests.get(url)
        
        return json.loads(resp.text)
    
    def get_all_playlist_items(self, playlist_id, page_token=""):
        API_key = self.API_key
        url = f"https://youtube.googleapis.com/youtube/v3/playlistItems?part=snippet&part=contentDetails&maxResults=50&playlistId={playlist_id}&key={API_key}&pageToken={page_token}"
        resp = requests.get(url)
        
        return json.loads(resp.text)
    
    def get_video_info(self, video_id):
        API_key = self.API_key
        url = f"https://youtube.googleapis.com/youtube/v3/videos?part=contentDetails&part=snippet&id={video_id}&key={API_key}"
        resp = requests.get(url)
        
        try:
            video_info = json.loads(resp.text)["items"][0]
        except:
            video_info = None
        
        return video_info
        
class YtApiCrawler(YtApiUtil):
    def __init__(self):
        super().__init__()
    
    def fetch_data(self, url):
        print("===YtCrawlerInPlaylist fetch_data ===")
        channel_id = url.replace("https://www.youtube.com/channel/","")
        playlist_items = []
        page_token = ""
        has_next_page = True
        while has_next_page:
            data = self.get_all_playlists(channel_id, page_token)
            playlist_items += data["items"]
            if "nextPageToken" in data:
                page_token = data["nextPageToken"]
            else:
                has_next_page = False

        playlist_list = []
        for item in playlist_items:
            channel_id = item["snippet"]["channelId"]
            channel_title = item["snippet"]["channelTitle"]
            channel_url = url
            playlist_id = item["id"]
            playlist_title = item["snippet"]["title"]

            playlist_items = {
                "channel_id": channel_id,
                "channel_title": channel_title,
                "channel_url": channel_url,
                "playlist_id": playlist_id,
                "playlist_title": playlist_title,        
            }

            playlist_list.append(playlist_items)

        playlist_items_list = []
        for playlist_dict in playlist_list:
            playlist_id = playlist_dict["playlist_id"]

            page_token = ""
            has_next_page = True
            items = []
            while has_next_page:
                data = self.get_all_playlist_items(playlist_id, page_token)
                items += data["items"]
                if "nextPageToken" in data:
                    page_token = data["nextPageToken"]
                else:
                    has_next_page = False

            for item in items:
                video_id = item["snippet"]["resourceId"]["videoId"]
                video_info = self.get_video_info(video_id)
                if video_info:
                    video_url = f"https://www.youtube.com/watch?v={video_id}"
                    published = item["snippet"]["publishedAt"]
                    title = item["snippet"]["title"]
                    description = item["snippet"]["description"]
                    playlist_position = item["snippet"]["position"]
                    
                    if "standard" in item["snippet"]["thumbnails"]:
                        img_link = item["snippet"]["thumbnails"]["standard"]["url"] 
                    else: 
                        img_link = ""

                    if "tags" in video_info["snippet"]:
                        tags = video_info["snippet"]["tags"]
                    else:
                        tags = []

                    playlist_dict["video_id"] = video_id
                    playlist_dict["video_url"] = video_url
                    playlist_dict["published"] = published
                    playlist_dict["title"] = title
                    playlist_dict["img_link"] = img_link
                    playlist_dict["description"] = description
                    playlist_dict["playlist_position"] = playlist_position
                    playlist_dict["tags"] = tags
                    
                    playlist_items_list.append(playlist_dict)
                    
        return playlist_items_list
        
    def get_data_json(self, playlist_items_list):
        
        return playlist_items_list
    

In [83]:
crawler = YtApiCrawler()
url = "https://www.youtube.com/channel/UCLNBEt_42kYuX7fgZiubgXQ"
crawler.fetch_data(url)

===YtCrawlerInPlaylist fetch_data ===
https://www.youtube.com/watch?v=JLkYTc6U-g0
-----
{'channel_id': 'UCLNBEt_42kYuX7fgZiubgXQ', 'channel_title': '均一教育平台(軟體組)', 'channel_url': 'https://www.youtube.com/channel/UCLNBEt_42kYuX7fgZiubgXQ', 'playlist_id': 'PLtKoBWTkVkTc6I-WrLDDcCgMy2gdXLaQY', 'playlist_title': '微分享(公開版)', 'video_id': 'JLkYTc6U-g0', 'video_url': 'https://www.youtube.com/watch?v=JLkYTc6U-g0', 'published': '2022-05-10T05:29:41Z', 'title': '【微分享】Mob Programming', 'img_link': 'https://i.ytimg.com/vi/JLkYTc6U-g0/sddefault.jpg', 'description': '・分享者：沈家緯\n・分享內容：Mob Programming 大家一起開發\n・來源：均一微分享\n----------------------------------------------------------------------------------------------------\n・「微分享」是均一軟體組每天的都有的 15min 技術分享（一個不希望大家特別準備/隨意/低成本的技術分享）\n・「均一」一個希望幫助孩子成為終身學習者的線上學習平台 https://www.junyiacademy.org/"', 'playlist_position': 0, 'tags': []}
https://www.youtube.com/watch?v=E_x4al5INak
-----
{'channel_id': 'UCLNBEt_42kYuX7fgZiubgXQ', 'channel_title': '均一教育平台(軟體組)', 'channel_ur

https://www.youtube.com/watch?v=d3O3ju1l6vI
-----
{'channel_id': 'UCLNBEt_42kYuX7fgZiubgXQ', 'channel_title': '均一教育平台(軟體組)', 'channel_url': 'https://www.youtube.com/channel/UCLNBEt_42kYuX7fgZiubgXQ', 'playlist_id': 'PLtKoBWTkVkTc6I-WrLDDcCgMy2gdXLaQY', 'playlist_title': '微分享(公開版)', 'video_id': 'd3O3ju1l6vI', 'video_url': 'https://www.youtube.com/watch?v=d3O3ju1l6vI', 'published': '2022-05-10T05:32:57Z', 'title': '【好物分享】Terminal Warp：一個終於比較正常的終端機', 'img_link': 'https://i.ytimg.com/vi/d3O3ju1l6vI/sddefault.jpg', 'description': '・分享者：Nissen\n・分享內容：一個終於比較正常的終端機（Terminal） Warp\n・來源：均一微分享\n----------------------------------------------------------------------------------------------------\n・「微分享」是均一軟體組每天的都有的 15min 技術分享（一個不希望大家特別準備/隨意/低成本的技術分享）\n・「均一」一個希望幫助孩子成為終身學習者的線上學習平台 https://www.junyiacademy.org/\n・Warp：https://www.warp.dev/', 'playlist_position': 10, 'tags': []}
https://www.youtube.com/watch?v=sNDoLXorAKg
-----
{'channel_id': 'UCLNBEt_42kYuX7fgZiubgXQ', 'channel_title': '均一教育平台(軟體組)', 

[{'channel_id': 'UCLNBEt_42kYuX7fgZiubgXQ',
  'channel_title': '均一教育平台(軟體組)',
  'channel_url': 'https://www.youtube.com/channel/UCLNBEt_42kYuX7fgZiubgXQ',
  'playlist_id': 'PLtKoBWTkVkTc6I-WrLDDcCgMy2gdXLaQY',
  'playlist_title': '微分享(公開版)',
  'video_id': 'L-Pv7uROcac',
  'video_url': 'https://www.youtube.com/watch?v=L-Pv7uROcac',
  'published': '2022-05-16T04:54:36Z',
  'title': '【刷題】Two Sum 變化題',
  'img_link': 'https://i.ytimg.com/vi/L-Pv7uROcac/sddefault.jpg',
  'description': '・分享者：Vincent（後端軟體實習生）\n・分享內容：Two Sum\n・來源：均一微分享\n----------------------------------------------------------------------------------------------------\n・「微分享」是均一軟體組每天的都有的 15min 技術分享（一個不希望大家特別準備/隨意/低成本的技術分享）\n・「均一」一個希望幫助孩子成為終身學習者的線上學習平台 https://www.junyiacademy.org/"',
  'playlist_position': 17,
  'tags': []},
 {'channel_id': 'UCLNBEt_42kYuX7fgZiubgXQ',
  'channel_title': '均一教育平台(軟體組)',
  'channel_url': 'https://www.youtube.com/channel/UCLNBEt_42kYuX7fgZiubgXQ',
  'playlist_id': 'PLtKoBWTkVkTc6I-WrLDDcCgMy2gdXL

In [59]:
data["items"][0]["snippet"]["thumbnails"]["medium"]["url"]

'https://i.ytimg.com/vi/RbEo4fkvafQ/mqdefault.jpg'

In [53]:
item = data["items"][0]
video_id = item["snippet"]["resourceId"]["videoId"]
video_url = f"https://www.youtube.com/watch?v={video_id}"
published = item["snippet"]["publishedAt"]
title = item["snippet"]["title"]
img_link = item["snippet"]["thumbnails"]["maxres"]["url"]
description = item["snippet"]["description"]
playlist_position = item["snippet"]["position"]

playlist_position

0

In [None]:
# get all playlists in channel
GET https://youtube.googleapis.com/youtube/v3/playlists?part=contentDetails&part=snippet&part=id&channelId=UCyDIP-h6xNBAiXITkbvubhA&maxResults=50&key=[YOUR_API_KEY] HTTP/1.1
GET https://youtube.googleapis.com/youtube/v3/search?channelId=UCyDIP-h6xNBAiXITkbvubhA&pageToken=CAUQAA&key=[YOUR_API_KEY] HTTP/1.1

    
# get all playlistitems in playlist
GET https://youtube.googleapis.com/youtube/v3/playlistItems?part=snippet&part=contentDetails&maxResults=50&playlistId=PLI6pJZaOCtF1vPNVcsR6K31FbqZ2V6PVi&key=[YOUR_API_KEY] HTTP/1.1

# get video info by video_id
GET https://youtube.googleapis.com/youtube/v3/videos?part=contentDetails&part=snippet&id=aFx44mglAbs&maxResults=50&key=[YOUR_API_KEY] HTTP/1.1

    
# get all videos by channel_id
GET https://youtube.googleapis.com/youtube/v3/search?part=snippet&channelId={channelId}&key={API_KEY}
