In [16]:
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

import urllib.parse as p
import re
import os
import pickle

SCOPES = ["https://www.googleapis.com/auth/youtube.force-ssl"]

In [17]:
def youtube_authenticate():
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "credentials.json"
    creds = None
    # the file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first time
    if os.path.exists("token.pickle"):
        with open("token.pickle", "rb") as token:
            creds = pickle.load(token)
    # if there are no (valid) credentials availablle, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(client_secrets_file, SCOPES)
            creds = flow.run_local_server(port=0)
        # save the credentials for the next run
        with open("token.pickle", "wb") as token:
            pickle.dump(creds, token)

    return build(api_service_name, api_version, credentials=creds)

In [18]:
youtube = youtube_authenticate()

In [19]:
def get_video_id_by_url(url):
    """
    Return the Video ID from the video `url`
    """
    # split URL parts
    parsed_url = p.urlparse(url)
    # get the video ID by parsing the query of the URL
    video_id = p.parse_qs(parsed_url.query).get("v")
    if video_id:
        return video_id[0]
    else:
        raise Exception(f"Wasn't able to parse video URL: {url}")

In [20]:
def get_video_details(youtube, **kwargs):
    return youtube.videos().list(
        part="snippet,contentDetails,statistics",
        **kwargs
    ).execute()

In [21]:
def get_video_metadata(url):
    dat=get_video_details(youtube,id=get_video_id_by_url(url))["items"][0]
    
    res={}
    res["views"]=dat["statistics"]["viewCount"]
    res["likes"]=dat["statistics"]["likeCount"]
    res["dislikes"]=dat["statistics"]["dislikeCount"]
    res["comments"]=dat["statistics"]["commentCount"]
    
    res["title"]=dat["snippet"]["title"]
    res["desc"]=dat["snippet"]["description"]
    res["thumbnail"]=dat["snippet"]["thumbnails"]["maxres"]["url"]
    
    return res

In [22]:
get_video_metadata("https://www.youtube.com/watch?v=951X1IgtL5M")

{'views': '790806',
 'likes': '59614',
 'dislikes': '308',
 'comments': '1658',
 'title': 'How Skrillex makes his basslines',
 'desc': 'with a Tibetan monk in his computer..',
 'thumbnail': 'https://i.ytimg.com/vi/951X1IgtL5M/maxresdefault.jpg'}

In [8]:
def get_channel_videos(youtube, **kwargs):
    return youtube.search().list(
        **kwargs
    ).execute()

In [9]:
def crawl_channel(channelId):
    ids=[]
    nxt=None
    
    while(True):
        res=get_channel_videos(youtube,part="snippet",channelId=channelId,type="video",pageToken=nxt)
        videos=res.get("items")
        
        for video in videos:
            ids.append(video["id"]["videoId"])
            
        if "nextPageToken" in res:
            nxt=res["nextPageToken"]
        else:
            break
    
    return ids

In [10]:
crawl_channel("UCRWOdwLRsenx2jLaiCAIU4A")

['kmte4HGJCdo',
 'T71vkqZeniU',
 'R4zha-fcxH8',
 'E4kRDlsMLNM',
 'ABp2hdvgkxk',
 'KhUYTvpNDNI',
 'zTpu9bQmBlQ',
 'b-WtGGawa7M',
 '4xLWjHNrIys',
 'xbxP-s_-arc',
 'AtLU2bHiVbE',
 'GwnGVNuuYWw',
 'YTyx9bXmMIw',
 'pKE7iMKJGHg',
 'SxBrZ6tpgOQ',
 '54cchVisr2A',
 'KgRZ5ohwLKc',
 '_Lcstg3DVfg',
 'imkKlp3YadY',
 '5i_oD45sAWE',
 'tULklvbdi5w',
 'uDyNjSQVL6M',
 'a1ENF43IIeI',
 'TlMg3plBhiU',
 '39_AInFOrm8',
 '84d9ooGeH48',
 '4t_hUQNkwVU',
 'TwX8tiuowU0',
 'M3qjF2vGcA8',
 'X13cOgv1fxQ',
 'X13cOgv1fxQ',
 'YTmvCBftDXk',
 'ANwXRqBq5ik',
 'CLaiky2Cy4k',
 'gGxyb_WlFmM',
 'e_NT_9fr7uE',
 '9FYyxXpI9CM',
 'qt0InK2pofo',
 'J_f-Jc8sMw8',
 'lVE_CLfBwxY',
 'qopkBKmmGkE',
 'E9L7z5xu7Q4',
 'lXqVu6soRCQ',
 'WARGstLj150',
 'hNbQB2pHYnk',
 'OnsIBD8h91A',
 '5aAUbn7W6sg',
 'EF2DlcV0dUg',
 'fW6Kz4C6-TA',
 'vL2F55kMbyA',
 'XQfTgvhQeLw',
 'VndkM923RM8',
 'pYZ4AKs8XtQ',
 'uwhfH_ajsiE',
 'XoNlPHsUfTA',
 'GxFwNLfpdDQ',
 'f4MdGGPHu6w',
 '-080QH5__LM',
 '-Y87rT2z5XE',
 '9GzM-WXNEEc',
 'WHSNkzZtQdQ',
 'rylXYMJLs7A',
 'sqCz3s

In [13]:
crawl_channel("UCHnyfMqiRRG1u-2MsSQLbXA") #veritasium

['1TKSfAkWWN0',
 'g_IaVepNDT4',
 'IcrBqCFLHIY',
 '2OSrvzNW9FE',
 'vVKFBaaL4uM',
 'iphcyNWFD10',
 'yArprk0q9eE',
 'zUyH3XhpLTo',
 'vqDbMEdLiCs',
 'a8FTr2qMutA',
 'txmKr69jGBk',
 'c6wuh0NRG1s',
 'Ztc6QPNUqls',
 'tLMpdBjA2SU',
 'XBr4GkRnY04',
 'ty9QSiVC2g0',
 'y9c8oZ49pFc',
 'BD6h-wDj7bw',
 'fu3645D4ZlI',
 'PifL8bAybyc',
 'WIyTZDHuarQ',
 'vBX-KulgJ1o',
 'gM3zP72-rJE',
 'eCMmmEEyOO0',
 'zNzzGgr2mhk',
 'J3xLuZNKhlY',
 'ph8xusY3GTM',
 'BickMFHAZR0',
 'sehKAccM8p0',
 '1tSqSMOyNFE',
 'cY_o4A1wzsg',
 'J3xLuZNKhlY',
 'ph8xusY3GTM',
 'BickMFHAZR0',
 'sehKAccM8p0',
 '1tSqSMOyNFE',
 '2KZb2_vcNTg',
 'rM04U5BO3Ug',
 'aFO4PBolwFg',
 'DWkYRh6OXy8',
 'zUDqI9PJpc8',
 'sMb00lz-IfE',
 'I_rw-AJqpCM',
 'TRL7o2kPqw0',
 'YMPzDiraNnA',
 'TcxZSmzPw8k',
 'stRPiifxQnM',
 'oKb2tCtpvNU',
 '72DCj3BztG4',
 '23f1jvGUWJs',
 'K4vyRvMASPU',
 'g20JZ2HNZaw',
 'EAyk2OsKvtU',
 'cebFWOlx848',
 'x4I9mmd-2Rc',
 'LKPwKFigF8U',
 'Yl_K2Ata6XY',
 'Bz9D6xba9Og',
 '-OqrZG-EBaQ',
 'jIMihpDmBpY',
 'j5v8D-alAKE',
 'aIx2N-viNwY',
 'lL2e0r

In [23]:
crawl_channel("UCXZpQgX1897wYDLtvzmgyIA") #facts verse

HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?part=snippet&channelId=UCXZpQgX1897wYDLtvzmgyIA&type=video&pageToken=CPQDEAA&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.">

References

https://www.thepythoncode.com/article/using-youtube-api-in-python