In [1]:
# Enable IPython's autoreload extension so edits to imported project modules
# (e.g. utils.io) are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload modules automatically before each cell is executed.
%autoreload 2

In [2]:
import os
from os.path import join, basename
from tqdm import tqdm

from googleapiclient.discovery import build

from utils.io import load_youtube_api

In [3]:
# Credential that is later passed as `developerKey` when building the client.
# NOTE(review): presumably the YouTube Data API key read from local config —
# confirm against utils.io.load_youtube_api.
api = load_youtube_api()

### Access `Cricket Australia` channel

In [4]:
# Channel page URL; its last path segment is used as the channel ID below.
channel_url = "https://www.youtube.com/channel/UCkBY0aHJP9BwjZLDYxAQrKg"

In [5]:
# The channel ID is the last path segment of the URL. Strip any trailing "/"
# first: basename("…/UCxxx/") would otherwise return an empty string.
channel_id = basename(channel_url.rstrip("/"))

In [6]:
# Display the extracted channel ID as a sanity check.
channel_id

'UCkBY0aHJP9BwjZLDYxAQrKg'

In [7]:
# Service name and version identifying the API that the client is built for.
api_service_name, api_version = "youtube", "v3"

In [8]:
# Build the API client object that every request in this notebook goes through.
youtube = build(
    serviceName=api_service_name,
    version=api_version,
    developerKey=api,
)

### Playground

In [9]:
# Prepare a channels.list call for the `contentDetails` part, looked up by channel ID.
request = youtube.channels().list(
    part="contentDetails",
    id=channel_id,
)
# Alternative that looks the channel up by username instead of by ID:
# request = youtube.channels().list(part="contentDetails", forUsername="Cricket Australia")

In [10]:
# Send the prepared request; the result is the dict displayed in the next cell.
response = request.execute()

In [11]:
# Inspect the full response to locate the uploads playlist ID.
response

{'kind': 'youtube#channelListResponse',
 'etag': 'TXUYsCt4yxEkjVYiQ-wpgHkvFWw',
 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5},
 'items': [{'kind': 'youtube#channel',
   'etag': 'PpKv4TgmAoNS77VTeBYYywSl-8I',
   'id': 'UCkBY0aHJP9BwjZLDYxAQrKg',
   'contentDetails': {'relatedPlaylists': {'likes': '',
     'uploads': 'UUkBY0aHJP9BwjZLDYxAQrKg'}}}]}

In [12]:
# The uploads playlist ID sits under contentDetails.relatedPlaylists of the
# first channel item in the response.
related_playlists = response["items"][0]["contentDetails"]["relatedPlaylists"]
uploads_id = related_playlists["uploads"]

In [13]:
# Display the uploads playlist ID.
uploads_id

'UUkBY0aHJP9BwjZLDYxAQrKg'

In [14]:
# NOTE(review): this repeats the previous cell's display of uploads_id and
# looks redundant — candidate for deletion.
uploads_id

'UUkBY0aHJP9BwjZLDYxAQrKg'

In [15]:
# Quick probe of the uploads playlist (one page only).
# playlistItems.list documents maxResults as 0-50, so ask for the maximum of 50
# instead of an out-of-range 100.
request = youtube.playlistItems().list(part="snippet", playlistId=uploads_id, maxResults=50)
response = request.execute()

### Extract all videos from a channel

In [16]:
# Collect every item of the uploads playlist by following the API's page tokens.
page_size = 50  # items requested per playlistItems.list call
scraped = []

# Fetch the first page; it also reports how many items the playlist holds.
request = youtube.playlistItems().list(
    part="snippet",
    playlistId=uploads_id,
    maxResults=page_size,
)
response = request.execute()
scraped.extend(response["items"])

total_results = response["pageInfo"]["totalResults"]

# One tick per page: ceil(total_results / page_size) pages are expected.
pbar = tqdm(
    total=(total_results + page_size - 1) // page_size,
    desc="Extracting all videos of a channel",
    bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}',
)
pbar.update(1)  # the first page has already been fetched

# Keep requesting pages until the API stops returning a continuation token.
# Relying on the token (rather than on len(scraped) reaching total_results)
# avoids a KeyError when fewer items than the reported total are returned.
next_page_token = response.get("nextPageToken")
while next_page_token:
    request = youtube.playlistItems().list(
        part="snippet",
        playlistId=uploads_id,
        maxResults=page_size,
        pageToken=next_page_token,
    )
    response = request.execute()
    scraped.extend(response["items"])
    pbar.update(1)

    next_page_token = response.get("nextPageToken")

pbar.close()

Extracting all videos of a channel: 100%|█████████▉| 225/226 [00:49<00:00,  4.54it/s]                                                          


### Get video metadata for all scraped items

In [17]:
# Pull the video ID out of every scraped playlist item, preserving playlist order.
video_ids = [
    item["snippet"]["resourceId"]["videoId"]
    for item in tqdm(scraped, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')
]

100%|██████████| 11298/11298 [00:00<00:00, 1097130.18it/s]                                                                                     


In [18]:
# Fetch snippet, statistics and contentDetails for every collected video ID,
# sending the IDs to videos.list in fixed-size batches.
delta = 50  # number of video IDs sent per videos.list request
videos = []

# Iterate over batch start offsets instead of looping until
# len(videos) == len(video_ids): the API may return fewer items than IDs
# requested, which would otherwise keep the loop running past the end of
# video_ids with empty batches.
for start in tqdm(
    range(0, len(video_ids), delta),
    desc="Extracting all videos",
    bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}',
):
    request = youtube.videos().list(
        part="snippet,statistics,contentDetails",
        id=video_ids[start: start + delta],
    )
    response = request.execute()
    videos.extend(response["items"])

Extracting all videos: 100%|██████████| 226/226 [01:18<00:00,  2.87it/s]                                                                       


In [19]:
# Number of video resources collected from the batched videos.list calls.
len(videos)

11298

In [21]:
# Inspect the last collected video resource.
videos[-1]

{'kind': 'youtube#video',
 'etag': 'ZBR0fdk4zHeN8WUUQhIxrASQRYg',
 'id': 'aNE0tQcfAGk',
 'snippet': {'publishedAt': '2008-12-08T03:29:33Z',
  'channelId': 'UCkBY0aHJP9BwjZLDYxAQrKg',
  'title': "Peter Siddle with Delhi's Cricket Impressionist",
  'description': 'Australian player Peter Siddle meets an Indian from Delhi impersonates Ricky Ponting, Brett Lee, Matthew Hayden and more.',
  'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/aNE0tQcfAGk/default.jpg',
    'width': 120,
    'height': 90},
   'medium': {'url': 'https://i.ytimg.com/vi/aNE0tQcfAGk/mqdefault.jpg',
    'width': 320,
    'height': 180},
   'high': {'url': 'https://i.ytimg.com/vi/aNE0tQcfAGk/hqdefault.jpg',
    'width': 480,
    'height': 360}},
  'channelTitle': 'cricket.com.au',
  'tags': ['ricky',
   'ponting',
   'peter',
   'siddle',
   'brett',
   'lee',
   'matthew',
   'hayden',
   'mitchell',
   'johnson',
   'impressionist',
   'delhi',
   'cricket',
   'cricket australia',
   'ashes',
   'australian

In [22]:
# Inspect the second-to-last collected video resource.
videos[-2]

{'kind': 'youtube#video',
 'etag': 'avA_o2FGhQoqq61Ec8au53Z8MTw',
 'id': 'j_CN4g-9RIQ',
 'snippet': {'publishedAt': '2008-12-08T03:56:00Z',
  'channelId': 'UCkBY0aHJP9BwjZLDYxAQrKg',
  'title': 'Stuart Clark interviews personalities in Nagpur',
  'description': "Stuart Clark Interviews Allan Border, Brendan Julian and more during Australia's tour of India.",
  'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/j_CN4g-9RIQ/default.jpg',
    'width': 120,
    'height': 90},
   'medium': {'url': 'https://i.ytimg.com/vi/j_CN4g-9RIQ/mqdefault.jpg',
    'width': 320,
    'height': 180},
   'high': {'url': 'https://i.ytimg.com/vi/j_CN4g-9RIQ/hqdefault.jpg',
    'width': 480,
    'height': 360}},
  'channelTitle': 'cricket.com.au',
  'tags': ['Stuart',
   'Clark',
   'Allan',
   'Border',
   'Brendan',
   'Julian',
   'Mark',
   'Nicholas',
   'cricket',
   'tour',
   'of',
   'India',
   'cricket australia',
   'ashes',
   'australian team',
   'clarke',
   'warne',
   'ponting',
   'h

### Extract video IDs for all videos uploaded in the last 10 years

In [28]:
# Fetch the `snippet` part for one specific video, addressed by its ID.
request = youtube.videos().list(
    part="snippet",
    id="GWrOR1FxBhs",
)
response = request.execute()

In [29]:
# Display the videos.list response for the single video requested above.
response

{'kind': 'youtube#videoListResponse',
 'etag': 'USFEWr8PYjqFh3knEX5pt07SZM8',
 'items': [{'kind': 'youtube#video',
   'etag': 'vFXUpQmZlySrxAGBS4XUBmUg6p0',
   'id': 'GWrOR1FxBhs',
   'snippet': {'publishedAt': '2022-11-28T19:00:08Z',
    'channelId': 'UCpcTrCXblq78GZrTUTLWeBw',
    'title': 'Kramaric hits brace! | Croatia v Canada | FIFA World Cup Qatar 2022',
    'description': "Watch the Croatia v Canada Group F highlights from the FIFA World Cup Qatar 2022. \n\nGet all the highlights from Qatar 2022 on FIFA+ 👉 https://goto.fifa.com/nxRf/7750136f\nFind out where to watch the games live here 👉 https://goto.fifa.com/nxRf/e4b3204e\n\nFollow FIFA World Cup & FIFA Women's World Cup: \n👉 https://www.instagram.com/fifaworldcup \n👉 https://www.instagram.com/fifaworldcup \n👉 https://twitter.com/FIFAWorldCup \n👉 https://twitter.com/FIFAWWC \n👉 https://www.facebook.com/fifaworldcup \n👉 https://www.facebook.com/fifawomensworldcup",
    'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/G