In [None]:
#| default_exp youtube

# YouTube

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from pathlib import Path
import os
import yaml
import json
from pathlib import Path

from googleapiclient.discovery import build

In [None]:
#| hide
#| eval: false
# NOTE(review): `load_config` is neither defined nor imported in the cells
# visible here -- confirm where it is supposed to come from.
config = load_config(config_path="../config.yaml")

In [None]:
#| hide
#| eval: false
# Directory one level above the notebook; created (with parents) if missing,
# left untouched if it already exists.
data_dir = Path('../data/')
data_dir.mkdir(parents=True, exist_ok=True)

In [None]:
#| hide
#| eval: false
# API client shared by the cells below; the key is read from the
# YOUTUBE_API_KEY environment variable (None is passed if it is unset).
youtube = build("youtube", "v3", developerKey=os.environ.get("YOUTUBE_API_KEY"))

In [None]:
#| hide
#| eval: false
# Sample inputs used by the demo cells below.
sample_handle = 'IndianFootball'
# Initial value only: `sample_video_id` is reassigned in the Videos section
# from the first search result.
sample_video_id = 'NvcQiB05hOM'

# Section 1. Channel

In [None]:
#| hide
#| eval: false

def fetch_channel_details(youtube, handle):
    """Look up a channel by handle through ``channels().list``.

    Args:
        youtube: API client object exposing ``channels()``.
        handle: Channel handle; any leading ``@`` characters are stripped
            before it is sent as ``forHandle``.

    Returns:
        The executed response, unchanged. Only the ``id`` part is requested.
    """
    channels = youtube.channels()
    request = channels.list(part="id", forHandle=handle.lstrip("@"))
    return request.execute()

In [None]:
#| hide
#| eval: false

# Look up the sample handle and pretty-print the raw response.
sample_channel_details = fetch_channel_details(youtube, sample_handle)
print(json.dumps(sample_channel_details, indent=4))

{
    "kind": "youtube#channelListResponse",
    "etag": "JklBZId-ggPuKbmQ5aJ--Guh2vQ",
    "pageInfo": {
        "totalResults": 1,
        "resultsPerPage": 5
    },
    "items": [
        {
            "kind": "youtube#channel",
            "etag": "Brtx0E8XE-dICIQSWnKHveqsJb8",
            "id": "UCjiPxzIyNtfQ2HZZ1eVjZlg"
        }
    ]
}


In [None]:
#| hide
#| eval: false

def extract_channel_id(details):
    """Return the ``id`` of the first entry in ``details['items']``.

    Raises ``KeyError`` / ``IndexError`` if ``items`` is missing or empty.
    """
    first_item = details['items'][0]
    return first_item['id']

In [None]:
#| hide
#| eval: false

# Pull the channel ID out of the sample response; the bare name on the last
# line makes the notebook display it.
sample_channel_id = extract_channel_id(sample_channel_details)
sample_channel_id

'UCjiPxzIyNtfQ2HZZ1eVjZlg'

# Section 2. Videos

In [None]:
#| hide
#| eval: false

def fetch_latest_videos(youtube, channel_id, latest=10):
    """Run a date-ordered video search restricted to one channel.

    Args:
        youtube: API client object exposing ``search()``.
        channel_id: Value sent as ``channelId``.
        latest: Value sent as ``maxResults`` (default 10).

    Returns:
        The executed ``search().list`` response, unchanged. The ``id`` and
        ``snippet`` parts are requested for results of type ``video``.
    """
    search_params = {
        "part": "id,snippet",
        "channelId": channel_id,
        "order": "date",
        "maxResults": latest,
        "type": "video",
    }
    return youtube.search().list(**search_params).execute()

In [None]:
#| hide
#| eval: false

# Fetch a single search result (latest=1) for the sample channel and
# pretty-print the raw response.
sample_videos = fetch_latest_videos(youtube, sample_channel_id, latest=1)
print(json.dumps(sample_videos, indent=4))

{
    "kind": "youtube#searchListResponse",
    "etag": "LIbcv1ZP4XX1TxCd0wsnjsV6uhc",
    "nextPageToken": "CAEQAA",
    "regionCode": "IN",
    "pageInfo": {
        "totalResults": 2,
        "resultsPerPage": 1
    },
    "items": [
        {
            "kind": "youtube#searchResult",
            "etag": "HqEcOJ0i8CVdHT3wcCaYg4HCZ0o",
            "id": {
                "kind": "youtube#video",
                "videoId": "V_zA6azPYdQ"
            },
            "snippet": {
                "publishedAt": "2025-09-13T05:11:10Z",
                "channelId": "UCjiPxzIyNtfQ2HZZ1eVjZlg",
                "title": "Yes, he really &#39;\ud835\ude23\ud835\ude30\ud835\ude36\ud835\ude35 \ud835\ude35\ud835\ude30 \ud835\ude25\ud835\ude30 \ud835\ude2a\ud835\ude35.. and you can&#39;t do anything &#39;\ud835\ude23\ud835\ude30\ud835\ude36\ud835\ude35 \ud835\ude2a\ud835\ude35 \ud83e\udd76",
                "description": "AFCU23 #BlueColts #IndianFootball \u26bd Find us on - Website: https://www.t

In [None]:
#| hide
#| eval: false

def extract_video_id(details):
    """Return the ``videoId`` nested under the ``id`` key of ``details``."""
    id_section = details['id']
    return id_section['videoId']

In [None]:
#| hide
#| eval: false

# Reassigns `sample_video_id` (first set next to `sample_handle`) to the ID
# of the first item in the search response.
sample_video_id = extract_video_id(sample_videos['items'][0])
print(sample_video_id)

V_zA6azPYdQ


# Section 3. Comments

In [None]:
#| hide
#| eval: false

def fetch_video_comments(youtube, video_id, max_comments=None):
    """Collect comment threads for a video, following pagination.

    Args:
        youtube: API client object exposing ``commentThreads()``.
        video_id: Value sent as ``videoId``.
        max_comments: Upper bound on the number of threads returned.
            ``None`` (the default) means no limit. ``0`` returns an empty
            list without calling the API.

    Returns:
        A list of the raw ``items`` entries from each page, in the order
        received, truncated to ``max_comments`` when a limit is given.

    Raises:
        ValueError: If ``max_comments`` is negative.
    """
    if max_comments is not None:
        if max_comments < 0:
            raise ValueError("max_comments must be None or a non-negative integer")
        if max_comments == 0:
            # An explicit zero is a real limit, not "unlimited".
            return []

    page_limit = 100  # largest page size requested from the API
    comments = []
    next_page_token = None

    while True:
        # Ask only for what is still missing; always >= 1 at this point
        # because the loop returns as soon as the limit is reached.
        if max_comments is None:
            page_size = page_limit
        else:
            page_size = min(page_limit, max_comments - len(comments))

        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=page_size,
            textFormat="plainText",
            pageToken=next_page_token,
        ).execute()

        # Tolerate a page that carries no "items" key.
        comments.extend(response.get("items", []))
        if max_comments is not None and len(comments) >= max_comments:
            # Slice defensively in case a page held more than requested.
            return comments[:max_comments]

        next_page_token = response.get("nextPageToken")
        if not next_page_token:
            return comments

In [None]:
#| hide
#| eval: false

# Fetch one comment thread (max_comments=1) for the sample video and
# pretty-print it.
sample_comments = fetch_video_comments(youtube, sample_video_id, max_comments=1)
print(json.dumps(sample_comments, indent=4))

[
    {
        "kind": "youtube#commentThread",
        "etag": "XngeWkSs1pC5_BZ6Z8-t0R4nSvE",
        "id": "UgxpWqXlOPUmkRf_aON4AaABAg",
        "snippet": {
            "channelId": "UCjiPxzIyNtfQ2HZZ1eVjZlg",
            "videoId": "V_zA6azPYdQ",
            "topLevelComment": {
                "kind": "youtube#comment",
                "etag": "zeXf8cppfdPsrbZHJoKY_sxxO-I",
                "id": "UgxpWqXlOPUmkRf_aON4AaABAg",
                "snippet": {
                    "channelId": "UCjiPxzIyNtfQ2HZZ1eVjZlg",
                    "videoId": "V_zA6azPYdQ",
                    "textDisplay": "Crazy \u2764",
                    "textOriginal": "Crazy \u2764",
                    "authorDisplayName": "@LonelyMani",
                    "authorProfileImageUrl": "https://yt3.ggpht.com/7HwTVtyK3n-i9NI4xUd3-CNDRg__Ji8G3XKyBH8rJG22uwA5B9CtAYhK31uQe0ZU75PETIv0ww=s48-c-k-c0x00ffffff-no-rj",
                    "authorChannelUrl": "http://www.youtube.com/@LonelyMani",
                    "

In [None]:
#| hide
#| eval: false

def extract_comment_texts(comments):
    """Collect the ``textDisplay`` of every thread's top-level comment.

    Args:
        comments: Iterable of comment-thread dicts, each carrying
            ``snippet -> topLevelComment -> snippet -> textDisplay``.

    Returns:
        A list of the ``textDisplay`` values, in input order.
    """
    return [
        thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
        for thread in comments
    ]

In [None]:
#| hide
#| eval: false

# Display the text extracted from the sample comment thread(s).
extract_comment_texts(sample_comments)

['Crazy ❤']

In [None]:
#| hide
# Trigger nbdev's export step (presumably writes the `#| export` cells to the
# module named by `default_exp` -- confirm against the nbdev docs).
import nbdev; nbdev.nbdev_export()