In [88]:
import gzip
import json
import os
from datetime import datetime
from pathlib import Path
from typing import List, Union

import googleapiclient.discovery
from dotenv import load_dotenv
from google.oauth2 import service_account

load_dotenv()

True

In [89]:
def create_service_account_credentials(api_config: dict) -> service_account.Credentials:
    """Load service-account credentials from the key file named in ``api_config``.

    Args:
        api_config: Mapping that must provide ``"key_file_path"`` (passed as the
            ``filename`` of the key file) and ``"scopes"`` (passed as the
            ``scopes`` to request).

    Returns:
        Whatever ``service_account.Credentials.from_service_account_file``
        returns for that file and those scopes.

    Raises:
        KeyError: If ``api_config`` lacks either required key.
    """
    key_file = api_config["key_file_path"]
    requested_scopes = api_config["scopes"]
    return service_account.Credentials.from_service_account_file(
        filename=key_file,
        scopes=requested_scopes,
    )

def create_youtube_api_resource(
    api_config: dict,
    credentials: service_account.Credentials
) -> googleapiclient.discovery.Resource:
    """Build an API client resource with ``googleapiclient.discovery.build``.

    Args:
        api_config: Mapping that must provide ``"api_service_name"`` and
            ``"api_version"``; they are forwarded as ``serviceName`` and
            ``version`` respectively.
        credentials: Credentials object forwarded unchanged to ``build``.

    Returns:
        The resource object returned by ``googleapiclient.discovery.build``.

    Raises:
        KeyError: If ``api_config`` lacks either required key.
    """
    build_kwargs = {
        "serviceName": api_config["api_service_name"],
        "version": api_config["api_version"],
        "credentials": credentials,
    }
    return googleapiclient.discovery.build(**build_kwargs)


def get_channel_info(
    youtube_resource: googleapiclient.discovery.Resource = None,
    id: str = None
) -> dict:
    """Run a ``channels().list`` request for one channel id and return the response.

    Args:
        youtube_resource: API client resource exposing ``channels()``. The
            default of ``None`` is only a placeholder; the function calls
            ``youtube_resource.channels()`` unconditionally.
        id: Channel id forwarded as the ``id`` argument of the list request.

    Returns:
        The value returned by executing the request.
    """
    # Parts of the channel record to ask for, as one comma-separated string.
    requested_parts = "snippet,contentDetails,statistics,topicDetails,status"

    channels_api = youtube_resource.channels()
    channel_request = channels_api.list(part=requested_parts, id=id)
    return channel_request.execute()


def parse_channel_uploads_playlist_id(response_channel: dict) -> str:
    """Extract the "uploads" playlist id of the first item in a channel response.

    Args:
        response_channel: Response mapping with an ``"items"`` sequence whose
            first element contains
            ``["contentDetails"]["relatedPlaylists"]["uploads"]``.

    Returns:
        The value stored under ``"uploads"`` for the first item.

    Raises:
        KeyError: If any expected key is absent.
        IndexError: If ``"items"`` is an empty sequence.
    """
    first_channel = response_channel["items"][0]
    related_playlists = first_channel["contentDetails"]["relatedPlaylists"]
    return related_playlists["uploads"]


def paginate_all_channel_uploads(
    youtube_resource: googleapiclient.discovery.Resource,
    uploads_playlist_id: str
) -> List[dict]:
    """Collect the items of every page of a playlist into one list.

    Issues ``playlistItems().list`` requests with ``maxResults=50``, starting
    with no page token and then following each response's ``"nextPageToken"``
    until a response has none (or an empty one).

    Args:
        youtube_resource: API client resource exposing ``playlistItems()``.
        uploads_playlist_id: Playlist id forwarded as ``playlistId``.

    Returns:
        The concatenation of each page's ``"items"``, in the order received.

    Raises:
        KeyError: If a response has no ``"items"`` key.
    """
    collected_items = []
    page_token = None  # the first request is sent without a page token

    while True:
        page = youtube_resource.playlistItems().list(
            part="snippet,contentDetails,status",
            maxResults=50,
            playlistId=uploads_playlist_id,
            pageToken=page_token
        ).execute()
        collected_items.extend(page["items"])

        # A missing or empty token means this was the last page.
        page_token = page.get("nextPageToken")
        if not page_token:
            break

    return collected_items


def create_file_path_if_doesnt_exist(file_path: str) -> None:
    """Ensure the parent directory of ``file_path`` exists.

    Creates every missing directory on the way to the parent directory. The
    file itself is not created. Calling this when the directory already exists
    is a no-op.

    Args:
        file_path: Path of a file whose containing directory should exist.

    Returns:
        None.
    """
    parent_dir = os.path.dirname(file_path)
    # A bare file name has no directory component: dirname() returns "" and
    # os.makedirs("") raises FileNotFoundError. The file would land in the
    # current directory, which already exists, so there is nothing to create.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)


def write_object_to_json_gzip_file(object: Union[dict, List[dict]], file_path: str) -> None:
    """Write ``object`` as indented JSON into a gzip-compressed text file.

    The parent directory of ``file_path`` is prepared first via
    ``create_file_path_if_doesnt_exist``. The file is opened in text-write
    mode (``"wt"``) with UTF-8 encoding, and the JSON is written with a
    4-space indent.

    Args:
        object: The value handed to ``json.dump``.
        file_path: Destination path of the gzip file.

    Returns:
        None.
    """
    # Prepare the destination directory before opening the file for writing.
    create_file_path_if_doesnt_exist(file_path)

    with gzip.open(file_path, mode="wt", encoding="utf-8") as compressed_out:
        json.dump(object, compressed_out, indent=4)
In [90]:
# Settings shared by the credential loader and the API client builder.
api_config = {
    "scopes": ["https://www.googleapis.com/auth/youtube.readonly"],
    "api_service_name": "youtube",
    "api_version": "v3",
    "key_file_path": "key_youtube-stats-459404-eefde03eff46.json",
}

# Load credentials from the key file, then build the client used for the requests below.
credentials = create_service_account_credentials(api_config)
youtube_resource = create_youtube_api_resource(api_config, credentials)

# Fetch the channel-level record for the channel of interest.
lirik_plays_channel_id = "UCebh6Np0l-DT9LXHrXbmopg"
response_channel = get_channel_info(
    youtube_resource=youtube_resource,
    id=lirik_plays_channel_id,
)

# # identify uploads playlist, pull all videos
# uploads_playlist_id = parse_channel_uploads_playlist_id(response_channel)
# all_video_metadata = paginate_all_channel_uploads(youtube_resource, uploads_playlist_id)

In [None]:
# Write the raw channel & video data as gzipped JSON into the code repo.
# Path.cwd() is a classmethod, so no throwaway Path() instance is needed.
CWD = Path.cwd().resolve()

# Format the date stamp once, outside the f-string: reusing double quotes
# inside a double-quoted f-string is a SyntaxError before Python 3.12.
snapshot_date = datetime.now().strftime("%Y-%m-%d")

# channel
file_path = f"{CWD}/../datasets/lirik_plays/youtube_api/raw/channel/{snapshot_date}.json.gz"
write_object_to_json_gzip_file(response_channel, file_path)

# # video
# file_path = f"{CWD}/../datasets/lirik_plays/youtube_api/raw/video/{snapshot_date}.json.gz"
# write_object_to_json_gzip_file(all_video_metadata, file_path)