In [2]:
# Import necessary modules
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
import requests
import time
from datetime import datetime
from pprint import pprint
import json

import googleapiclient.discovery
from config import api_key

# API key comes from the local `config` module (imported above as `api_key`)
API_KEY = api_key
# Module-level page size; the fetch functions in this notebook declare their own `max_results` default
max_results = 50

# YouTube Data API v3 client shared by the fetch functions in this notebook
youtube = googleapiclient.discovery.build(
    serviceName="youtube",
    version="v3",
    developerKey=API_KEY,
)

In [4]:
# for a video part="snippet,statistics,contentDetails,status,topicDetails,player,liveStreamingDetails"
# for a channel part="snippet,statistics,contentDetails,brandingSettings,topicDetails,status"
def fetch_all_channels(query, max_results=50, max_pages=None):
    """Search YouTube for channels matching ``query`` and collect the result pages.

    Issues ``youtube.search().list`` requests through the module-level
    ``youtube`` client with ``type="channel"`` and ``order="date"``, following
    ``nextPageToken`` until the API stops returning one or ``max_pages``
    requests have been made.

    Args:
        query: Search term sent as the ``q`` parameter.
        max_results: Page size sent as ``maxResults`` on every request.
        max_pages: Optional upper bound on the number of requests. ``None``
            (the default) keeps paging until no ``nextPageToken`` is returned.
            Every page is a separate ``search.list`` request billed against
            the API quota, so a cap keeps exploratory runs cheap.

    Returns:
        list: The ``items`` entries of every response, concatenated in the
        order the pages were received. Empty when ``max_pages`` is 0 or less.
    """
    all_channels = []
    next_page_token = None
    pages_fetched = 0

    while max_pages is None or pages_fetched < max_pages:
        response = youtube.search().list(
            part="snippet",
            q=query,
            type="channel",
            maxResults=max_results,
            order="date",
            pageToken=next_page_token,  # None on the first request
        ).execute()
        pages_fetched += 1

        all_channels.extend(response.get("items", []))

        # A missing/empty token means the last page has been reached
        next_page_token = response.get("nextPageToken")
        if not next_page_token:
            break

    return all_channels

In [5]:
# Run the paginated channel search for the "DIY" query (issues live API requests via fetch_all_channels)
channels = fetch_all_channels("DIY")

In [None]:
# Report how many search-result items are held in `channels`
print(f"Total channels retrieved: {len(channels)}")

In [9]:
# for a video part="snippet,statistics,contentDetails,status,topicDetails,player,liveStreamingDetails"
# for a channel part="snippet,statistics,contentDetails,brandingSettings,topicDetails,status"
def fetch_all_channels_data(channel_list_id, max_results=50):
    """Fetch the full channel resource for every channel in a list of search results.

    Reads ``item['id']['channelId']`` from each entry of ``channel_list_id``,
    drops repeated IDs (keeping first-seen order) so the same channel is not
    requested or returned twice, and queries ``youtube.channels().list``
    through the module-level ``youtube`` client in batches of up to 50 IDs.

    Args:
        channel_list_id: Sequence of search-result items, each exposing
            ``['id']['channelId']``.
        max_results: Unused by this function; kept so existing callers that
            pass it keep working.

    Returns:
        list: The ``items`` entries of every batch response, concatenated in
        request order. Empty when ``channel_list_id`` is empty.

    Raises:
        KeyError: If an entry has no ``['id']['channelId']``.
    """
    batch_size = 50  # Max allowed per API call

    # dict.fromkeys de-duplicates while preserving the original ordering
    unique_ids = list(dict.fromkeys(
        channel_info['id']['channelId'] for channel_info in channel_list_id
    ))

    all_channels_data = []
    for start in range(0, len(unique_ids), batch_size):
        batch_ids = unique_ids[start:start + batch_size]

        response = youtube.channels().list(
            part="snippet,statistics,contentDetails,brandingSettings,topicDetails,status",
            id=",".join(batch_ids),  # Multiple channel IDs separated by commas
        ).execute()

        all_channels_data.extend(response.get("items", []))

    return all_channels_data

In [10]:
# Fetch the full channel resources for the search results in `channels` (issues batched API requests)
channels_data = fetch_all_channels_data(channels)

In [15]:
# Save the search results held in `channels` to disk as indented JSON
filename = 'data/channels.json'
with open(filename, 'w') as file:
    file.write(json.dumps(channels, indent=4))

In [16]:
# Save the channel resources held in `channels_data` to disk as indented JSON
filename = 'data/channels_data.json'
with open(filename, 'w') as file:
    file.write(json.dumps(channels_data, indent=4))

In [None]:
# Reload the saved search results from disk into `channels`
filename = 'data/channels.json'
with open(filename, 'r') as file:
    channels = json.loads(file.read())

In [None]:
# Reload the saved channel resources from disk into `channels_data`
filename = 'data/channels_data.json'
with open(filename, 'r') as file:
    channels_data = json.loads(file.read())

In [11]:
# Flatten every channel resource into one record: title, id and the three headline counts
channel_list = []
for channel in channels_data:
    channel_list.append({
        'channel_title': channel['brandingSettings']['channel']['title'],
        'channel_id': channel['id'],
        # '???' stands in when the statistics carry no subscriberCount
        'channel_subCount': channel['statistics'].get('subscriberCount', '???'),
        'channel_videoCount': channel['statistics']['videoCount'],
        'channel_viewCount': channel['statistics']['viewCount'],
    })

# One row per record, keyed by channel id
channel_data_df = pd.DataFrame(channel_list).set_index('channel_id')

In [None]:
# Display the column dtypes of channel_data_df
channel_data_df.dtypes

In [13]:
# Convert the count columns to integers.
# channel_subCount can hold the '???' placeholder (set when the API response has no
# subscriberCount), which makes a plain astype('int64') raise ValueError. Coerce
# anything non-numeric to <NA> and store the column as pandas' nullable Int64.
channel_data_df['channel_subCount'] = pd.to_numeric(
    channel_data_df['channel_subCount'], errors='coerce'
).astype('Int64')
channel_data_df['channel_videoCount'] = channel_data_df['channel_videoCount'].astype('int64')
channel_data_df['channel_viewCount'] = channel_data_df['channel_viewCount'].astype('int64')

In [None]:
# Show the first 50 rows after sorting by channel_subCount in descending order
channel_data_df.sort_values('channel_subCount',ascending=False).head(50)