In [1]:
# Import necessary modules
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
import requests
import time
from datetime import datetime
from pprint import pprint
import json

import googleapiclient.discovery
from config import api_key

# API key comes from the local `config` module (supply your own key there)
API_KEY = api_key
# Module-level page size.
# NOTE(review): not referenced by the cells visible here - confirm it is still needed.
max_results = 50

# Build the YouTube Data API v3 client used for the requests in this notebook
youtube = googleapiclient.discovery.build(
    serviceName="youtube",
    version="v3",
    developerKey=API_KEY,
)

In [4]:
# For any general info, use youtube.search().list() with part="snippet"
# For a channel, use youtube.channels().list() with part="snippet,statistics,contentDetails,brandingSettings,topicDetails,status"
# For a video, use youtube.videos().list() with part="snippet,statistics,contentDetails,status,topicDetails,player,liveStreamingDetails"

# Function that uses the YouTube API to find all channels related to 'query'
def fetch_all_channels(query, max_results=50):
    """Collect every channel search result for ``query``, following pagination.

    Parameters
    ----------
    query : str
        Search term passed to the API as ``q``.
    max_results : int, default 50
        Value passed as ``maxResults`` on each page request.

    Returns
    -------
    list
        The ``items`` entries of every response page, concatenated in the
        order the pages were received.
    """
    collected = []
    page_token = None  # None on the first request, then the previous page's token

    more_pages = True
    while more_pages:
        page = youtube.search().list(
            part="snippet",
            q=query,
            type="channel",
            maxResults=max_results,
            order="date",
            pageToken=page_token,
        ).execute()

        collected += page.get("items", [])

        # A missing or empty nextPageToken ends the loop
        page_token = page.get("nextPageToken")
        more_pages = bool(page_token)

    return collected

In [5]:
# Search for every channel matching "DIY" (one API request per result page)
channels = fetch_all_channels(query="DIY")

In [None]:
# Report how many search-result items are held in `channels`
print(f"Total channels retrieved: {len(channels)}")

In [9]:
# For any general info, use youtube.search().list() with part="snippet"
# For a channel, use youtube.channels().list() with part="snippet,statistics,contentDetails,brandingSettings,topicDetails,status"
# For a video, use youtube.videos().list() with part="snippet,statistics,contentDetails,status,topicDetails,player,liveStreamingDetails"

# Function that uses the YouTube API to fetch the full channel data for the channel IDs retrieved previously
def fetch_all_channels_data(channel_list_id, max_results=50):
    """Fetch the full channel resource for every channel in a list of search results.

    Parameters
    ----------
    channel_list_id : list of dict
        Search-result items; each one must expose its channel ID at
        ``item['id']['channelId']``.
    max_results : int or None, default 50
        Number of channel IDs sent per ``channels().list`` request. The value
        is clamped to the range 1..50 (50 being the per-call maximum); ``None``
        is treated as 50.

    Returns
    -------
    list
        The ``items`` entries of every batch response, concatenated in
        request order. Empty when ``channel_list_id`` is empty.

    Raises
    ------
    KeyError
        If an entry of ``channel_list_id`` lacks ``['id']['channelId']``.
    """
    api_batch_limit = 50  # Max allowed per API call

    # Honour the caller's max_results instead of silently ignoring it, but keep
    # it inside 1..api_batch_limit so range() never receives a step of 0 and a
    # request never carries more IDs than one call accepts.
    if max_results is None:
        batch_size = api_batch_limit
    else:
        batch_size = max(1, min(int(max_results), api_batch_limit))

    all_channels_data = []

    # Walk the search results in slices of batch_size
    for start in range(0, len(channel_list_id), batch_size):
        batch_ids = [
            channel_info['id']['channelId']
            for channel_info in channel_list_id[start:start + batch_size]
        ]

        # One request per batch; multiple channel IDs are separated by commas
        response = youtube.channels().list(
            part="snippet,statistics,contentDetails,brandingSettings,topicDetails,status",
            id=",".join(batch_ids),
        ).execute()

        all_channels_data.extend(response.get("items", []))

    return all_channels_data

In [10]:
# Look up the full channel record for every channel ID found by the search
channels_data = fetch_all_channels_data(channel_list_id=channels)

In [None]:
# Report how many channel detail records are held in `channels_data`
print(f"Total channels retrieved: {len(channels_data)}")

In [15]:
# Save the search results (`channels`) to disk as indented JSON
filename = 'data/channels.json'
with open(filename, mode='w') as file:
    json.dump(obj=channels, fp=file, indent=4)

In [16]:
# Save the channel detail records (`channels_data`) to disk as indented JSON
filename = 'data/channels_data.json'
with open(filename, mode='w') as file:
    json.dump(obj=channels_data, fp=file, indent=4)

In [8]:
# If you didn't run the API calls, run this cell to load the previously saved
# search results from channels.json
filename = 'data/channels.json'
with open(filename) as file:  # default mode is read-only text
    channels = json.load(fp=file)

In [9]:
# If you didn't run the API calls, run this cell to load the previously saved
# channel detail records from channels_data.json
filename = 'data/channels_data.json'
with open(filename) as file:  # default mode is read-only text
    channels_data = json.load(fp=file)

In [15]:
# Flatten each channel resource into one record: title, id and its three counts
channel_list = []
for channel in channels_data:
    record = {
        'channel_title': channel['brandingSettings']['channel']['title'],
        'channel_id': channel['id'],
    }

    statistics = channel['statistics']
    # When the subscriber count is absent, store the non-numeric placeholder '???'
    record['channel_subCount'] = (
        statistics['subscriberCount'] if 'subscriberCount' in statistics else '???'
    )
    record['channel_videoCount'] = statistics['videoCount']
    record['channel_viewCount'] = statistics['viewCount']

    channel_list.append(record)

# Build the DataFrame in one go and key it by channel id
channel_data_df = pd.DataFrame(channel_list).set_index('channel_id')

In [16]:
# Inspect the dtype pandas assigned to each column of channel_data_df
channel_data_df.dtypes

channel_title         object
channel_subCount      object
channel_videoCount    object
channel_viewCount     object
dtype: object

In [17]:
# Convert the count columns to integers.
# The subscriber column can contain the non-numeric placeholder '???' (used when
# a channel has no subscriberCount), which makes a plain astype('int64') raise
# ValueError. Unparseable entries are therefore coerced to missing values:
#   - a column with no missing values becomes plain int64 (same result as before)
#   - a column with missing values becomes pandas' nullable Int64 so it can hold <NA>
count_columns = ['channel_subCount', 'channel_videoCount', 'channel_viewCount']
for column in count_columns:
    numeric = pd.to_numeric(channel_data_df[column], errors='coerce')
    target_dtype = 'Int64' if numeric.isna().any() else 'int64'
    channel_data_df[column] = numeric.astype(target_dtype)

In [18]:
# Show the first 20 channels after ordering by subscriber count, largest first
(
    channel_data_df
    .sort_values(by='channel_subCount', ascending=False)
    .head(20)
)

Unnamed: 0_level_0,channel_title,channel_subCount,channel_videoCount,channel_viewCount
channel_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
UC295-Dw_tDNtZXFeAPAW6Aw,5-Minute Crafts,81100000,7445,27960945663
UC2etEuPIfohP4P53wM0KImA,5-Minute Crafts DIY,20300000,6640,5187110540
UCw5VDXH8up3pKUppIvcstNQ,"SLICK SLIME SAM - DIY, Comedy, Science",11200000,1477,5389464086
UCxBRABv7DQJqNn6zwkVFGsA,Ideas en 5 minutos DIY,6260000,6306,1223828792
UCwToG9qbHnaAFrllqt9TmNw,DIY & Crafts,5100000,19,1231019252
UCxFRrC-Xup5LVd7tyt0AGAQ,Ideias Incríveis DIY,4680000,6606,960715967
UCJsxAriYBUnKZVdMXZnGDoA,５分でできる DIY,3860000,6189,1666614796
UCnorhjQR4zJkT7AVNhu395Q,Home RenoVision DIY,3410000,941,559273237
UChKlSK39lLg8eZHIX0iVzLA,DIY Creators,3220000,200,253390531
UCSFKV-1TQsNpDvOv8H6UAQA,DIY Queen,3160000,329,674190084
