In [1]:
# Import necessary modules
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
import requests
import time
import random
from datetime import datetime
from pprint import pprint
import json

import googleapiclient.discovery
from googleapiclient.errors import HttpError
from config import api_key

# Define API Key (Replace with your API key)
# The key is read from the local `config` module rather than being hard-coded here.
API_KEY = api_key
# Default page size. NOTE(review): the visible helper functions declare their own
# `max_results=50` parameter, so this module-level value does not appear to be read.
max_results = 50

# Initialize YouTube API client
# Builds a client for the "youtube" service, version "v3", authenticated with API_KEY.
# The visible cells below issue their API requests through this `youtube` object.
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=API_KEY)

In [4]:
# for any general info use youtube.search().list() and part="snippet"
# for a channel use youtube.channels().list() and part="snippet,statistics,contentDetails,brandingSettings,topicDetails,status"
# for a video use youtube.videos().list() and part="snippet,statistics,contentDetails,status,topicDetails,player,liveStreamingDetails"

# Function to use Youtube API and find all channels related to 'query'
def fetch_all_channels(query, max_results=50):
    """Collect every channel search result for ``query``, following pagination.

    Uses the module-level ``youtube`` client to run ``search().list`` restricted
    to ``type="channel"`` and ordered by date, requesting ``max_results`` items
    per page, until a response carries no ``nextPageToken``.

    Args:
        query: Search term passed as ``q``.
        max_results: Page size passed as ``maxResults``.

    Returns:
        list: The ``items`` of every page, concatenated in the order received.
    """
    collected = []
    page_token = None  # None requests the first page
    more_pages = True

    while more_pages:
        response = youtube.search().list(
            part="snippet",
            q=query,
            type="channel",
            maxResults=max_results,
            order="date",
            pageToken=page_token,
        ).execute()

        # A page may omit "items"; treat that as an empty page.
        collected += response.get("items", [])

        # A missing or empty token means this was the last page.
        page_token = response.get("nextPageToken")
        more_pages = bool(page_token)

    return collected

In [5]:
# Use function above to find all channels related to DIY
# `channels` is the list of search-result items; a later cell reads each item's
# ['id']['channelId'] to look up the full channel details.
channels = fetch_all_channels("DIY")

In [None]:
# Print total amount of channels found
# (the number of search-result items collected across all pages)
print(f"Total channels retrieved: {len(channels)}")

In [9]:
# for any general info use youtube.search().list() and part="snippet"
# for a channel use youtube.channels().list() and part="snippet,statistics,contentDetails,brandingSettings,topicDetails,status"
# for a video use youtube.videos().list() and part="snippet,statistics,contentDetails,status,topicDetails,player,liveStreamingDetails"

# Function to use Youtube API and find all channel data related to channelId's we got previously
def fetch_all_channels_data(channel_list_id, max_results=50):
    """Fetch the full channel resource for every channel found by a search.

    Channel IDs are read from ``item['id']['channelId']`` of each search result,
    de-duplicated (keeping first-seen order) and requested from
    ``youtube.channels().list`` in batches of 50 comma-separated IDs.

    De-duplication matters because each batch is a separate request: an ID that
    appears in two different batches would otherwise be fetched, and returned,
    twice, producing duplicate rows downstream.

    Args:
        channel_list_id: Iterable of search-result items, each holding its
            channel ID at ``['id']['channelId']``.
        max_results: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        list: The ``items`` of every response, concatenated in request order.
        Empty when ``channel_list_id`` is empty (no request is made).

    Raises:
        KeyError: If an entry has no ``['id']['channelId']``.
    """
    batch_size = 50  # Max allowed per API call

    # dict.fromkeys drops repeats while preserving the order of first appearance.
    unique_ids = list(dict.fromkeys(
        channel_info['id']['channelId'] for channel_info in channel_list_id
    ))

    all_channels_data = []
    for start in range(0, len(unique_ids), batch_size):
        batch_ids = unique_ids[start:start + batch_size]

        # Fetch channel data for this batch of (at most) 50 IDs
        request = youtube.channels().list(
            part="snippet,statistics,contentDetails,brandingSettings,topicDetails,status",
            id=",".join(batch_ids)  # Multiple channel IDs separated by commas
        )
        response = request.execute()

        # A response may omit "items"; treat that as no results.
        all_channels_data.extend(response.get("items", []))

    return all_channels_data

In [10]:
# Use function above to find all channel data related to channelId's found
# `channels_data` holds the channel resources returned for the IDs in `channels`.
channels_data = fetch_all_channels_data(channels)

In [None]:
# Print total amount of channel detail records retrieved
# (this counts `channels_data`, not the search hits in `channels`; the printed label is the same)
print(f"Total channels retrieved: {len(channels_data)}")

In [15]:
# Persist the raw search results (`channels`) to disk as indented JSON
filename = 'data/channels.json'
with open(filename, mode='w') as json_out:
    json.dump(channels, json_out, indent=4)

In [16]:
# Persist the channel detail records (`channels_data`) to disk as indented JSON
filename = 'data/channels_data.json'
with open(filename, mode='w') as json_out:
    json.dump(channels_data, fp=json_out, indent=4)

In [27]:
# START HERE AFTER RUNNING THE FIRST CELL IF YOU DON'T WANT TO SPEND API CALLS ON CHANNELS

# If you didn't run the API calls, run this to load the previously saved data from channels.json
filename = 'data/channels.json'
with open(filename, mode='r') as json_in:
    channels = json.load(json_in)

In [28]:
# If you didn't run the API calls, run this to load the previously saved data from channels_data.json
filename = 'data/channels_data.json'
with open(filename, mode='r') as json_in:
    channels_data = json.load(fp=json_in)

In [29]:
# Flatten the channel resources in `channels_data` into one record per channel
# and load the records into a DataFrame.

def _channel_row(channel):
    """Extract the fields of one channel resource that the analysis uses."""
    title = channel['brandingSettings']['channel']['title']
    channel_id = channel['id']
    stats = channel['statistics']
    # Some channels carry no subscriberCount; mark those with the '???' placeholder.
    sub_count = stats.get('subscriberCount', '???')
    video_count = stats['videoCount']
    view_count = stats['viewCount']
    # The uploads playlist ID is kept so a later cell can list the channel's videos.
    uploads_playlist = channel["contentDetails"]["relatedPlaylists"]["uploads"]
    return {
        'channel_title': title,
        'channel_id': channel_id,
        'channel_upload_playlist': uploads_playlist,
        'channel_subCount': sub_count,
        'channel_videoCount': video_count,
        'channel_viewCount': view_count,
    }

# One dictionary per channel
channel_list = [_channel_row(channel) for channel in channels_data]

# Create a DF from list
channel_data_df = pd.DataFrame(channel_list)
#channel_data_df = channel_data_df.set_index('channel_id')

In [30]:
# Check the dtypes of columns
# (the count columns show up as `object` here; the next cell converts them to integers)
channel_data_df.dtypes

channel_title              object
channel_id                 object
channel_upload_playlist    object
channel_subCount           object
channel_videoCount         object
channel_viewCount          object
dtype: object

In [31]:
# Change the count columns to integers.
# The count columns are `object` dtype at this point, and the DataFrame-building
# cell stores the placeholder '???' when a channel has no subscriberCount. A plain
# .astype('int64') raises ValueError on that placeholder, so values that are not
# numeric are first coerced to missing. A column with no missing values still ends
# up as plain int64; only a column that has missing values uses the nullable
# 'Int64' dtype (plain int64 cannot represent a missing value).
count_columns = ['channel_subCount', 'channel_videoCount', 'channel_viewCount']
for count_column in count_columns:
    numeric_counts = pd.to_numeric(channel_data_df[count_column], errors='coerce')
    if numeric_counts.notna().all():
        channel_data_df[count_column] = numeric_counts.astype('int64')
    else:
        channel_data_df[count_column] = numeric_counts.astype('Int64')

In [32]:
# Display the sorted dataframe based on subcount
# (first 20 rows after sorting by subscriber count, largest first)
channel_data_df.sort_values('channel_subCount',ascending=False).head(20)

Unnamed: 0,channel_title,channel_id,channel_upload_playlist,channel_subCount,channel_videoCount,channel_viewCount
479,5-Minute Crafts,UC295-Dw_tDNtZXFeAPAW6Aw,UU295-Dw_tDNtZXFeAPAW6Aw,81100000,7445,27960945663
430,5-Minute Crafts DIY,UC2etEuPIfohP4P53wM0KImA,UU2etEuPIfohP4P53wM0KImA,20300000,6640,5187110540
438,"SLICK SLIME SAM - DIY, Comedy, Science",UCw5VDXH8up3pKUppIvcstNQ,UUw5VDXH8up3pKUppIvcstNQ,11200000,1477,5389464086
342,Ideas en 5 minutos DIY,UCxBRABv7DQJqNn6zwkVFGsA,UUxBRABv7DQJqNn6zwkVFGsA,6260000,6306,1223828792
334,DIY & Crafts,UCwToG9qbHnaAFrllqt9TmNw,UUwToG9qbHnaAFrllqt9TmNw,5100000,19,1231019252
372,Ideias Incríveis DIY,UCxFRrC-Xup5LVd7tyt0AGAQ,UUxFRrC-Xup5LVd7tyt0AGAQ,4680000,6606,960715967
432,５分でできる DIY,UCJsxAriYBUnKZVdMXZnGDoA,UUJsxAriYBUnKZVdMXZnGDoA,3860000,6189,1666614796
505,Home RenoVision DIY,UCnorhjQR4zJkT7AVNhu395Q,UUnorhjQR4zJkT7AVNhu395Q,3410000,941,559273237
551,DIY Creators,UChKlSK39lLg8eZHIX0iVzLA,UUhKlSK39lLg8eZHIX0iVzLA,3220000,200,253390531
466,DIY Queen,UCSFKV-1TQsNpDvOv8H6UAQA,UUSFKV-1TQsNpDvOv8H6UAQA,3160000,329,674190084


In [33]:
# Show the total amount of videos among every channel in channel_data_df
# Series.sum() adds the column in one vectorised call instead of iterating the
# underlying array element by element with the built-in sum().
print(channel_data_df['channel_videoCount'].sum())

270166


In [47]:
# This cell will pull every uploaded video id and published date for every channel in channel_data_df
#
# For each channel it pages through the channel's uploads playlist (50 items per
# request), recording (channel_id, video_id, publishedAt) in `video_ids` and the raw
# playlist items in `all_video_data`. Results are then written to a CSV and a JSON file.

video_ids = []
all_video_data = []

# Maximum number of consecutive failed requests tolerated before a channel is skipped
MAX_RETRIES = 3

for index, row in channel_data_df.iterrows():
    channelid = row['channel_id']
    uploads_playlist_id = row['channel_upload_playlist']

    print(f"📌 Pulling data for (ChannelID: {channelid}, PlaylistID: {uploads_playlist_id}), Total Videos Sofar: {len(video_ids)} ...")

    next_page_token = None  # None requests the first page
    retries = 0  # consecutive failures for the page currently being requested

    while True:
        try:
            request = youtube.playlistItems().list(
                part="snippet,contentDetails,status",
                playlistId=uploads_playlist_id,
                maxResults=50,  # Max per request
                pageToken=next_page_token
            )
            response = request.execute()

            # The request succeeded, so earlier transient failures no longer count.
            # Without this reset, three errors spread over a long playlist would
            # abort the channel even though every retry had succeeded.
            retries = 0

            # Handle empty playlists (avoid KeyError)
            items = response.get("items", [])
            if not items:
                print(f"⚠️ No videos found for ChannelID: {channelid}")
                break  # Stop fetching if the playlist is empty

            for item in items:
                video_ids.append({
                    'channel_id': channelid,
                    'video_id': item["snippet"]["resourceId"]["videoId"],
                    'publishedAt': item["snippet"]['publishedAt']
                })

            all_video_data.extend(items)

            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break  # No more pages

        except HttpError as e:
            # Log only the status and reason. Printing the exception itself would
            # write the full request URL, including the `key=` API key, into the
            # notebook output.
            error_reason = getattr(e, "reason", None) or "no reason given"
            print(f"❌ API Error for Channel {channelid}: HTTP {e.resp.status} ({error_reason})")
            if e.resp.status == 403:  # Quota exceeded or rate limit
                print("⚠️ Quota exceeded or rate limited. Sleeping for 10 minutes...")
                time.sleep(600)  # Sleep 10 minutes before retrying
            elif e.resp.status in [500, 503]:  # Server errors (retryable)
                print("⚠️ Server error, retrying in 5 seconds...")
                time.sleep(5)
            else:
                print("⚠️ Non-retryable error, skipping this channel.")
                break  # Stop retrying on fatal errors

            retries += 1
            if retries >= MAX_RETRIES:
                print(f"⚠️ Too many failures for Channel {channelid}, skipping...")
                break  # Avoid infinite loops on repeated errors

    # Short randomised pause between channels
    time.sleep(random.uniform(0.5, 1.5))

# Convert to DataFrame and Save as CSV (written once; the original wrote the same file twice)
video_id_df = pd.DataFrame(video_ids)
csv_output_path = "data/youtube_videos_list.csv"
output_path = csv_output_path  # same path; name kept in case another cell reads it
video_id_df.to_csv(csv_output_path, index=False)
print(f"📂 CSV file saved: {csv_output_path}")

# Save the raw playlist items as JSON; a failure here is reported but does not stop the cell
json_filename = 'data/video_id_data.json'
try:
    with open(json_filename, 'w', encoding='utf-8') as file:
        json.dump(all_video_data, file, ensure_ascii=False, indent=4)
    print(f"📂 JSON file saved: {json_filename}")
except Exception as e:
    print(f"❌ Error saving JSON file: {e}")

print(f"✅ Data collection complete. Total videos collected: {len(video_id_df)}")

📌 Pulling data for (ChannelID: UCjAzObYQ1--u0ctr2Kt_PuQ, PlaylistID: UUjAzObYQ1--u0ctr2Kt_PuQ), Total Videos Sofar: 0 ...
📌 Pulling data for (ChannelID: UCSvJyHffQ48dw2oQVq9cgqQ, PlaylistID: UUSvJyHffQ48dw2oQVq9cgqQ), Total Videos Sofar: 5 ...
📌 Pulling data for (ChannelID: UCcAA1NgTk2TnKdwHEEflTWQ, PlaylistID: UUcAA1NgTk2TnKdwHEEflTWQ), Total Videos Sofar: 7 ...
📌 Pulling data for (ChannelID: UCEPzmkd5mqbwA6zIeVvFz4w, PlaylistID: UUEPzmkd5mqbwA6zIeVvFz4w), Total Videos Sofar: 8 ...
📌 Pulling data for (ChannelID: UCmmjeEz72_4FziVDwLYTFEg, PlaylistID: UUmmjeEz72_4FziVDwLYTFEg), Total Videos Sofar: 16 ...
📌 Pulling data for (ChannelID: UCxiIpT886l9Yy797IGGGC2g, PlaylistID: UUxiIpT886l9Yy797IGGGC2g), Total Videos Sofar: 17 ...
❌ API Error for Channel UCxiIpT886l9Yy797IGGGC2g: <HttpError 404 when requesting https://youtube.googleapis.com/youtube/v3/playlistItems?part=snippet%2CcontentDetails%2Cstatus&playlistId=UUxiIpT886l9Yy797IGGGC2g&maxResults=50&key=[REDACTED]

In [49]:
# Preview the first 20 collected (channel_id, video_id, publishedAt) rows
video_id_df.head(20)

Unnamed: 0,channel_id,video_id,publishedAt
0,UCjAzObYQ1--u0ctr2Kt_PuQ,myYfDsxcDA0,2025-01-17T08:41:29Z
1,UCjAzObYQ1--u0ctr2Kt_PuQ,TKhDNl5JqxM,2025-01-17T08:36:14Z
2,UCjAzObYQ1--u0ctr2Kt_PuQ,eww1W2QEX18,2025-01-15T19:34:59Z
3,UCjAzObYQ1--u0ctr2Kt_PuQ,5s1d6JQqfIU,2025-01-15T19:32:48Z
4,UCjAzObYQ1--u0ctr2Kt_PuQ,LgTwITNhWqk,2025-01-15T19:29:23Z
5,UCSvJyHffQ48dw2oQVq9cgqQ,IQYwsJQOecU,2025-01-17T23:15:00Z
6,UCSvJyHffQ48dw2oQVq9cgqQ,BgFHhNl9U7k,2025-01-15T11:33:56Z
7,UCcAA1NgTk2TnKdwHEEflTWQ,u-wFPHOAY0E,2025-01-17T19:56:13Z
8,UCEPzmkd5mqbwA6zIeVvFz4w,ykfSwWzM-SM,2025-01-18T12:18:11Z
9,UCEPzmkd5mqbwA6zIeVvFz4w,_p3O5mbHsx4,2025-01-18T09:01:01Z


In [54]:
# Parse the publishedAt strings into timestamps
published_ts = pd.to_datetime(video_id_df['publishedAt'])

# Replace publishedAt with separate date and time columns on a new frame
videoid_clean_time_df = (
    video_id_df
    .drop(columns='publishedAt')
    .assign(
        date_publishedAt=published_ts.dt.date,
        time_publishedAt=published_ts.dt.time,
    )
)
videoid_clean_time_df

Unnamed: 0,channel_id,video_id,date_publishedAt,time_publishedAt
0,UCjAzObYQ1--u0ctr2Kt_PuQ,myYfDsxcDA0,2025-01-17,08:41:29
1,UCjAzObYQ1--u0ctr2Kt_PuQ,TKhDNl5JqxM,2025-01-17,08:36:14
2,UCjAzObYQ1--u0ctr2Kt_PuQ,eww1W2QEX18,2025-01-15,19:34:59
3,UCjAzObYQ1--u0ctr2Kt_PuQ,5s1d6JQqfIU,2025-01-15,19:32:48
4,UCjAzObYQ1--u0ctr2Kt_PuQ,LgTwITNhWqk,2025-01-15,19:29:23
...,...,...,...,...
268966,UC_rUFviaLzBXUNcks0j2l4g,RUUhDi8tBTU,2015-02-05,17:04:07
268967,UC_rUFviaLzBXUNcks0j2l4g,tGSzBe8jrcE,2015-02-03,19:30:23
268968,UC_rUFviaLzBXUNcks0j2l4g,sJLNUwW6_bg,2015-02-02,22:40:09
268969,UC_rUFviaLzBXUNcks0j2l4g,aE3ArY9BBww,2015-01-30,15:45:50


In [59]:
# Attach each channel's metadata to every one of its videos (left join on channel_id)
channel_video_merged_df = videoid_clean_time_df.merge(
    channel_data_df,
    how='left',
    on='channel_id',
)
print(channel_video_merged_df.shape)
channel_video_merged_df.head(20)

(268971, 9)


Unnamed: 0,channel_id,video_id,date_publishedAt,time_publishedAt,channel_title,channel_upload_playlist,channel_subCount,channel_videoCount,channel_viewCount
0,UCjAzObYQ1--u0ctr2Kt_PuQ,myYfDsxcDA0,2025-01-17,08:41:29,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
1,UCjAzObYQ1--u0ctr2Kt_PuQ,TKhDNl5JqxM,2025-01-17,08:36:14,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
2,UCjAzObYQ1--u0ctr2Kt_PuQ,eww1W2QEX18,2025-01-15,19:34:59,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
3,UCjAzObYQ1--u0ctr2Kt_PuQ,5s1d6JQqfIU,2025-01-15,19:32:48,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
4,UCjAzObYQ1--u0ctr2Kt_PuQ,LgTwITNhWqk,2025-01-15,19:29:23,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
5,UCSvJyHffQ48dw2oQVq9cgqQ,IQYwsJQOecU,2025-01-17,23:15:00,ENA Diy,UUSvJyHffQ48dw2oQVq9cgqQ,1,1,2
6,UCSvJyHffQ48dw2oQVq9cgqQ,BgFHhNl9U7k,2025-01-15,11:33:56,ENA Diy,UUSvJyHffQ48dw2oQVq9cgqQ,1,1,2
7,UCcAA1NgTk2TnKdwHEEflTWQ,u-wFPHOAY0E,2025-01-17,19:56:13,Fan DIY,UUcAA1NgTk2TnKdwHEEflTWQ,0,1,0
8,UCEPzmkd5mqbwA6zIeVvFz4w,ykfSwWzM-SM,2025-01-18,12:18:11,DiY Arts and craft,UUEPzmkd5mqbwA6zIeVvFz4w,0,3,0
9,UCEPzmkd5mqbwA6zIeVvFz4w,_p3O5mbHsx4,2025-01-18,09:01:01,DiY Arts and craft,UUEPzmkd5mqbwA6zIeVvFz4w,0,3,0


In [62]:
# Check for any NA values
# Prints one boolean per column: True if that column contains at least one missing value.
print(channel_video_merged_df.isna().any())

channel_id                 False
video_id                   False
date_publishedAt           False
time_publishedAt           False
channel_title              False
channel_upload_playlist    False
channel_subCount           False
channel_videoCount         False
channel_viewCount          False
dtype: bool


In [63]:
# Save the merged dataframe to a CSV
# index=False leaves the row index out of the file.
file_path = "data/full_videos_list.csv"
channel_video_merged_df.to_csv(file_path, index=False)

In [21]:
# Read in the full_videos_list.csv
# This rebinds channel_video_merged_df from the file written by the save cell above,
# so the analysis can restart here without repeating the API calls.
channel_video_merged_df = pd.read_csv("data/full_videos_list.csv")
channel_video_merged_df

Unnamed: 0,channel_id,video_id,date_publishedAt,time_publishedAt,channel_title,channel_upload_playlist,channel_subCount,channel_videoCount,channel_viewCount
0,UCjAzObYQ1--u0ctr2Kt_PuQ,myYfDsxcDA0,2025-01-17,08:41:29,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
1,UCjAzObYQ1--u0ctr2Kt_PuQ,TKhDNl5JqxM,2025-01-17,08:36:14,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
2,UCjAzObYQ1--u0ctr2Kt_PuQ,eww1W2QEX18,2025-01-15,19:34:59,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
3,UCjAzObYQ1--u0ctr2Kt_PuQ,5s1d6JQqfIU,2025-01-15,19:32:48,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
4,UCjAzObYQ1--u0ctr2Kt_PuQ,LgTwITNhWqk,2025-01-15,19:29:23,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
...,...,...,...,...,...,...,...,...,...
268966,UC_rUFviaLzBXUNcks0j2l4g,RUUhDi8tBTU,2015-02-05,17:04:07,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576
268967,UC_rUFviaLzBXUNcks0j2l4g,tGSzBe8jrcE,2015-02-03,19:30:23,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576
268968,UC_rUFviaLzBXUNcks0j2l4g,sJLNUwW6_bg,2015-02-02,22:40:09,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576
268969,UC_rUFviaLzBXUNcks0j2l4g,aE3ArY9BBww,2015-01-30,15:45:50,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576


In [22]:
# Parse date_publishedAt into datetime64 (see the dtypes printed below) so that the
# next cell can use the .dt accessor to filter by year.
channel_video_merged_df['date_publishedAt'] = pd.to_datetime(channel_video_merged_df['date_publishedAt'])
print(channel_video_merged_df.dtypes)
channel_video_merged_df

channel_id                         object
video_id                           object
date_publishedAt           datetime64[ns]
time_publishedAt                   object
channel_title                      object
channel_upload_playlist            object
channel_subCount                    int64
channel_videoCount                  int64
channel_viewCount                   int64
dtype: object


Unnamed: 0,channel_id,video_id,date_publishedAt,time_publishedAt,channel_title,channel_upload_playlist,channel_subCount,channel_videoCount,channel_viewCount
0,UCjAzObYQ1--u0ctr2Kt_PuQ,myYfDsxcDA0,2025-01-17,08:41:29,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
1,UCjAzObYQ1--u0ctr2Kt_PuQ,TKhDNl5JqxM,2025-01-17,08:36:14,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
2,UCjAzObYQ1--u0ctr2Kt_PuQ,eww1W2QEX18,2025-01-15,19:34:59,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
3,UCjAzObYQ1--u0ctr2Kt_PuQ,5s1d6JQqfIU,2025-01-15,19:32:48,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
4,UCjAzObYQ1--u0ctr2Kt_PuQ,LgTwITNhWqk,2025-01-15,19:29:23,DIY or DIE,UUjAzObYQ1--u0ctr2Kt_PuQ,0,5,33
...,...,...,...,...,...,...,...,...,...
268966,UC_rUFviaLzBXUNcks0j2l4g,RUUhDi8tBTU,2015-02-05,17:04:07,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576
268967,UC_rUFviaLzBXUNcks0j2l4g,tGSzBe8jrcE,2015-02-03,19:30:23,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576
268968,UC_rUFviaLzBXUNcks0j2l4g,sJLNUwW6_bg,2015-02-02,22:40:09,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576
268969,UC_rUFviaLzBXUNcks0j2l4g,aE3ArY9BBww,2015-01-30,15:45:50,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576


In [23]:
# Keep only the videos whose publish date falls in 2024
published_in_2024 = channel_video_merged_df['date_publishedAt'].dt.year.eq(2024)
channel_video_2024_df = channel_video_merged_df.loc[published_in_2024]
channel_video_2024_df

Unnamed: 0,channel_id,video_id,date_publishedAt,time_publishedAt,channel_title,channel_upload_playlist,channel_subCount,channel_videoCount,channel_viewCount
133,UCMRnsnlHHtVt0WmTzoVyk-w,tQ5qWH05TvA,2024-12-31,12:36:22,DIY queen,UUMRnsnlHHtVt0WmTzoVyk-w,16,132,3047
134,UCMRnsnlHHtVt0WmTzoVyk-w,olOANmxDR30,2024-12-31,12:31:05,DIY queen,UUMRnsnlHHtVt0WmTzoVyk-w,16,132,3047
135,UCMRnsnlHHtVt0WmTzoVyk-w,9f5yz00-pc8,2024-12-30,12:14:28,DIY queen,UUMRnsnlHHtVt0WmTzoVyk-w,16,132,3047
136,UCMRnsnlHHtVt0WmTzoVyk-w,eTXdvWjtLY0,2024-12-29,11:40:14,DIY queen,UUMRnsnlHHtVt0WmTzoVyk-w,16,132,3047
137,UCMRnsnlHHtVt0WmTzoVyk-w,t2WWCMPvd1E,2024-12-29,11:25:44,DIY queen,UUMRnsnlHHtVt0WmTzoVyk-w,16,132,3047
...,...,...,...,...,...,...,...,...,...
266622,UC_rUFviaLzBXUNcks0j2l4g,1rS7DxsbAPQ,2024-01-16,17:22:28,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576
266623,UC_rUFviaLzBXUNcks0j2l4g,BZ4k8zByVVI,2024-01-10,20:19:49,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576
266624,UC_rUFviaLzBXUNcks0j2l4g,W4nr1HbrI1o,2024-01-10,20:17:01,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576
266625,UC_rUFviaLzBXUNcks0j2l4g,X5oDk3vnq4M,2024-01-10,20:16:09,DIY Life Tech,UU_rUFviaLzBXUNcks0j2l4g,15700,2587,13942576


In [24]:
# Save the videos from 2024 dataframe to a CSV
# index=False leaves the row index out of the file.
file_path = "data/2024_videos_list.csv"
channel_video_2024_df.to_csv(file_path, index=False)

In [None]:
# for any general info use youtube.search().list() and part="snippet"
# for a channel use youtube.channels().list() and part="snippet,statistics,contentDetails,brandingSettings,topicDetails,status"
# for a video use youtube.videos().list() and part="snippet,statistics,contentDetails,status,topicDetails,player,liveStreamingDetails"

# Not finished - do not run. (As written, ",".join(batch) would fail because video_ids holds dicts, not video ID strings.)

# all_video_data = []
# batch_size = 50  # Max per request

# for i in range(0, len(video_ids), batch_size):
#     batch = video_ids[i:i + batch_size]  # Process in batches of 50

#     request = youtube.videos().list(
#         part="snippet,statistics,contentDetails,status,topicDetails,player,liveStreamingDetails",
#         id=",".join(batch)
#     )
#     response = request.execute()
    
#     all_video_data.extend(response["items"])

# print(f"Retrieved data for {len(all_video_data)} videos")