In [1]:
from googleapiclient.discovery import build 
import pandas as pd 
from dotenv import load_dotenv
import os
import re

In [2]:
# Load variables from a local .env file into the process environment so the
# API key does not have to be hard-coded in the notebook.
load_dotenv()
api_key = os.getenv("API_KEY")
if not api_key:
    # Stop here with an actionable message rather than building a client
    # that has no developer key.
    raise RuntimeError(
        "API_KEY is not set; define it in the environment or in a .env file "
        "before running this notebook."
    )

# Channel analysed throughout the rest of the notebook.
channel_id = "UCWsDFcIhY2DBi3GB5uykGXA"

# YouTube Data API v3 client, authenticated with the developer key.
youtube = build('youtube', 'v3', developerKey=api_key)

Get Channel Data

In [3]:
def get_channel_stats(youtube, channel_id):
    """Fetch headline statistics for a single YouTube channel.

    Args:
        youtube: YouTube Data API client exposing ``channels().list(...)``.
        channel_id: ID of the channel to look up.

    Returns:
        dict with the keys ``Channel_name``, ``Subscribers``, ``Views`` and
        ``Total_videos``. Values are passed through exactly as the API
        returns them; a statistic missing from the response (for example a
        hidden subscriber count) is reported as ``None``.

    Raises:
        ValueError: if the response contains no channel for ``channel_id``.
    """
    response = youtube.channels().list(
        part="snippet,statistics",
        id=channel_id
    ).execute()

    # The response may carry no "items" at all when nothing matches the id.
    items = response.get('items') or []
    if not items:
        raise ValueError(f"No channel found for id {channel_id!r}")

    channel = items[0]
    statistics = channel.get('statistics', {})

    return {
        'Channel_name': channel['snippet']['title'],
        # .get() so a channel that hides a counter does not raise KeyError.
        'Subscribers': statistics.get('subscriberCount'),
        'Views': statistics.get('viewCount'),
        'Total_videos': statistics.get('videoCount'),
    }

In [4]:
# Display the headline statistics for the configured channel.
get_channel_stats(youtube, channel_id)

{'Channel_name': 'IShowSpeed',
 'Subscribers': '34200000',
 'Views': '3406776148',
 'Total_videos': '1508'}

Collecting Statistics for All of Speed's YouTube Videos

In [5]:
# Function to list the IDs of all videos uploaded to a channel
def get_channel_videos(channel_id):
    """Return the IDs of every video in a channel's uploads playlist.

    Uses the module-level ``youtube`` client. The channel is looked up to
    find its uploads playlist, which is then read page by page until the
    API stops returning a ``nextPageToken``.

    Args:
        channel_id: ID of the channel whose uploads should be listed.

    Returns:
        list of video ID strings, in the order the playlist returns them.

    Raises:
        ValueError: if the response contains no channel for ``channel_id``.
    """
    channel_response = youtube.channels().list(
        part='contentDetails',
        id=channel_id
    ).execute()

    # The response may carry no "items" at all when nothing matches the id.
    channels = channel_response.get('items') or []
    if not channels:
        raise ValueError(f"No channel found for id {channel_id!r}")

    uploads_playlist_id = channels[0]['contentDetails']['relatedPlaylists']['uploads']

    videos = []
    next_page_token = None

    while True:
        playlist_response = youtube.playlistItems().list(
            # Only contentDetails is read below, so no other part is requested.
            part='contentDetails',
            playlistId=uploads_playlist_id,
            maxResults=50,
            pageToken=next_page_token
        ).execute()

        videos.extend(
            item['contentDetails']['videoId']
            for item in playlist_response.get('items', [])
        )

        next_page_token = playlist_response.get('nextPageToken')
        if not next_page_token:
            break

    return videos

# Collect the ID of every video in the configured channel's uploads playlist.
video_ids = get_channel_videos(channel_id)


# Function to parse duration

def parse_duration(duration):
    """Convert an ISO 8601 duration string into seconds and ``HH:MM:SS`` text.

    Accepts the time-only form (``PT#H#M#S``) as well as durations that
    include weeks and/or days (``P#W``, ``P#DT#H#M#S``, ``P0D``). Weeks and
    days are folded into the hour field, so a 26-hour video is formatted as
    ``26:00:00``.

    Args:
        duration: ISO 8601 duration string, e.g. ``"PT11H6M50S"``.

    Returns:
        tuple ``(total_seconds, formatted_time)`` where ``total_seconds`` is
        an int and ``formatted_time`` is a zero-padded ``HH:MM:SS`` string.

    Raises:
        ValueError: if ``duration`` does not start with an ISO 8601 duration.
    """
    # Every component is optional; the "T" separator only appears when a
    # time part follows. Anchored at the start only, as before, so trailing
    # text after the recognised components is ignored.
    pattern = r'P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?'
    match = re.match(pattern, duration)
    if match is None:
        raise ValueError(f"Unrecognised ISO 8601 duration: {duration!r}")

    # Unmatched groups are None and count as zero.
    weeks, days, hours, minutes, seconds = (int(part) if part else 0 for part in match.groups())

    total_hours = (weeks * 7 + days) * 24 + hours
    total_seconds = total_hours * 3600 + minutes * 60 + seconds
    formatted_time = f"{total_hours:02}:{minutes:02}:{seconds:02}"

    return total_seconds, formatted_time


# Function to fetch video statistics

def get_video_statistics(video_ids, batch_size=50):
    """Fetch title, publish date, duration and counters for each video.

    Uses the module-level ``youtube`` client and ``parse_duration``. IDs are
    sent to ``videos().list`` in comma-separated batches instead of one
    request per video, which cuts the number of API calls by up to 50x.

    Args:
        video_ids: iterable of video ID strings.
        batch_size: number of IDs per request; clamped to the range 1-50.

    Returns:
        list of dicts, one per video the API returned, in the order of
        ``video_ids``. Each dict has the keys ``videoId``, ``videoTitle``,
        ``datePosted``, ``duration``, ``totalSeconds``, ``views``, ``likes``
        and ``comments``. A counter missing from the response is ``0``.
    """
    # videos.list accepts up to 50 comma-separated IDs per call.
    batch_size = max(1, min(int(batch_size), 50))
    video_ids = list(video_ids)

    stats = []
    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start:start + batch_size]
        response = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=",".join(batch)
        ).execute()

        # Index by ID so rows are emitted in the caller's order and IDs the
        # API did not return are simply skipped.
        items_by_id = {item['id']: item for item in response.get('items', [])}

        for video_id in batch:
            item = items_by_id.get(video_id)
            if item is None:
                continue

            total_seconds, formatted_time = parse_duration(item['contentDetails']['duration'])
            statistics = item.get('statistics', {})

            stats.append({
                'videoId': video_id,
                'videoTitle': item['snippet']['title'],
                'datePosted': item['snippet']['publishedAt'],
                'duration': formatted_time,
                'totalSeconds': total_seconds,
                # Any counter may be absent from the response; default to 0.
                'views': statistics.get('viewCount', 0),
                'likes': statistics.get('likeCount', 0),
                'comments': statistics.get('commentCount', 0)
            })
    return stats

# Fetch per-video statistics for every collected ID (calls the YouTube API).
video_stats = get_video_statistics(video_ids)
# Uncomment to inspect the raw list of dicts: video_stats

In [6]:
# One row per video, one column per key of the collected dicts.
df = pd.DataFrame(video_stats)

# Counters come back from the API as strings (see the channel statistics
# output above), while missing counters default to int 0. Coerce them to
# numbers so sorting and aggregation behave numerically.
count_columns = [col for col in ('views', 'likes', 'comments') if col in df.columns]
if count_columns:
    df[count_columns] = df[count_columns].apply(pd.to_numeric)

In [7]:
# Preview the first rows of the collected data.
df.head()

Unnamed: 0,videoId,videoTitle,datePosted,duration,totalSeconds,views,likes,comments
0,dpgRc8PvUIo,PLAYING FORTNITE UNTIL WE WIN pt 5 ⛏️ ft. Kai ...,2024-12-30T07:06:24Z,11:06:50,40010,8400478,172883,6263
1,GLCnbEunjSs,PLAYING FORTNITE UNTIL WE WIN pt 4 ⛏️ ft. Kai ...,2024-12-29T19:43:30Z,11:14:59,40499,5871678,131644,766
2,tyIuYkABPfo,PLAYING FORTNITE UNTIL WE WIN pt 3⛏️ ft. Kai C...,2024-12-29T08:35:24Z,11:35:18,41718,7631389,167410,1507
3,rgNcb8PTvgc,PLAYING FORTNITE UNTIL WE WIN pt 2⛏️ ft. Kai C...,2024-12-28T20:59:07Z,11:41:35,42095,6473120,165893,792
4,qkrO2TkbL2A,PLAYING FORTNITE UNTIL WE WIN ⛏️ ft. Kai Cenat...,2024-12-28T09:11:00Z,11:41:40,42100,7847431,212490,1782


In [8]:
# Save the collected data to CSV; index=False leaves the row index out of the file.
df.to_csv('iShowSpeedVidData.csv', index=False)