## Import Libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from IPython.display import JSON

##  Step 1: Channel Meta Data and Playlist Id

- snippet -> title
- contentDetails -> Related Playlist -> Uploads
- statistics -> viewCount, subscriberCount, videoCount

In [3]:
def get_channel_details(youtube, channel_id):
    try:
        request = youtube.channels().list(
            part="snippet, contentDetails, statistics",
            id=channel_id)
        response = request.execute()
        
        items = response.get('items', [])
    
        if not items:
            return "Channel Not Found. Please Check your Channel ID."
            
        channel = items[0] 
        snippet = channel.get('snippet', {})
        content_details = channel.get("contentDetails", {})
        statistics = channel.get('statistics', {})
    
        channel_details = {
            'channel_title': snippet.get('title', None),
            'playlist_id': content_details.get('relatedPlaylists', {}).get('uploads', None),
            'total_views': statistics.get('viewCount', 0),
            'total_subscribers': statistics.get('subscriberCount', 0),
            'total_videos': statistics.get('videoCount', 0),
            }
        return pd.DataFrame([channel_details])
    except HttpError as e:
        if e.resp.status == 400:
            return "API Key not Found"
        else:
            return (f"Error getting channel details: {e}")

## Step 2: Retrieving Video id's using Playlist id

- Items -> contentDetails -> VideoId

In [4]:
def fetch_video_ids(youtube, playlist_id):
    all_video_ids = []
    page_token = None

    try:
        while True:
            request = youtube.playlistItems().list(
                part='contentDetails',
                playlistId = playlist_id,
                maxResults=50,
                pageToken = page_token
            )
    
            response = request.execute()
            items = response.get('items', [])
            
            for item in items:
                video_id = item.get("contentDetails", None).get("videoId", None)
                all_video_ids.append(video_id)
    
            page_token = response.get('nextPageToken')
            if not page_token:
                break
            
        return all_video_ids
            
    except HttpError as e:
        return e

## Step 3: Fetch the Video Meta Data 

- snippet -> title, publishedAt, description, channelTitle, tags
- contentDetails -> duration
- statistics -> viewCount, likeCount, favouriteCount, commentCount

In [5]:
def get_video_meta_data(youtube, video_ids):
    all_video_details= []

    try:
        for i in range(0, len(video_ids), 50):
            request = youtube.videos().list(
                part="snippet, contentDetails, statistics",
                id = ",".join(video_ids[i:i+50])
            )
            response = request.execute()
        
            items = response.get('items', [])
        
            for item in items:
                snippet = item.get("snippet", {})
                content_details = item.get('contentDetails', {})
                statistics = item.get('statistics', {})
        
                tag_list = snippet.get('tags', [])
                tags = ",".join(tag_list) if tag_list else None
        
                video_details = {
                    'video_title': snippet.get('title', None),
                    'description': snippet.get('description', None),
                    'published_at': snippet.get('publishedAt', None),
                    'tags': tags,
                    'video_duration': content_details.get('duration', None),
                    'views': statistics.get('viewCount', 0),
                    'likes': statistics.get('likeCount', 0),
                    'favorites': statistics.get('favoriteCount', 0),
                    'comments': statistics.get('commentCount', 0)
                }
                all_video_details.append(video_details)
            
        return pd.DataFrame(all_video_details)

    except HttpError as e:
        print('Error getting video Details:{e}')

In [11]:
api_key = "AIzaSyDeM3TTNamojpsEiPZ-yNmBSgyp8sZJB4E"
channel_id = "UCBauXqj4Ksf1Rhz7v8i7hlQ  "

In [12]:
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey=api_key)

In [13]:
# get the playlist_id and channel statistics
channel_df = get_channel_details(youtube, channel_id)

# Dynamically retrieve the playlist_id from the channel dataframe
playlist = channel_df.loc[0, 'playlist_id']

# fetch all the videos and store in a list
video_ids = fetch_video_ids(youtube, playlist)

# fetch the video meta data from the video_ids
video_df = get_video_meta_data(youtube, video_ids)

# merge both the channel_df and video_df
total_df = pd.merge(video_df, channel_df, how='cross')

TypeError: object of type 'HttpError' has no len()

In [9]:
# total_df.sample(5)

In [10]:
total_df.to_csv('youtube_data.csv', index=False)