## Import Libraries

In [1]:
import os

import numpy as np
import pandas as pd

from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from IPython.display import JSON

In [2]:
# Read the API key from the environment instead of hardcoding it: a key saved
# in the notebook is exposed to everyone who can read the file and should be
# revoked. Set YOUTUBE_API_KEY before starting the kernel.
api_key = os.environ.get("YOUTUBE_API_KEY")

# Channel to analyse. The ID must not contain surrounding whitespace.
channel_id = "UC-qsP49Ai2GymJgyKX38l1w"  # Robert Greene
# "UC-qsP49Ai2GymJgyKX38l1w" # Robert Greene
# "UCbzVRTkX3bzNZuBd9In4XyA" # Modern MBA

# Channel Statistics and Playlist Id

## Step 1: Getting JSON Response

In [3]:
# Client for the 'youtube' service, version 'v3'; every request below uses it.
youtube = build('youtube', 'v3', developerKey=api_key)

In [4]:
def get_channel_data(youtube, channel_id):
    """Return the raw channels.list response for ``channel_id`` as a JSON display object.

    The request asks for the snippet, contentDetails and statistics parts and
    the response is wrapped in IPython's ``JSON`` so the notebook can render it.
    """
    raw_response = (
        youtube.channels()
        .list(part="snippet,contentDetails,statistics", id=channel_id)
        .execute()
    )
    return JSON(raw_response)

In [5]:
# Display the raw channels.list response for the configured channel.
get_channel_data(youtube, channel_id)

<IPython.core.display.JSON object>

## Step 2: Getting the Playlist ID and Statistics

- snippet -> title
- contentDetails -> relatedPlaylists -> uploads
- statistics -> viewCount, subscriberCount, videoCount

In [6]:
def get_channel_data(youtube, channel_id):
    """Return a one-row DataFrame summarising the channel ``channel_id``.

    Args:
        youtube: API client exposing ``channels().list(...).execute()``.
        channel_id: ID of the channel to look up.

    Returns:
        pandas.DataFrame with a single row and the columns ``channel_title``,
        ``playlist_id`` (the channel's "uploads" playlist), ``total_views``,
        ``total_subscribers`` and ``total_videos``. A missing title or
        playlist ID is ``None``; a missing count is ``0``.

    Raises:
        ValueError: if the response contains no item for ``channel_id``.
    """
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=channel_id
    )
    response = request.execute()

    # 'items' may be absent or empty; fail with a clear message instead of
    # an opaque KeyError/IndexError from indexing.
    items = response.get('items') or []
    if not items:
        raise ValueError(f"No channel found for channel id {channel_id!r}")
    channel_item = items[0]

    snippet = channel_item.get('snippet', {})
    content_details = channel_item.get('contentDetails', {})
    statistics = channel_item.get('statistics', {})
    # Fall back to an empty dict so a missing 'relatedPlaylists' yields None
    # rather than raising AttributeError on None.get(...).
    related_playlists = content_details.get('relatedPlaylists') or {}

    channel_details = {
        "channel_title": snippet.get('title', None),
        "playlist_id": related_playlists.get('uploads', None),
        "total_views": statistics.get('viewCount', 0),
        "total_subscribers": statistics.get('subscriberCount', 0),
        "total_videos": statistics.get('videoCount', 0)
    }

    return pd.DataFrame([channel_details])

In [7]:
# Build the one-row channel summary and show it as the cell output.
channel_df = get_channel_data(youtube, channel_id)
channel_df

Unnamed: 0,channel_title,playlist_id,total_views,total_subscribers,total_videos
0,Robert Greene,UU-qsP49Ai2GymJgyKX38l1w,115840350,1000000,460


## Step 3: Handling Exceptions

- Invalid or missing API key
- No channel found for the given channel ID

In [8]:
def get_channel_data(youtube, channel_id):
    """Return a one-row DataFrame summarising a channel, reporting failures.

    Args:
        youtube: API client exposing ``channels().list(...).execute()``.
        channel_id: ID of the channel to look up.

    Returns:
        pandas.DataFrame: one row with ``channel_title``, ``playlist_id``,
            ``total_views``, ``total_subscribers`` and ``total_videos`` when
            the channel is found. A missing title or playlist ID is ``None``;
            a missing count is ``0``.
        str: a "No Channel Found" message when the response has no items.
        None: when the request raises ``HttpError``; the error is printed.
    """
    try:
        request = youtube.channels().list(
            part="snippet,contentDetails,statistics",
            id=channel_id
        )
        response = request.execute()
        items = response.get('items') or []

        if not items:
            return "No Channel Found. Please provide a proper channel Id."

        channel_items = items[0]
        snippet = channel_items.get('snippet', {})
        content_details = channel_items.get('contentDetails', {})
        statistics = channel_items.get('statistics', {})
        # Fall back to an empty dict so a missing 'relatedPlaylists' yields
        # None rather than raising AttributeError on None.get(...).
        related_playlists = content_details.get('relatedPlaylists') or {}

        channel_details = {
            "channel_title": snippet.get('title', None),
            "playlist_id": related_playlists.get('uploads', None),
            "total_views": statistics.get('viewCount', 0),
            "total_subscribers": statistics.get('subscriberCount', 0),
            "total_videos": statistics.get('videoCount', 0)
        }

        return pd.DataFrame([channel_details])
    except HttpError as e:
        if e.resp.status == 400:
            print('No API Found')
        else:
            print(e)
        # Make the failure result explicit for callers.
        return None

In [9]:
# Run the lookup again with the exception-handling version defined above.
get_channel_data(youtube, channel_id)

Unnamed: 0,channel_title,playlist_id,total_views,total_subscribers,total_videos
0,Robert Greene,UU-qsP49Ai2GymJgyKX38l1w,115840350,1000000,460


# Retrieving video ids using playlist items

## Step 1: Getting JSON Response

In [10]:
# Re-create channel_df; the next cell reads the playlist ID from it.
channel_df = get_channel_data(youtube, channel_id)
channel_df

Unnamed: 0,channel_title,playlist_id,total_views,total_subscribers,total_videos
0,Robert Greene,UU-qsP49Ai2GymJgyKX38l1w,115840350,1000000,460


In [11]:
# Take the playlist_id value from the row labelled 0 of channel_df.
playlist_id = channel_df.loc[0, 'playlist_id']
playlist_id

'UU-qsP49Ai2GymJgyKX38l1w'

In [12]:
def fetch_video_ids(youtube, playlist_id):
    """Return the raw playlistItems.list response for ``playlist_id`` as a JSON display object.

    Only the contentDetails part is requested; the response is wrapped in
    IPython's ``JSON`` so the notebook can render it.
    """
    raw_response = (
        youtube.playlistItems()
        .list(part='contentDetails', playlistId=playlist_id)
        .execute()
    )
    return JSON(raw_response)
    

In [13]:
# Display the raw playlistItems.list response for the playlist.
fetch_video_ids(youtube, playlist_id)

<IPython.core.display.JSON object>

## Step 2: Getting One Video ID

In [14]:
def fetch_video_ids(youtube, playlist_id):
    """Return the video ID of the first item in a playlist.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_id: ID of the playlist to read.

    Returns:
        The ``videoId`` of the first returned item, or ``None`` when the
        response has no items or the first item carries no ``videoId``.
    """
    request = youtube.playlistItems().list(
        part='contentDetails',
        playlistId=playlist_id)

    response = request.execute()

    # 'items' can be absent or empty; indexing it directly would raise.
    items = response.get('items') or []
    if not items:
        return None

    content_details = items[0].get('contentDetails', {})
    return content_details.get("videoId")
    

In [15]:
# Show the single video ID returned by this version of fetch_video_ids.
fetch_video_ids(youtube, playlist_id)

'VQx4MB2oS_M'

## Step 3: Getting Max Video IDs

In [16]:
def fetch_video_ids(youtube, playlist_id):
    """Return the video IDs from one page (at most 50 items) of a playlist.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_id: ID of the playlist to read.

    Returns:
        list of video ID strings in response order. Items without a
        ``videoId`` are skipped; a response without items gives ``[]``.
    """
    all_video_ids = []

    request = youtube.playlistItems().list(
        part='contentDetails',
        playlistId=playlist_id,
        maxResults=50
    )

    response = request.execute()

    # 'items' can be absent; fall back to an empty list so the loop is safe.
    for item in response.get('items') or []:
        video_id = item.get('contentDetails', {}).get("videoId")
        # A None entry would later break ",".join(video_ids), so skip it.
        if video_id:
            all_video_ids.append(video_id)

    return all_video_ids
    

In [17]:
# Count the IDs returned by the single request (maxResults is 50 above).
len(fetch_video_ids(youtube, playlist_id))

50

## Step 4: Using PageToken

In [18]:
def fetch_video_ids(youtube, playlist_id):
    """Return every video ID in a playlist, following pagination.

    Requests pages of up to 50 items and keeps going while the response
    carries a ``nextPageToken``.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_id: ID of the playlist to read.

    Returns:
        list of video ID strings in response order. Items without a
        ``videoId`` are skipped; a playlist with no items gives ``[]``.
    """
    all_video_ids = []
    page_token = None  # None requests the first page

    while True:
        request = youtube.playlistItems().list(
            part='contentDetails',
            playlistId=playlist_id,
            maxResults=50,
            pageToken=page_token
        )

        response = request.execute()

        # 'items' can be absent; fall back to an empty list so the loop is safe.
        for item in response.get('items') or []:
            video_id = item.get('contentDetails', {}).get("videoId")
            # A None entry would later break ",".join(video_ids), so skip it.
            if video_id:
                all_video_ids.append(video_id)

        page_token = response.get('nextPageToken', None)

        if not page_token:
            break

    return all_video_ids
    

In [19]:
# Collect the video IDs from every page of the playlist.
video_ids = fetch_video_ids(youtube, playlist_id)

## Step 5: Handling Exceptions

- Channel with no Videos

In [20]:
def fetch_video_ids(youtube, playlist_id):
    """Return every video ID in a playlist, reporting API errors.

    Requests pages of up to 50 items and keeps going while the response
    carries a ``nextPageToken``.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_id: ID of the playlist to read.

    Returns:
        list of video ID strings in response order (items without a
        ``videoId`` are skipped), or ``None`` when a request raises
        ``HttpError``. In that case a message is printed and any IDs
        collected so far are discarded.
    """
    try:
        all_video_ids = []
        page_token = None  # None requests the first page

        while True:
            request = youtube.playlistItems().list(
                part='contentDetails',
                playlistId=playlist_id,
                maxResults=50,
                pageToken=page_token
            )

            response = request.execute()

            # 'items' can be absent; fall back to an empty list.
            for item in response.get('items') or []:
                video_id = item.get('contentDetails', {}).get("videoId")
                # A None entry would later break ",".join(video_ids).
                if video_id:
                    all_video_ids.append(video_id)

            page_token = response.get('nextPageToken', None)

            if not page_token:
                break

        return all_video_ids
    except HttpError as e:
        if e.resp.status == 404:
            print("No Videos in the channel")
        else:
            print(e)
        # Make the failure result explicit for callers.
        return None

In [21]:
# Fetch the IDs again with the exception-handling version of fetch_video_ids.
video_ids = fetch_video_ids(youtube, playlist_id)

In [22]:
# Number of video IDs collected.
len(video_ids)

460

# Getting Video Meta Data

## Step 1: Getting JSON Response

In [23]:
def get_video_meta_data(youtube, video_ids):
    """Return the raw videos.list response for ``video_ids`` as a JSON display object.

    All IDs are joined with commas into a single request for the snippet,
    contentDetails and statistics parts.
    """
    joined_ids = ",".join(video_ids)
    raw_response = (
        youtube.videos()
        .list(part="snippet,contentDetails,statistics", id=joined_ids)
        .execute()
    )
    return JSON(raw_response)

In [24]:
# Preview the raw videos.list response for the first five video IDs.
get_video_meta_data(youtube, video_ids[0:5])

<IPython.core.display.JSON object>

- snippet -> publishedAt, title, description, tags
- contentDetails -> duration
- statistics -> viewCount, likeCount, commentCount

## Step 2: Getting video meta data for first 50 items

In [47]:
def get_video_meta_data(youtube, video_ids):
    """Return a DataFrame of metadata for the first 50 IDs in ``video_ids``.

    Only ``video_ids[0:50]`` is sent, in a single request; later IDs are
    ignored by this version.

    Args:
        youtube: API client exposing ``videos().list(...).execute()``.
        video_ids: sequence of video ID strings.

    Returns:
        pandas.DataFrame with one row per returned item and the columns
        ``video_title``, ``description``, ``published_at``, ``tags``,
        ``duration``, ``views``, ``likes`` and ``comments``. Missing text
        fields are ``None``; missing counts are ``0``.
    """
    all_video_details = []

    request = youtube.videos().list(
        part="snippet,contentDetails,statistics",
        id=",".join(video_ids[0:50])
    )

    response = request.execute()

    for item in response.get('items', []):
        # Plain dict lookups; a trailing comma here would wrap each value in
        # a 1-tuple and force [0] indexing below.
        snippet = item.get('snippet', {})
        content_details = item.get('contentDetails', {})
        statistics = item.get('statistics', {})

        all_video_details.append({
            'video_title': snippet.get('title', None),
            'description': snippet.get('description', None),
            'published_at': snippet.get('publishedAt', None),
            'tags': snippet.get('tags', None),
            'duration': content_details.get('duration', None),
            'views': statistics.get('viewCount', 0),
            'likes': statistics.get('likeCount', 0),
            'comments': statistics.get('commentCount', 0)
        })

    return pd.DataFrame(all_video_details)

In [49]:
# get_video_meta_data(youtube, video_ids)

## Step 3: Getting the Complete DataFrame

In [52]:
def get_video_meta_data(youtube, video_ids):
    """Return a DataFrame of metadata for every ID in ``video_ids``.

    The IDs are sent in consecutive batches of up to 50 per request and the
    returned items are concatenated in request order.

    Args:
        youtube: API client exposing ``videos().list(...).execute()``.
        video_ids: sequence of video ID strings.

    Returns:
        pandas.DataFrame with one row per returned item and the columns
        ``video_title``, ``description``, ``published_at``, ``tags``,
        ``duration``, ``views``, ``likes`` and ``comments``. Missing text
        fields are ``None``; missing counts are ``0``.
    """
    all_video_details = []

    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=",".join(video_ids[i:i+50])
        )

        response = request.execute()

        for item in response.get('items', []):
            # Plain dict lookups; a trailing comma here would wrap each value
            # in a 1-tuple and force [0] indexing below.
            snippet = item.get('snippet', {})
            content_details = item.get('contentDetails', {})
            statistics = item.get('statistics', {})

            all_video_details.append({
                'video_title': snippet.get('title', None),
                'description': snippet.get('description', None),
                'published_at': snippet.get('publishedAt', None),
                'tags': snippet.get('tags', None),
                'duration': content_details.get('duration', None),
                'views': statistics.get('viewCount', 0),
                'likes': statistics.get('likeCount', 0),
                'comments': statistics.get('commentCount', 0)
            })

    return pd.DataFrame(all_video_details)

In [54]:
# Fetch metadata for every collected video ID, 50 IDs per request.
video_df = get_video_meta_data(youtube, video_ids)

## Step 4: Handling Exceptions

In [55]:
def get_video_meta_data(youtube, video_ids):
    """Return a DataFrame of metadata for every ID in ``video_ids``, reporting API errors.

    The IDs are sent in consecutive batches of up to 50 per request and the
    returned items are concatenated in request order.

    Args:
        youtube: API client exposing ``videos().list(...).execute()``.
        video_ids: iterable of video ID strings, or ``None``. ``None`` and
            empty/``None`` entries are ignored.

    Returns:
        pandas.DataFrame with one row per returned item and the columns
        ``video_title``, ``description``, ``published_at``, ``tags``,
        ``duration``, ``views``, ``likes`` and ``comments`` (missing text
        fields are ``None``, missing counts are ``0``). The frame is empty
        when there is nothing to request. Returns ``None`` when a request
        raises ``HttpError``; the error is printed.
    """
    try:
        # fetch_video_ids returns None on failure and ",".join rejects None
        # entries, so normalise the input before batching.
        ids = [] if video_ids is None else [vid for vid in video_ids if vid]

        all_video_details = []

        for i in range(0, len(ids), 50):
            request = youtube.videos().list(
                part="snippet,contentDetails,statistics",
                id=",".join(ids[i:i+50])
            )

            response = request.execute()

            for item in response.get('items', []):
                # Plain dict lookups; a trailing comma here would wrap each
                # value in a 1-tuple and force [0] indexing below.
                snippet = item.get('snippet', {})
                content_details = item.get('contentDetails', {})
                statistics = item.get('statistics', {})

                all_video_details.append({
                    'video_title': snippet.get('title', None),
                    'description': snippet.get('description', None),
                    'published_at': snippet.get('publishedAt', None),
                    'tags': snippet.get('tags', None),
                    'duration': content_details.get('duration', None),
                    'views': statistics.get('viewCount', 0),
                    'likes': statistics.get('likeCount', 0),
                    'comments': statistics.get('commentCount', 0)
                })

        return pd.DataFrame(all_video_details)
    except HttpError as e:
        print(e)
        # Make the failure result explicit for callers.
        return None

In [56]:
# Rebuild video_df with the exception-handling version of get_video_meta_data.
video_df = get_video_meta_data(youtube, video_ids)

In [57]:
# Display the resulting DataFrame as the cell output.
video_df

Unnamed: 0,video_title,description,published_at,tags,duration,views,likes,comments
0,Know Your Form of Intelligence I Robert Greene,"Get your copy of ""Frames of Mind"": https://amz...",2023-12-13T17:00:19Z,"[robert greene, robert greene books, the laws ...",PT52S,84345,10156,68
1,Die With No Regrets I Robert Greene,Robert Greene is the author of the New York Ti...,2023-12-11T17:00:28Z,"[robert greene, 48 laws of power, the 48 laws ...",PT44S,62792,7304,66
2,The Daily Laws Summarized in Under 6 Minutes b...,"""The Daily Laws"" is the perfect entry point fo...",2023-12-08T17:00:21Z,"[robert greene, the 33 strategies of war, the ...",PT5M6S,28771,1655,64
3,"Seduction Advice: Less Thinking, More Feeling ...",Robert Greene is the author of the New York Ti...,2023-12-07T17:00:30Z,,PT58S,194467,18183,167
4,Your Internal Radar I Robert Greene,@hubermanlab \n\nRobert Greene is the author o...,2023-12-06T17:00:27Z,"[robert greene, robert greene books, robert gr...",PT58S,72626,7705,94
...,...,...,...,...,...,...,...,...
455,It takes time to develop a skill | Robert Greene,Order my new book ⬇️ The Daily Laws | 48 Laws ...,2021-10-03T22:09:10Z,,PT46S,83601,9262,66
456,Robert Greene | Wolf of Wall Street | The prim...,Order my new book ⬇️ The Daily Laws | 48 Laws ...,2021-07-18T16:55:21Z,"[Robert Greene, Wolf of Wall Street, 48 Laws o...",PT55S,429803,34148,263
457,Robert Greene and Ryan Holiday: The Madness of...,Follow Me on Social Media:\nInstagram: https:/...,2020-10-02T16:00:15Z,"[RobertGreene, 48Laws, 48lawsofpower, mastery,...",PT1H4M16S,68416,2582,430
458,Irrationality I Robert Greene,Follow Me on Social Media:\nInstagram: https:/...,2020-08-29T23:19:39Z,"[RobertGreene, 48Laws, 48lawsofpower, mastery,...",PT1H1M19S,289549,12618,1247


In [59]:
# Write the video metadata to 'Robert_Greene.csv' without the index column.
video_df.to_csv('Robert_Greene.csv', index=False)