In [1]:
from googleapiclient.discovery import build
from IPython.display import JSON

In [2]:
import pandas as pd

In [3]:
# api_key = 'API Key'

In [4]:
channel_ids = ['UCJQJAI7IjbLcpsjWdSzYz0Q', # Thu Vu Data Analytics
               #more channels here
              ]

In [5]:
api_service_name = "youtube"
api_version = "v3"


# Get credentials and create an API client
youtube = build(
    api_service_name, api_version, developerKey=api_key)

In [6]:


def get_channel_stats(youtube, channel_ids):
    
    """
    Get channel stats
    
    Params:
    ------
    youtube: build object of Youtube API
    channel_ids: list of channel IDs
    
    Returns:
    ------
    dataframe with all channel stats for each channel ID
    
    """
    
    all_data = []
    
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=','.join(channel_ids)
    )
    response = request.execute()

    # loop through items
    for item in response['items']:
        data = {'channelName': item['snippet']['title'],
                'subscribers': item['statistics']['subscriberCount'],
                'views': item['statistics']['viewCount'],
                'totalVideos': item['statistics']['videoCount'],
                'playlistId': item['contentDetails']['relatedPlaylists']['uploads']
        }
        
        all_data.append(data)
        
    return(pd.DataFrame(all_data))

def get_video_ids(youtube, playlist_id):
    
    video_ids = []
    
    request = youtube.playlistItems().list(
        part="snippet,contentDetails",
        playlistId=playlist_id,
        maxResults = 50
    )
    response = request.execute()
    
    for item in response['items']:
        video_ids.append(item['contentDetails']['videoId'])
        
    next_page_token = response.get('nextPageToken')
    while next_page_token is not None:
        request = youtube.playlistItems().list(
                    part='contentDetails',
                    playlistId = playlist_id,
                    maxResults = 50,
                    pageToken = next_page_token)
        response = request.execute()

        for item in response['items']:
            video_ids.append(item['contentDetails']['videoId'])

        next_page_token = response.get('nextPageToken')
        
    return video_ids
    
    
def get_video_details(youtube, video_ids):

    all_video_info = []
    
    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute() 

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption']
                            }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)
    
    return pd.DataFrame(all_video_info)

# def get_comments_in_videos(youtube, video_ids):
#     """
#     Get top level comments as text from all videos with given IDs (only the first 10 comments due to quote limit of Youtube API)
#     Params:
    
#     youtube: the build object from googleapiclient.discovery
#     video_ids: list of video IDs
    
#     Returns:
#     Dataframe with video IDs and associated top level comment in text.
    
#     """
#     all_comments = []
    
#     for video_id in video_ids:
#         try:   
#             request = youtube.commentThreads().list(
#                 part="snippet,replies",
#                 videoId=video_id
#             )
#             response = request.execute()
        
#             comments_in_video = [comment['snippet']['topLevelComment']['snippet']['textOriginal'] for comment in response['items'][0:10]]
#             comments_in_video_info = {'video_id': video_id, 'comments': comments_in_video}

#             all_comments.append(comments_in_video_info)
            
#         except: 
#             # When error occurs - most likely because comments are disabled on a video
#             print('Could not get comments for video ' + video_id)
        
#     return pd.DataFrame(all_comments) 



In [7]:
channel_stats = get_channel_stats(youtube, channel_ids)

In [8]:
channel_stats

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Thu Vu data analytics,166000,5353692,74,UUJQJAI7IjbLcpsjWdSzYz0Q


In [9]:
request = youtube.playlistItems().list(
    part="snippet,contentDetails",
    playlistId="UUJQJAI7IjbLcpsjWdSzYz0Q"
)
response = request.execute()

JSON(response)

<IPython.core.display.JSON object>

In [10]:
playlist_id = 'UUJQJAI7IjbLcpsjWdSzYz0Q'

In [11]:
video_ids = get_video_ids(youtube, playlist_id)

In [12]:
request = youtube.videos().list(
    part="snippet,contentDetails,statistics",
    id=video_ids[0:5]
)
response = request.execute()

JSON(response)

<IPython.core.display.JSON object>

In [13]:
video_df = get_video_details(youtube, video_ids)

In [14]:
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,EzVIkHQWnhc,Thu Vu data analytics,How to Handle Sensitive Data in Data Science (...,🏆 Check out Antigranular competition & win cas...,"[data analytics, data science, python, data, t...",2023-08-22T00:29:27Z,3959,223,,22,PT12M19S,hd,false
1,8sLq4P-QDL0,Thu Vu data analytics,I Tried ChatGPT Code Interpreter for Data Task...,I tell ChatGPT Code Interpreter to do my job 😅...,"[data analytics, data science, python, data, t...",2023-07-15T19:02:17Z,32191,1478,,50,PT58S,hd,false
2,3736Pd_hwFQ,Thu Vu data analytics,40 Productivity Tips that Make You More Pro in...,📩 Interested in hearing about my coming course...,"[data analytics, data science, python, data, t...",2023-07-08T09:45:30Z,11458,606,,59,PT1H16M9S,hd,false
3,PvnSSrnjLX8,Thu Vu data analytics,Self-taught vs. Guided Program: CareerFoundry'...,📊 Enroll with this link to get 10% off the Dat...,"[data analytics, data science, python, data, t...",2023-06-16T22:57:31Z,6648,206,,33,PT16M32S,hd,false
4,S5U76LPu_bQ,Thu Vu data analytics,Is a Computer Science Degree WORTHLESS 🤯? (in ...,I've been thinking lately if it's still worth ...,"[data analytics, data science, python, data, t...",2023-06-08T16:10:11Z,16630,613,,52,PT11M8S,hd,false
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,5LWoJAh-kww,Thu Vu data analytics,Data Analyst Skill Stack // How I Became A Dat...,👩🏻‍💻 My laptop and iPad for doing DS/ study 👉 ...,,2021-06-16T10:16:47Z,31948,1772,,72,PT10M35S,hd,false
70,_RzoHVWKwq4,Thu Vu data analytics,Effective visual note-taking on iPad + Downloa...,Hi there! In this video I’m showing you how to...,"[Note-taking, Study, iPad Pro, Visual note-tak...",2021-06-09T06:58:21Z,9235,334,,10,PT8M19S,hd,false
71,hWKLO7GtpiU,Thu Vu data analytics,Data scientist is NOT the only SEXY job // All...,Hi there! In this video I explain different da...,,2021-06-01T22:51:42Z,8355,284,,20,PT14M4S,hd,false
72,dBZqggW22rs,Thu Vu data analytics,"How I take notes on iPad Pro | Notion, Notes, ...",My note-taking system tour in Notion and tips ...,"[Notes, Study, Productivity, Notion, Procreate...",2021-05-16T22:47:12Z,9608,246,,18,PT9M7S,hd,false


In [15]:
channel_ids = ['UCJQJAI7IjbLcpsjWdSzYz0Q', # Thu Vu Data Analytics
               'UCCezIgC97PvUuR4_gbFUs5g', # Corey Schafer
               'UCfzlCWGWYyIQ0aLC5w48gBQ', # Sentdex
               'UCtYLUTtgS3k1Fg4y5tAhLbw' # Statquest
               #more channels here
              ]

In [16]:
channel_data = get_channel_stats(youtube, channel_ids)

In [17]:
channel_data

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,StatQuest with Josh Starmer,998000,53268099,263,UUtYLUTtgS3k1Fg4y5tAhLbw
1,sentdex,1270000,111618352,1248,UUfzlCWGWYyIQ0aLC5w48gBQ
2,Thu Vu data analytics,166000,5353692,74,UUJQJAI7IjbLcpsjWdSzYz0Q
3,Corey Schafer,1200000,88881156,232,UUCezIgC97PvUuR4_gbFUs5g


In [18]:
# Convert count columns to numeric columns
numeric_cols = ['subscribers', 'views', 'totalVideos']
channel_data[numeric_cols] = channel_data[numeric_cols].apply(pd.to_numeric, errors='coerce')

In [19]:
video_df = pd.DataFrame()
# comments_df = pd.DataFrame()

for c in channel_data['channelName'].unique():
    print("Getting video information from channel: " + c)
    playlist_id = channel_data.loc[channel_data['channelName']== c, 'playlistId'].iloc[0]
    video_ids = get_video_ids(youtube, playlist_id)
    
    # get video data
    video_data = get_video_details(youtube, video_ids)
    # get comment data
    # comments_data = get_comments_in_videos(youtube, video_ids)

    # append video data together and comment data toghether
    video_df = video_df.append(video_data, ignore_index=True)
    # comments_df = comments_df.append(comments_data, ignore_index=True)

Getting video information from channel: StatQuest with Josh Starmer


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: sentdex


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Thu Vu data analytics


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Corey Schafer


  video_df = video_df.append(video_data, ignore_index=True)


In [20]:
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,Ka04Dj7DxGk,StatQuest with Josh Starmer,Another 3 lessons from my Pop!!!,Since September 4th is Global Frank Starmer Da...,"[Josh Starmer, StatQuest, Machine Learning, St...",2023-09-04T04:00:16Z,6052,241,,36,PT6M46S,hd,false
1,bQ5BoolX9Ag,StatQuest with Josh Starmer,"Decoder-Only Transformers, ChatGPTs specific T...","Transformers are taking over AI right now, and...","[Josh Starmer, StatQuest, Machine Learning, St...",2023-08-28T04:00:30Z,19394,690,,104,PT36M45S,hd,false
2,zxQyTK8quyY,StatQuest with Josh Starmer,"Transformer Neural Networks, ChatGPT's foundat...",Transformer Neural Networks are the heart of p...,"[Josh Starmer, StatQuest, Machine Learning, Ne...",2023-07-24T04:00:34Z,156591,5174,,596,PT36M15S,hd,true
3,8ZcccMzTz7Y,StatQuest with Josh Starmer,What is a Logit?,I used to see references to logit values or lo...,"[Josh Starmer, StatQuest, Machine Learning, St...",2023-06-19T04:00:04Z,9788,343,,26,PT29S,hd,false
4,YaQEUgIr4Mk,StatQuest with Josh Starmer,Logistic vs Logit Functions,"Gosh, here are some terms that I could never g...","[Josh Starmer, StatQuest, Machine Learning, St...",2023-06-12T04:00:16Z,5966,261,,18,PT30S,hd,false
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1814,zlWkB4bzZsg,Corey Schafer,Lab Puppy playing fetch in a lake,My chocolate lab puppy and I playing fetch at ...,"[Dog, Puppy, Chocolate Lab, Labrador, Labrador...",2014-04-25T15:34:01Z,13189,434,,22,PT1M2S,hd,false
1815,_63O1hgJTaQ,Corey Schafer,Lab Puppy playing fetch in a creek,My chocolate lab puppy and I playing fetch in ...,"[Dog, Puppy, Chocolate Lab, Labrador, Labrador...",2014-04-25T15:23:56Z,12517,280,,7,PT43S,hd,false
1816,sEJGynuNdWw,Corey Schafer,If I'm running even 1 minute late for my dog's...,I take my dog for a long walk every morning. A...,"[Dog, Puppy, Chocolate Lab, Labrador, Funny, D...",2014-04-25T14:25:06Z,73891,1609,,72,PT40S,hd,false
1817,iT1SE6AMbq0,Corey Schafer,Lab Puppy Diving in Snow,Lab puppy diving into the snow for a snowball,"[Puppy (Animal), Dog (Organism Classification)...",2014-03-22T04:32:13Z,27261,525,,17,PT55S,hd,false
