# Fetching Data using Youtube Api

## Create a Project on Google Cloud

#### Cloud Credential

    1. Go to console.cloud.google.com
    2. Create a project
    3. Under APIs and services create credentials and copy the API Key
    4. Under API Library, enable Youtube Data API v3

In [1]:
api_key = 'your_api_key'

### Install Google APIs Client Library

In [2]:
# pip3 install --upgrade google-api-python-client

### Get the channel id you want to fetch statistics

In [3]:
channel_ids = ['UCX6OQ3DkcsbYNE6H8uQQuVA', #MrBeast
               'UC7cs8q-gJRlGwj4A8OmCmXg', # Alex the analyst
               # more channels here
              ]

### Import Libraries

In [4]:
from googleapiclient.discovery import build
import pandas as pd
from IPython.display import JSON

In [5]:
api_service_name = "youtube"
api_version = "v3"

# Get credentials and create an API client
youtube = build(
    api_service_name, api_version, developerKey=api_key)

### Defining Functions

In [6]:
def get_channel_stats(youtube, channel_ids):
    """
    Get channel statistics: title, subscriber count, view count, video count, upload playlist
    Params:
    
    youtube: the build object from googleapiclient.discovery
    channels_ids: list of channel IDs
    
    Returns:
    Dataframe containing the channel statistics for all channels in the provided list: title, subscriber count, view count, video count, upload playlist
    
    """
    all_data = []
    request = youtube.channels().list(
                part='snippet,contentDetails,statistics',
                id=','.join(channel_ids))
    response = request.execute() 
    
    for i in range(len(response['items'])):
        data = dict(channelName = response['items'][i]['snippet']['title'],
                    subscribers = response['items'][i]['statistics']['subscriberCount'],
                    views = response['items'][i]['statistics']['viewCount'],
                    totalVideos = response['items'][i]['statistics']['videoCount'],
                    playlistId = response['items'][i]['contentDetails']['relatedPlaylists']['uploads'])
        all_data.append(data)
    
    return pd.DataFrame(all_data)

In [7]:

def get_video_ids(youtube, playlist_id):
    """
    Get list of video IDs of all videos in the given playlist
    Params:
    
    youtube: the build object from googleapiclient.discovery
    playlist_id: playlist ID of the channel
    
    Returns:
    List of video IDs of all videos in the playlist
    
    """
    
    request = youtube.playlistItems().list(
                part='contentDetails',
                playlistId = playlist_id,
                maxResults = 50)
    response = request.execute()
    
    video_ids = []
    
    for i in range(len(response['items'])):
        video_ids.append(response['items'][i]['contentDetails']['videoId'])
        
    next_page_token = response.get('nextPageToken')
    more_pages = True
    
    while more_pages:
        if next_page_token is None:
            more_pages = False
        else:
            request = youtube.playlistItems().list(
                        part='contentDetails',
                        playlistId = playlist_id,
                        maxResults = 50,
                        pageToken = next_page_token)
            response = request.execute()
    
            for i in range(len(response['items'])):
                video_ids.append(response['items'][i]['contentDetails']['videoId'])
            
            next_page_token = response.get('nextPageToken')
        
    return video_ids

In [8]:
def get_video_details(youtube, video_ids):
    """
    Get video statistics of all videos with given IDs
    Params:
    
    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs
    
    Returns:
    Dataframe with statistics of videos, i.e.:
        'channelTitle', 'title', 'description', 'tags', 'publishedAt'
        'viewCount', 'likeCount', 'favoriteCount', 'commentCount'
        'duration', 'definition', 'caption'
    """
        
    all_video_info = []
    
    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute() 

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption']
                            }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)
            
    return pd.DataFrame(all_video_info)

In [25]:

def get_comments_in_videos(youtube, video_ids):
    """
    Get top level comments as text from all videos with given IDs (only the first 5 comments due to quote limit of Youtube API)
    Params:
    
    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs
    
    Returns:
    Dataframe with video IDs and associated top level comment in text.
    
    """
    all_comments = []
    
    for video_id in video_ids:
        try:   
            request = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id
            )
            response = request.execute()
        
            comments_in_video = [comment['snippet']['topLevelComment']['snippet']['textOriginal'] for comment in response['items'][0:5]]
            comments_in_video_info = {'video_id': video_id, 'comments': comments_in_video}

            all_comments.append(comments_in_video_info)
            
        except: 
            # When error occurs - most likely because comments are disabled on a video
            print('Could not get comments for video ' + video_id)
        
    return pd.DataFrame(all_comments)

In [10]:
all_channel_stats = get_channel_stats(youtube, channel_ids)
all_channel_stats

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Alex The Analyst,642000,27225050,266,UU7cs8q-gJRlGwj4A8OmCmXg
1,MrBeast,217000000,38492249747,771,UUX6OQ3DkcsbYNE6H8uQQuVA


In [11]:
mrBeast_video_ids = get_video_ids(youtube, all_channel_stats.playlistId[1])
mrBeast_video_ids[0:5] # first 5 video_ids

['rWBOITBjitE', 'tnTPaLOaHz8', 'Wdjh81uH6FU', '7dYTw-jAYkY', 'AjZDwxt7S8w']

In [21]:
len(mrBeast_video_ids)

771

In [14]:
video_df = get_video_details(youtube,mrBeast_video_ids)
video_df.head(5)

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,rWBOITBjitE,MrBeast,Could You Walk Up A Skyscraper?,,,2023-12-05T18:00:00Z,27492401,2167926,,8883,PT50S,hd,False
1,tnTPaLOaHz8,MrBeast,"$10,000 Every Day You Survive In A Grocery Store",I didn’t expect him to stay that long \nShop K...,,2023-12-02T17:00:00Z,106733691,3975174,,61208,PT21M32S,hd,True
2,Wdjh81uH6FU,MrBeast,"$1 vs $10,000,000 Job!",I can’t believe they actually hired me lol\nTr...,,2023-11-25T17:00:00Z,102719593,3584723,,61559,PT15M39S,hd,True
3,7dYTw-jAYkY,MrBeast,I Spent 7 Days Buried Alive,Please don't try this at home lol\nVerizon 5G ...,,2023-11-18T17:00:01Z,107671323,5318503,,106501,PT18M40S,hd,True
4,AjZDwxt7S8w,MrBeast,I Gave Away A House On Halloween,,,2023-11-10T18:00:00Z,101839809,5750589,,17313,PT48S,hd,False


In [26]:
comments_df = get_comments_in_videos(youtube,mrBeast_video_ids[0:10])
comments_df

Unnamed: 0,video_id,comments
0,rWBOITBjitE,"[🎉🎉🎉🎉🎉, VOLTA DUIO BOTTA, Оператор:😢, Woóooooo..."
1,tnTPaLOaHz8,[make sure to buy feastables at your local gro...
2,Wdjh81uH6FU,[The final job is the craziest thing I’ve ever...
3,7dYTw-jAYkY,[I’ve now been buried for 50 hours and 7 days....
4,AjZDwxt7S8w,"[You are so much better 💕, Mr B\n Eats, Te qui..."
5,aZnqqXaT02g,"[Jimmy is 👌 👍 😍, Happy Halloween 🎃, ❤❤❤❤❤, N..."
6,mwKJfNYwvm8,[Visit https://www.beastphilanthropy.org/campa...
7,l9_8_pDTmis,"[Gastaron más de lo que ganaron ..:, 🇹🇷🇹🇷🇹🇷🇹🇷🇹..."
8,n7x4Jj9pdH8,"[Jimmy you are besttttt❤❤❤❤😊😊😊😊😊, Advita🎉😊I e..."
9,QjvpjXdgugA,"[Before anyone asks, I blew them up when they ..."


In [16]:
video_df.shape

(771, 13)

In [23]:
comments_df.shape

(10, 2)

### Export the data into csv

In [27]:
video_df.to_csv("mrbeast_videos.csv")

In [28]:
comments_df.to_csv("mrbeast_top_comments.csv")