In [1]:
from googleapiclient.discovery import build
import pandas as pd
from IPython.display import JSON

In [2]:
api_key = 'AIzaSyA7ZfMOikaJEUBgHG6f_yRutKiyiR8eKZA'

In [11]:
channel_ids = ['UCNzd8n4UapaAtrhC062RxDQ', #Nyashinski
               'UCBZPPYU_l99Mg6-GXm6xbMg', #Sauti Sol
               'UCYGgbbl6veQIN9Hn0RhVX6A', #Femi One
               'UCRRSCAz5VJQqJHAIPgFVCxw', #Khaligraph Jones
               'UCsmPEw8gG-tmNJn_UzKwcWA', #Chris Kaiga
              # More channels here
              ]

In [4]:
api_service_name = "youtube"
api_version = "v3"

# Get credentials and create an API client
youtube = build(
    api_service_name, api_version, developerKey=api_key)

In [12]:
def get_channel_stats(youtube, channel_ids):
    """
    Get channel statistics: title, subscriber count, view count, video count, upload playlist
    Params:

    youtube: the build object from googleapiclient.discovery
    channels_ids: list of channel IDs

    Returns:
    Dataframe containing the channel statistics for all channels in the provided list: title, subscriber count, view count, video count, upload playlist

    """
    all_data = []
    request = youtube.channels().list(
                part='snippet,contentDetails,statistics',
                id=','.join(channel_ids))
    response = request.execute()

    for i in range(len(response['items'])):
        data = dict(channelName = response['items'][i]['snippet']['title'],
                    subscribers = response['items'][i]['statistics']['subscriberCount'],
                    views = response['items'][i]['statistics']['viewCount'],
                    totalVideos = response['items'][i]['statistics']['videoCount'],
                    playlistId = response['items'][i]['contentDetails']['relatedPlaylists']['uploads'])
        all_data.append(data)

    return pd.DataFrame(all_data)

def get_video_ids(youtube, playlist_id):
    """
    Get list of video IDs of all videos in the given playlist
    Params:

    youtube: the build object from googleapiclient.discovery
    playlist_id: playlist ID of the channel

    Returns:
    List of video IDs of all videos in the playlist

    """

    request = youtube.playlistItems().list(
                part='contentDetails',
                playlistId = playlist_id,
                maxResults = 50)
    response = request.execute()

    video_ids = []

    for i in range(len(response['items'])):
        video_ids.append(response['items'][i]['contentDetails']['videoId'])

    next_page_token = response.get('nextPageToken')
    more_pages = True

    while more_pages:
        if next_page_token is None:
            more_pages = False
        else:
            request = youtube.playlistItems().list(
                        part='contentDetails',
                        playlistId = playlist_id,
                        maxResults = 50,
                        pageToken = next_page_token)
            response = request.execute()

            for i in range(len(response['items'])):
                video_ids.append(response['items'][i]['contentDetails']['videoId'])

            next_page_token = response.get('nextPageToken')

    return video_ids

def get_video_details(youtube, video_ids):
    """
    Get video statistics of all videos with given IDs
    Params:

    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs

    Returns:
    Dataframe with statistics of videos, i.e.:
        'channelTitle', 'title', 'description', 'tags', 'publishedAt'
        'viewCount', 'likeCount', 'favoriteCount', 'commentCount'
        'duration', 'definition', 'caption'
    """

    all_video_info = []

    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute()

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption']
                            }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)

    return pd.DataFrame(all_video_info)

def get_comments_in_videos(youtube, video_ids):
    """
    Get top level comments as text from all videos with given IDs (only the first 10 comments due to quote limit of Youtube API)
    Params:

    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs

    Returns:
    Dataframe with video IDs and associated top level comment in text.

    """
    all_comments = []

    for video_id in video_ids:
        try:
            request = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id
            )
            response = request.execute()

            comments_in_video = [comment['snippet']['topLevelComment']['snippet']['textOriginal'] for comment in response['items'][0:10]]
            comments_in_video_info = {'video_id': video_id, 'comments': comments_in_video}

            all_comments.append(comments_in_video_info)

        except:
            # When error occurs - most likely because comments are disabled on a video
            print('Could not get comments for video ' + video_id)

    return pd.DataFrame(all_comments)

### Get channel statistics

Using the `get_channel_stats` function defined below, now we are going to obtain the channel statistics for the 9 channels in scope.

In [13]:
channel_data = get_channel_stats(youtube, channel_ids)

In [14]:
# Now I can print out the data and take a look at the channel statistics overview.
channel_data

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Femi One,282000,26402353,109,UUYGgbbl6veQIN9Hn0RhVX6A
1,Sauti Sol,1060000,307035776,256,UUBZPPYU_l99Mg6-GXm6xbMg
2,Nyashinski,607000,127742464,44,UUNzd8n4UapaAtrhC062RxDQ
3,Khaligraph Jones,780000,99210504,101,UURRSCAz5VJQqJHAIPgFVCxw
4,Chris Kaiga,92400,13752117,10,UUsmPEw8gG-tmNJn_UzKwcWA


In [15]:
# Convert count columns to numeric columns
numeric_cols = ['subscribers', 'views', 'totalVideos']
channel_data[numeric_cols] = channel_data[numeric_cols].apply(pd.to_numeric, errors='coerce')

In [16]:
# Create a dataframe with video statistics and comments from all channels

video_df = pd.DataFrame()
comments_df = pd.DataFrame()

for c in channel_data['channelName'].unique():
    print("Getting video information from channel: " + c)
    playlist_id = channel_data.loc[channel_data['channelName']== c, 'playlistId'].iloc[0]
    video_ids = get_video_ids(youtube, playlist_id)

    # get video data
    video_data = get_video_details(youtube, video_ids)
    # get comment data
    comments_data = get_comments_in_videos(youtube, video_ids)

    # append video data together and comment data toghether
    video_df = video_df.append(video_data, ignore_index=True)
    comments_df = comments_df.append(comments_data, ignore_index=True)

Getting video information from channel: Femi One




Could not get comments for video ueewOtCdlB4


  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


Getting video information from channel: Sauti Sol




Could not get comments for video U2vEpHKrFJE


  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


Getting video information from channel: Nyashinski


  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


Getting video information from channel: Khaligraph Jones


  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


Getting video information from channel: Chris Kaiga




Could not get comments for video n-DibTVzdWI


  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


In [17]:
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,vs4WeMRZx5I,Femi One,FEMI ONE - UNDER THE INFLUENCE (OFFICIAL VIDEO...,A Kaka Empire & Femi One 2023/24 presentation\...,,2023-09-29T13:00:10Z,601479,19726,,1374,PT2M15S,hd,false
1,5RDaWUYZ9DY,Femi One,ONE FOR ONE FOUNDATION - MENSTRUAL SUMMIT 2023...,"Nairobi, 29th May 2023 – the renowned Kenyan a...",,2023-06-05T07:09:51Z,3422,100,,11,PT6M6S,hd,false
2,y6bZn0of5dg,Femi One,FEMI ONE - SUSPECT (OFFICIAL VIDEO) FEAT KATAP...,A Kaka Empire & Femi One 2023 presentation\nOf...,,2023-05-13T08:06:33Z,137913,5560,,601,PT3M34S,hd,false
3,phBa9Flj76A,Femi One,FEMI ONE - RAHISI (OFFICIAL ANIMATED VIDEO) FE...,#Rahisi #kakaempireisthelifestyle #FemiOne \nA...,,2023-04-28T11:00:07Z,15399,902,,68,PT3M11S,hd,false
4,xTVqa8bMWFk,Femi One,FEMI ONE - 17B (OFFICIAL AUDIO) SEND SKIZA 698...,Dem Kutoka Mwiki - The EP\n\nLYRICS\n\nUkianza...,,2023-04-16T21:00:31Z,5470,283,,39,PT1M29S,hd,false
...,...,...,...,...,...,...,...,...,...,...,...,...,...
536,e4Y-V53eeh0,Chris Kaiga,Chris Kaiga - KONKODO ft Kiloriti (Official A...,"“Knock! Knock!” = “Konkodo""\n\n\nGet Konkodo o...","[Chiskaiga, Debe, Chriskaiga, chriskaigamusic,...",2020-02-14T06:30:00Z,89641,2434,,131,PT2M28S,hd,true
537,ENVjNBYSZKI,Chris Kaiga,Chris Kaiga - HIZI BUNDAS (Official Music Video),Chris Kaiga - HIZI BUNDAS (Official Music Vide...,"[Chris Kaiga, zimenice, bundas, playke, genget...",2019-09-17T05:46:22Z,641513,10840,,650,PT3M12S,hd,true
538,YhZXwbPB2NM,Chris Kaiga,Chris Kaiga - ZIMENICE (Official Music Video) ...,Chris Kaiga - ZIMENICE (Official Music Video) ...,"[zimenice, zamenice, niko on, Chris kaiga, och...",2019-05-03T07:58:14Z,3463864,34842,,1420,PT3M5S,hd,true
539,n-DibTVzdWI,Chris Kaiga,Chris Kaiga Live Stream,,,2019-03-26T14:34:27Z,0,0,,0,P0D,sd,false


In [18]:
video_df.to_csv('YTdata.csv', index=False)