YouTube Channel Analysis (w/ YouTube API)

In [1]:
# Imports
from googleapiclient.discovery import build
import pandas as pd
from IPython.display import JSON


# API_Key
api_key = 'SOME-SPECTACULAR-SECRET-SHOULD-SITUATE'


# List of Channel IDs
CHANNEL_IDS = [
                "UCX6OQ3DkcsbYNE6H8uQQuVA",     # Mr Beast
                "UC-lHJZR3Gqxm24_Vd_AJ5Yw",     # PewDiePie
                "UCINb0wqPz-A0dV9nARjJlOQ",     # The Dodo
                "UCshoKvlZGZ20rVgazZp5vnQ",     # CaptainSparklez
                "UCY1kMZp36IQSyNx_9h4mpCg",     # Mark Rober
                "UC6nSFpj9HTCZ5t-N3Rm3-HA",     # Vsauce
                "UCiDJtJKMICpb9B1qf7qjEOA",     # Adam Savage's Tested
              ]

In [2]:
api_service_name = "youtube"
api_version = "v3"
youtube = build(api_service_name, api_version, developerKey=api_key)


def get_channel_stats(youtube, CHANNEL_IDS):
  all_data = []

  request = youtube.channels().list(
    part ='snippet,contentDetails,statistics',
    id=','.join(CHANNEL_IDS)
  )
  response = request.execute()

  JSON(response)

  # Looping through items / refer to properties in YouTube API doc
  # https://developers.google.com/youtube/v3/docs/channels
  for item in response['items']:
    data = {
      'channelName': item['snippet']['title'],
      'subscribers': item['statistics']['subscriberCount'],
      'views': item['statistics']['viewCount'],
      'totalVideos': item['statistics']['videoCount'],            
      'playlistId': item['contentDetails']['relatedPlaylists']['uploads'],
    }

    all_data.append(data)



  return(pd.DataFrame(all_data))

# # Get credentials and create an API client
# youtube = build(api_service_name, api_version, developerKey=api_key)

# request = youtube.channels().list(
#   part ='snippet,contentDetails,statistics',
#   id=','.join(CHANNEL_IDS)
# )
# response = request.execute()

# #print(response)
# JSON(response)

In [3]:
channel_stats = get_channel_stats(youtube, CHANNEL_IDS)
channel_stats

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,PewDiePie,111000000,29073005069,4716,UU-lHJZR3Gqxm24_Vd_AJ5Yw
1,Adam Savage’s Tested,6310000,1390346537,6356,UUiDJtJKMICpb9B1qf7qjEOA
2,Mark Rober,24700000,3666668465,120,UUY1kMZp36IQSyNx_9h4mpCg
3,Vsauce,19400000,2949921307,464,UU6nSFpj9HTCZ5t-N3Rm3-HA
4,The Dodo,14300000,10067994671,7968,UUINb0wqPz-A0dV9nARjJlOQ
5,CaptainSparklez,11400000,4035592020,5730,UUshoKvlZGZ20rVgazZp5vnQ
6,MrBeast,172000000,29327357187,741,UUX6OQ3DkcsbYNE6H8uQQuVA


In [4]:
playlist_id = "PLoSWVnSA9vG9qV0CVCpg5WwEy3LiP7udY"      # mrbeast "new uploads" playlist

def get_video_ids(youtube, playlist_id):
  video_ids = []
  
  request = youtube.playlistItems().list(
  part='snippet,contentDetails',
  playlistId=playlist_id,
  maxResults = 50       # default is 5 video ids
  )
  response = request.execute()

  for item in response['items']:
    video_ids.append(item['contentDetails']['videoId'])

  
  next_page_token = response.get('nextPageToken')
  while next_page_token is not None:

    request = youtube.playlistItems().list(
      part='snippet,contentDetails',
      playlistId=playlist_id,
      maxResults = 50,       # default is 5 video ids
      pageToken = next_page_token
    )
    response = request.execute()

    for item in response['items']:
      video_ids.append(item['contentDetails']['videoId'])

    next_page_token = response.get('nextPageToken')

  return video_ids


# # Display details about playlist item
# request = youtube.playlistItems().list(
#   part="snippet,contentDetails",
#   playlistId="PLoSWVnSA9vG9qV0CVCpg5WwEy3LiP7udY"
# )
# response = request.execute()

# JSON(response)

In [5]:
video_ids = get_video_ids(youtube, playlist_id)
len(video_ids)

220

In [6]:

# Fetches data from get_video_ids
def get_video_details(youtube, video_ids):
  all_video_info = []

  for i in range(0, len(video_ids), 50):
    request = youtube.videos().list(
      part='snippet,contentDetails,statistics',
      id=','.join(video_ids[i:i+50])
    )
    response = request.execute()

    for video in response['items']:
      ideal_stats = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                    'contentDetails': ['duration', 'definition', 'caption'],
                    'statistics': ['viewCount', 'likeCount', 'favoriteCount', 'commentCount']
                    }
      
      video_info = {}
      video_info['video_id'] = video['id']

    for k in ideal_stats.keys():
      for v in ideal_stats[k]:
        try:
          video_info[v] = video[k][v]
        except:
          video_info[v] = None        # not all videos will have info added

    all_video_info.append(video_info)

  #JSON(response)
  return pd.DataFrame(all_video_info)

In [7]:
video_df = get_video_details(youtube, video_ids)
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,duration,definition,caption,viewCount,likeCount,favoriteCount,commentCount
0,dg2Ag3e8W-Q,MrBeast,I Opened A Restaurant That Pays You To Eat At It,Buy Feastables!! https://feastables.com\n\nSUB...,,2020-12-19T22:00:03Z,PT9M32S,hd,True,161143929,4821755,0,746030
1,D9lVNzyhYnc,MrBeast,Surviving 24 Hours Straight In The Bermuda Tri...,THIS IS ONE OF THE CRAZIEST THINGS I'VE EVER D...,,2019-11-09T21:00:55Z,PT11M41S,hd,True,198787916,3818646,0,99007
2,dBxOYE2j55U,MrBeast,Make This Video The Most Liked Video On Youtube,"WHAT IS UP LOGANG, MAKE SURE YOU LIKE THE VIDE...",,2019-01-16T22:05:01Z,PT13M11S,hd,True,137706463,27856900,0,300427
3,wbzD04leeLI,MrBeast,Do Water Repellent Shoes Actually Work?,I CAN'T BELIEVE I WALKED ON WATER!\n\nNew Merc...,"[water repellent, water, toy, 24 hours, water ...",2018-06-22T22:11:00Z,PT11M39S,hd,True,120540578,1870315,0,65855
4,VwWV4JelEzY,MrBeast,Walking Into Random Stores With 100 Dinosaurs,I CAN'T BELIEVE WE DID THIS\n\nSUBSCRIBE OR I ...,"[100 dinosaurs, satire]",2018-03-22T22:43:02Z,PT10M12S,hd,False,32446630,892699,0,45699


## Data Pre-processing

In [8]:
# Null Values
video_df.isnull().any()

video_id         False
channelTitle     False
title            False
description      False
tags              True
publishedAt      False
duration         False
definition       False
caption          False
viewCount        False
likeCount        False
favoriteCount    False
commentCount     False
dtype: bool

In [9]:
# Data Types
video_df.dtypes

video_id         object
channelTitle     object
title            object
description      object
tags             object
publishedAt      object
duration         object
definition       object
caption          object
viewCount        object
likeCount        object
favoriteCount    object
commentCount     object
dtype: object

In [10]:
# Converting (parameter part: statistics) to numerical data type
numeric_cols = ['viewCount', 'likeCount', 'favoriteCount', 'commentCount']
video_df[numeric_cols] = video_df[numeric_cols].apply(pd.to_numeric, errors='coerce', axis=1)

In [None]:
# Not working / Needs fixing

import argparse
parser = argparse.ArgumentParser()
args = parser.parse_args()

# Converting "publishedAt" to "publishDayName" -- Identify publish day in the week
video_df['publishedAt'] = video_df['publishedAt'].apply(lambda x : parser.parse(x))
video_df['publishDayName'] = video_df['publishedAt'].apply(lambda x: x.strftime("%A"))

In [12]:
import isodate
video_df['durationSecs'] = video_df['duration'].apply(lambda x : isodate.parse_duration(x))
video_df['durationSecs'] = video_df['durationSecs'].astype('timedelta64[s]')

video_df[['durationSecs', 'duration']]

Unnamed: 0,durationSecs,duration
0,0 days 00:09:32,PT9M32S
1,0 days 00:11:41,PT11M41S
2,0 days 00:13:11,PT13M11S
3,0 days 00:11:39,PT11M39S
4,0 days 00:10:12,PT10M12S


In [13]:
# Create "TagCount" column
video_df['tagCount'] = video_df['tags'].apply(lambda x : 0 if x is None else len(x))


In [14]:
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,duration,definition,caption,viewCount,likeCount,favoriteCount,commentCount,durationSecs,tagCount
0,dg2Ag3e8W-Q,MrBeast,I Opened A Restaurant That Pays You To Eat At It,Buy Feastables!! https://feastables.com\n\nSUB...,,2020-12-19T22:00:03Z,PT9M32S,hd,True,161143929,4821755,0,746030,0 days 00:09:32,0
1,D9lVNzyhYnc,MrBeast,Surviving 24 Hours Straight In The Bermuda Tri...,THIS IS ONE OF THE CRAZIEST THINGS I'VE EVER D...,,2019-11-09T21:00:55Z,PT11M41S,hd,True,198787916,3818646,0,99007,0 days 00:11:41,0
2,dBxOYE2j55U,MrBeast,Make This Video The Most Liked Video On Youtube,"WHAT IS UP LOGANG, MAKE SURE YOU LIKE THE VIDE...",,2019-01-16T22:05:01Z,PT13M11S,hd,True,137706463,27856900,0,300427,0 days 00:13:11,0
3,wbzD04leeLI,MrBeast,Do Water Repellent Shoes Actually Work?,I CAN'T BELIEVE I WALKED ON WATER!\n\nNew Merc...,"[water repellent, water, toy, 24 hours, water ...",2018-06-22T22:11:00Z,PT11M39S,hd,True,120540578,1870315,0,65855,0 days 00:11:39,8
4,VwWV4JelEzY,MrBeast,Walking Into Random Stores With 100 Dinosaurs,I CAN'T BELIEVE WE DID THIS\n\nSUBSCRIBE OR I ...,"[100 dinosaurs, satire]",2018-03-22T22:43:02Z,PT10M12S,hd,False,32446630,892699,0,45699,0 days 00:10:12,2
