## Imports

In this section we are going to import our libraries.

In [1]:
!pip install google-api-python-client
!pip install google-auth-httplib2
!pip install google-auth-oauthlib
!pip install schedule
!pip install apscheduler



In [2]:
from googleapiclient.discovery import build
import os
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [3]:
!pip install plotly==4.11.0

%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.offline as pyo
import plotly.figure_factory as ff
import plotly.graph_objects as go




## Setup


In [4]:

# Read the key from the YOUTUBE_API_KEY environment variable so it does not have to be
# pasted into (and saved with) the notebook; falls back to an empty string when unset.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "")

# Channel whose uploads are analysed throughout the notebook.
CHANNEL_ID = "UCNU_lfiiWBdtULKOw6X0Dig"

# Shared YouTube Data API v3 client used by every API helper below.
youtube = build('youtube', 'v3', developerKey=API_KEY)

# Counter that dict_to_series increases by 30 on every refresh
# (presumably minutes, matching the 30-minute schedule - confirm).
hour_measure = 0

## API Functions

Problem:
Using the YouTube Data API (https://developers.google.com/youtube/v3/), scrape a channel's videos along with their tags and stats.


In [5]:
def get_channel_stats(youtube, channel_id):
    """Fetch the snippet, contentDetails and statistics parts for a channel.

    The contentDetails part holds the uploads playlist id that is used later
    to list the channel's videos.

    Args:
        youtube: API client exposing ``channels().list(...).execute()``.
        channel_id: Id of the channel to look up.

    Returns:
        list: The value stored under the response's ``items`` key, or an
        empty list when the response has no ``items`` key.
    """
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=channel_id
    )
    response = request.execute()

    # A response without 'items' (e.g. nothing matched the id) yields an empty
    # list instead of a KeyError, so callers can test the result for emptiness.
    return response.get('items', [])

In [6]:
def get_video_list(youtube, upload_id):
    """Collect the unique video ids of a playlist, following every result page.

    A page holds at most 50 results, so the playlist is read page by page
    using the ``nextPageToken`` returned with each response.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        upload_id: Id of the playlist to read (e.g. a channel's uploads playlist).

    Returns:
        list: Video ids in the order they were first seen, without duplicates.
    """
    video_list = []
    seen_ids = set()  # O(1) duplicate check; video_list keeps the order
    page_token = None

    while True:
        # Single place where the request is built; the token is only sent
        # once the API has handed one out.
        params = dict(
            part="snippet,contentDetails",
            playlistId=upload_id,
            maxResults=50,
        )
        if page_token is not None:
            params['pageToken'] = page_token

        response = youtube.playlistItems().list(**params).execute()

        for video in response.get('items', []):
            video_id = video['contentDetails']['videoId']
            if video_id not in seen_ids:
                seen_ids.add(video_id)
                video_list.append(video_id)

        # Do we have more pages?
        page_token = response.get('nextPageToken')
        if page_token is None:
            break

    return video_list

In [7]:
def get_video_details(youtube, video_list):
    """Fetch title, description, tags and counters for each video id.

    The ids are requested in batches of 50, the maximum per call.

    Args:
        youtube: API client exposing ``videos().list(...).execute()``.
        video_list: Sequence of video ids.

    Returns:
        list: One dict per returned video with the keys ``id``, ``title``,
        ``description``, ``published``, ``tag_count``, ``tags``,
        ``statistics``, ``view_count``, ``like_count``, ``dislike_count`` and
        ``comment_count``. Missing snippet fields are ``None``, a missing tag
        list counts as 0 tags, and missing counters default to ``0``.
        Counters that are present are passed through as returned by the API.
    """
    stats_list = []

    # Can only get 50 videos at a time.
    for start in range(0, len(video_list), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=video_list[start:start + 50]
        )
        data = request.execute()

        for video in data.get('items', []):
            snippet = video.get('snippet', {})
            tags = snippet.get('tags')
            statistics = video.get('statistics')
            # Read counters from an empty mapping when the video has no
            # statistics, so the defaults apply instead of raising KeyError.
            counters = statistics if statistics is not None else {}

            stats_list.append(dict(
                id=video['id'],
                title=snippet.get('title'),
                description=snippet.get('description'),
                published=snippet.get('publishedAt'),
                tag_count=len(tags) if tags is not None else 0,
                tags=tags,
                statistics=statistics,
                view_count=counters.get('viewCount', 0),
                like_count=counters.get('likeCount', 0),
                dislike_count=counters.get('dislikeCount', 0),
                comment_count=counters.get('commentCount', 0),
            ))

    return stats_list

### Create our channel stats

In [8]:
channel_stats = get_channel_stats(youtube, CHANNEL_ID)

### Find our upload playlist ( will contain all video uploads )

In [9]:
# Uses the first channel in the result; assumes the lookup above returned at least one item.
upload_id = channel_stats[0]['contentDetails']['relatedPlaylists']['uploads']

### Get our Video List

In [10]:
video_list = get_video_list(youtube, upload_id)

In [11]:
len(video_list)

1044

In [12]:
# splitted_video_list = video_list[0:10]

### Get our Video Details

Finally, we fetch the details of every video, returned as a list of dictionaries (one per video).

In [13]:
video_data = get_video_details(youtube, video_list)

## Creating Pandas DF
In this section we will convert our data to a pandas DataFrame and start visualizing. I have also created a CSV of the data in a cell below, in case you don't want to make the API calls yourself.

In [14]:
# One row per video; the counters are converted with pd.to_numeric so they can be summed.
df_tags_stats = pd.DataFrame(video_data)
df_tags_stats['title_length'] = df_tags_stats['title'].str.len()
for count_column in ("view_count", "like_count", "dislike_count", "comment_count"):
    df_tags_stats[count_column] = pd.to_numeric(df_tags_stats[count_column])
# "reactions" (used later) = likes + dislikes + comments
df_tags_stats["reactions"] = (
    df_tags_stats["like_count"]
    + df_tags_stats["dislike_count"]
    + df_tags_stats["comment_count"]
)
df_tags_stats.head()

Unnamed: 0,id,title,description,published,tag_count,tags,statistics,view_count,like_count,dislike_count,comment_count,title_length,reactions
0,lxSBH6p0v1U,Tutorial 2-Linear Algebra For ML-How To Add 2 ...,Linear Algebra playlist: https://www.youtube.c...,2021-06-02T11:33:09Z,0,,"{'viewCount': '234', 'likeCount': '21', 'disli...",234,21,3,2,53,26
1,cB3ycWAaznA,Linear Algebra-What is Scalar and Vectors And ...,Subscribe my vlogging channel\nhttps://www.you...,2021-06-01T12:07:20Z,0,,"{'viewCount': '1605', 'likeCount': '95', 'disl...",1605,95,3,18,100,116
2,bTMVj6DPdnY,Success Story Of Ameya- Transition To Devops I...,Subscribe my vlogging channel\nhttps://www.you...,2021-06-01T04:30:12Z,0,,"{'viewCount': '1740', 'likeCount': '55', 'disl...",1740,55,1,11,78,67
3,Pp90jv0ylAM,Explaining An End To End Machine Learning Proj...,Subscribe my vlogging channel\nhttps://www.you...,2021-05-31T14:15:13Z,0,,"{'viewCount': '12761', 'likeCount': '600', 'di...",12761,600,7,25,66,632
4,G9bJcNYv3fQ,Explaining An End To End Data Analyst/Business...,Subscribe my vlogging channel\nhttps://www.you...,2021-05-30T13:30:13Z,0,,"{'viewCount': '11182', 'likeCount': '498', 'di...",11182,498,10,20,65,528


In [15]:
# NOTE(review): this binds a second name to the same DataFrame object; it does not copy it,
# so in-place edits made through df_tags_stats_1 are also visible through df_tags_stats.
# Use df_tags_stats.copy() here if an independent snapshot is wanted.
df_tags_stats_1 = df_tags_stats

In [16]:
df_tags_stats_1.head(3)

Unnamed: 0,id,title,description,published,tag_count,tags,statistics,view_count,like_count,dislike_count,comment_count,title_length,reactions
0,lxSBH6p0v1U,Tutorial 2-Linear Algebra For ML-How To Add 2 ...,Linear Algebra playlist: https://www.youtube.c...,2021-06-02T11:33:09Z,0,,"{'viewCount': '234', 'likeCount': '21', 'disli...",234,21,3,2,53,26
1,cB3ycWAaznA,Linear Algebra-What is Scalar and Vectors And ...,Subscribe my vlogging channel\nhttps://www.you...,2021-06-01T12:07:20Z,0,,"{'viewCount': '1605', 'likeCount': '95', 'disl...",1605,95,3,18,100,116
2,bTMVj6DPdnY,Success Story Of Ameya- Transition To Devops I...,Subscribe my vlogging channel\nhttps://www.you...,2021-06-01T04:30:12Z,0,,"{'viewCount': '1740', 'likeCount': '55', 'disl...",1740,55,1,11,78,67


# Checking video stat after some interval
Problem:

Need to track changes of video stats every N minutes to see how videos are performing. Please pick the interval to scan stats which, according to you, is efficient and smart. You can hardcode channel ID in code, that’s not important. 

### Interval = 30 min

In [17]:
def video_updating_stats(youtube, video_list):
    """Fetch a fresh snapshot of per-video details for the periodic refresh.

    Thin wrapper around ``get_video_details`` (defined earlier in this
    notebook), which performs the same batched ``videos().list`` lookup and
    builds the same per-video dicts, so the logic lives in one place.

    Args:
        youtube: API client passed through to ``get_video_details``.
        video_list: Sequence of video ids to look up.

    Returns:
        list: Whatever ``get_video_details`` returns for the same arguments.
    """
    return get_video_details(youtube, video_list)

In [18]:
def dict_to_series(video_list):
    """Refresh the tracked per-video stats and re-grade every video.

    Pulls a fresh snapshot through ``video_updating_stats`` using the
    notebook-level ``youtube`` client, copies the new counters into the
    notebook-level ``df_tags_stats_1`` frame (rows are matched by video id),
    and recomputes its ``one_hour_views``, ``performance_percentage`` and
    ``performance`` columns.

    Args:
        video_list: Video ids to refresh.

    Returns:
        pandas.DataFrame: The fresh snapshot, with numeric counters and a
        ``reactions`` column (likes + dislikes + comments).

    Side effects:
        Mutates ``df_tags_stats_1`` in place and adds 30 to ``hour_measure``.
    """
    global df_tags_stats_1
    global hour_measure

    count_columns = ["view_count", "like_count", "dislike_count", "comment_count"]

    stats_list = video_updating_stats(youtube, video_list)

    df_interval = pd.DataFrame(stats_list)
    for column in count_columns:
        df_interval[column] = pd.to_numeric(df_interval[column])
    # reaction used later add up likes + dislikes + comments
    df_interval["reactions"] = df_interval["like_count"] + df_interval["dislike_count"] + df_interval["comment_count"]

    # Match rows by video id, not by row position: comparing the two 'id'
    # columns element-wise raises when the snapshot has a different number of
    # rows and silently skips updates when the order differs.
    latest = df_interval.drop_duplicates(subset="id").set_index("id")
    tracked_ids = df_tags_stats_1["id"]
    refreshed = tracked_ids.isin(latest.index)
    for column in count_columns:
        df_tags_stats_1.loc[refreshed, column] = tracked_ids[refreshed].map(latest[column]).to_numpy()

    # The baseline is re-seeded while hour_measure is 0, 30 or 60 (the first
    # three calls). It is also seeded whenever the column does not exist yet,
    # so the median below cannot fail with a KeyError.
    # NOTE(review): confirm that three seeding runs (rather than two) is intended.
    if hour_measure < 61 or 'one_hour_views' not in df_tags_stats_1.columns:
        df_tags_stats_1['one_hour_views'] = df_tags_stats_1['view_count']

    hour_measure += 30

    # Score = baseline views relative to the median baseline over all videos;
    # rows that were not part of this snapshot get a score of 0.
    all_videos_performance_percentage = df_tags_stats_1['one_hour_views'].median()
    df_tags_stats_1['performance_percentage'] = np.where(
        refreshed,
        df_tags_stats_1["one_hour_views"] / all_videos_performance_percentage,
        0,
    )

    score = df_tags_stats_1['performance_percentage']
    conditions = [
        score < 0.3,
        (score >= 0.3) & (score < 0.5),
        (score >= 0.5) & (score < 0.7),
        (score >= 0.7) & (score < 0.9),
        score >= 0.9,
    ]
    choices = ['Bad', 'Below Average', 'Average', 'Good', 'Very Good']
    # default=None keeps the labels as plain strings and leaves ungradable
    # (NaN) scores empty; a float NaN default has no common dtype with the
    # string labels and is rejected by NumPy 2.
    df_tags_stats_1['performance'] = np.select(conditions, choices, default=None)

    return df_interval

# Background Task Scheduler


In [19]:
import time
import atexit

from apscheduler.schedulers.background import BackgroundScheduler

# Empty placeholder so df_new exists before the first scheduled run completes.
list_blank = []
df_new = pd.DataFrame(list_blank)


def job():
    """Take one stats snapshot and publish it as the notebook-level ``df_new``.

    Calls ``dict_to_series`` for the notebook-level ``video_list`` and stamps
    every row with the current ``time.time()`` value (seconds since the
    epoch) in a ``time`` column.
    """
    global df_new
    snapshot = dict_to_series(video_list)
    snapshot['time'] = time.time()
    # Rebind the global only once the frame is complete, so code reading
    # df_new never sees a snapshot without its 'time' column.
    df_new = snapshot

 
 
# Schedule `job` on a 30-minute interval and start the scheduler.
scheduler = BackgroundScheduler()
scheduler.add_job(job, "interval", minutes=30)
scheduler.start()

## Shutdown the scheduler when you want / as you wish


In [20]:
scheduler.shutdown()

In [21]:
df_new.head(5)

Unnamed: 0,id,title,description,published,tag_count,tags,statistics,view_count,like_count,dislike_count,comment_count,reactions,time
0,lxSBH6p0v1U,Tutorial 2-Linear Algebra For ML-How To Add 2 ...,Linear Algebra playlist: https://www.youtube.c...,2021-06-02T11:33:09Z,0,,"{'viewCount': '264', 'likeCount': '25', 'disli...",264,25,3,4,32,1622641000.0
1,cB3ycWAaznA,Linear Algebra-What is Scalar and Vectors And ...,Subscribe my vlogging channel\nhttps://www.you...,2021-06-01T12:07:20Z,0,,"{'viewCount': '1632', 'likeCount': '96', 'disl...",1632,96,3,18,117,1622641000.0
2,bTMVj6DPdnY,Success Story Of Ameya- Transition To Devops I...,Subscribe my vlogging channel\nhttps://www.you...,2021-06-01T04:30:12Z,0,,"{'viewCount': '1748', 'likeCount': '56', 'disl...",1748,56,1,11,68,1622641000.0
3,Pp90jv0ylAM,Explaining An End To End Machine Learning Proj...,Subscribe my vlogging channel\nhttps://www.you...,2021-05-31T14:15:13Z,0,,"{'viewCount': '12914', 'likeCount': '607', 'di...",12914,607,7,25,639,1622641000.0
4,G9bJcNYv3fQ,Explaining An End To End Data Analyst/Business...,Subscribe my vlogging channel\nhttps://www.you...,2021-05-30T13:30:13Z,0,,"{'viewCount': '11220', 'likeCount': '501', 'di...",11220,501,10,20,531,1622641000.0


In [22]:
df_new.shape

(1044, 13)

In [23]:
df_tags_stats_1.head(5)

Unnamed: 0,id,title,description,published,tag_count,tags,statistics,view_count,like_count,dislike_count,comment_count,title_length,reactions,one_hour_views,performance_percentage,performance
0,lxSBH6p0v1U,Tutorial 2-Linear Algebra For ML-How To Add 2 ...,Linear Algebra playlist: https://www.youtube.c...,2021-06-02T11:33:09Z,0,,"{'viewCount': '234', 'likeCount': '21', 'disli...",264,25,3,4,53,26,264,0.023572,Bad
1,cB3ycWAaznA,Linear Algebra-What is Scalar and Vectors And ...,Subscribe my vlogging channel\nhttps://www.you...,2021-06-01T12:07:20Z,0,,"{'viewCount': '1605', 'likeCount': '95', 'disl...",1632,96,3,18,100,116,1632,0.145721,Bad
2,bTMVj6DPdnY,Success Story Of Ameya- Transition To Devops I...,Subscribe my vlogging channel\nhttps://www.you...,2021-06-01T04:30:12Z,0,,"{'viewCount': '1740', 'likeCount': '55', 'disl...",1748,56,1,11,78,67,1748,0.156078,Bad
3,Pp90jv0ylAM,Explaining An End To End Machine Learning Proj...,Subscribe my vlogging channel\nhttps://www.you...,2021-05-31T14:15:13Z,0,,"{'viewCount': '12761', 'likeCount': '600', 'di...",12914,607,7,25,66,632,12914,1.153087,Very Good
4,G9bJcNYv3fQ,Explaining An End To End Data Analyst/Business...,Subscribe my vlogging channel\nhttps://www.you...,2021-05-30T13:30:13Z,0,,"{'viewCount': '11182', 'likeCount': '498', 'di...",11220,501,10,20,65,528,11220,1.00183,Very Good


In [24]:
df_tags_stats_1.to_csv("youtubeData.csv")

In [33]:
df_tags_stats_1.shape

(1044, 16)

In [25]:
# Keep only the identifying and counter columns of the latest snapshot, grouped per video id.
snapshot_columns = [
    'id', 'title',
    'view_count', 'like_count', 'dislike_count', 'comment_count',
    'reactions', 'time',
]
df1 = df_new[snapshot_columns].groupby(['id'])
df1.head().style

Unnamed: 0,id,title,view_count,like_count,dislike_count,comment_count,reactions,time
0,lxSBH6p0v1U,Tutorial 2-Linear Algebra For ML-How To Add 2 Vectors,264,25,3,4,32,1622640780.118958
1,cB3ycWAaznA,Linear Algebra-What is Scalar and Vectors And Its Practical Applications In Machine Learning? ⭐⭐⭐⭐⭐⭐,1632,96,3,18,117,1622640780.118958
2,bTMVj6DPdnY,Success Story Of Ameya- Transition To Devops In Vodafone At GermanyFt:iNeuron,1748,56,1,11,68,1622640780.118958
3,Pp90jv0ylAM,Explaining An End To End Machine Learning Project To A Interviewer,12914,607,7,25,639,1622640780.118958
4,G9bJcNYv3fQ,Explaining An End To End Data Analyst/Business Analytics Projects,11220,501,10,20,531,1622640780.118958
5,sMpi7nfrwz8,Success Story Of Dhruv- Transition As Data Scientist In McKinsey & Company,9748,283,2,19,304,1622640780.118958
6,eXZEFrKGaxA,Permutation And Combination Easily Explained,2549,155,4,17,176,1622640780.118958
7,ipNLzDmfMhI,Success Story Of Rahul- Transition From Support To Data Analyst|ft: iNeuron,6663,174,4,19,197,1622640780.118958
8,3XiJrn_8F9Q,Custom Training Question Answer Model Using Transformer BERT,2178,93,0,12,105,1622640780.118958
9,T2jfbqZe98Q,Success Story Of Purvansh- How He Got Into GSOC In The Field Of Machine Learning| Ft:iNeuron,3133,132,0,6,138,1622640780.118958


# Maximum number of channels (channel_list)

Problem:

Bonus points for:

i) pseudo algorithm for fetching as many youtube channels as possible. 

In [26]:
def get_channel_lists(youtube,
                      published_after="2021-05-26T05:40:00+00:00",
                      published_before="2021-05-27T05:40:00+00:00"):
    """Collect the unique channel ids returned by a date-bounded channel search.

    Runs ``search().list`` with ``type="channel"``, ordered by date, and
    follows ``nextPageToken`` until the last page.

    Args:
        youtube: API client exposing ``search().list(...).execute()``.
        published_after: Lower bound sent as ``publishedAfter``. Defaults to
            the window used originally in this notebook.
        published_before: Upper bound sent as ``publishedBefore``. Defaults
            to the window used originally in this notebook.

    Returns:
        list: Channel ids in the order they were first seen, without duplicates.
    """
    channel_list = []
    seen_ids = set()  # O(1) duplicate check; channel_list keeps the order
    page_token = None

    while True:
        # Single place where the request is built; the token is only sent
        # once the API has handed one out.
        params = dict(
            part="snippet",
            channelType="any",
            maxResults=50,
            order="date",
            publishedAfter=published_after,
            publishedBefore=published_before,
            type="channel",
        )
        if page_token is not None:
            params['pageToken'] = page_token

        response = youtube.search().list(**params).execute()

        for channel in response.get('items', []):
            channel_id = channel['id']['channelId']
            if channel_id not in seen_ids:
                seen_ids.add(channel_id)
                channel_list.append(channel_id)

        # Do we have more pages?
        page_token = response.get('nextPageToken')
        if page_token is None:
            break

    return channel_list

In [27]:
channel_list = get_channel_lists(youtube)
print(len(channel_list))

589


# Filter videos by tag

In [28]:
def video_details_by_tag(youtube, tag,
                         published_after="2021-05-26T05:40:00+00:00",
                         published_before="2021-05-27T05:40:00+00:00"):
    """Search videos matching ``tag`` in a date window and return basic details.

    Runs ``search().list`` with ``type="video"`` and ``q=tag``, ordered by
    date, and follows ``nextPageToken`` until the last page.

    Args:
        youtube: API client exposing ``search().list(...).execute()``.
        tag: Search term sent as the ``q`` parameter.
        published_after: Lower bound sent as ``publishedAfter``. Defaults to
            the window used originally in this notebook.
        published_before: Upper bound sent as ``publishedBefore``. Defaults
            to the window used originally in this notebook.

    Returns:
        list: Dicts with the keys ``id``, ``url``, ``thumbnails`` (URL of the
        'high' thumbnail) and ``title``, in first-seen order. Entries whose
        four values are all identical are returned only once.
    """
    video_details_list = []
    seen_entries = set()  # value tuples of the dicts already collected
    page_token = None

    while True:
        # Single place where the request is built; the token is only sent
        # once the API has handed one out.
        params = dict(
            part="snippet",
            maxResults=50,
            order="date",
            publishedAfter=published_after,
            publishedBefore=published_before,
            q=tag,
            type="video",
            videoType="any",
            prettyPrint=True,
        )
        if page_token is not None:
            params['pageToken'] = page_token

        response = youtube.search().list(**params).execute()

        for video in response.get('items', []):
            video_id = video['id']['videoId']
            video_data_dict = {
                "id" : video_id,
                "url" : f"https://www.youtube.com/watch?v={ video_id }",
                "thumbnails" : video['snippet']['thumbnails']['high']['url'],
                "title" : video['snippet']['title'],
            }

            # Dicts are unhashable, so the tuple of their values serves as
            # the set key; the keys are fixed, making this equivalent to
            # comparing whole dicts.
            entry_key = tuple(video_data_dict.values())
            if entry_key not in seen_entries:
                seen_entries.add(entry_key)
                video_details_list.append(video_data_dict)

        # Do we have more pages?
        page_token = response.get('nextPageToken')
        if page_token is None:
            break

    return video_details_list


In [29]:
video_details_list = video_details_by_tag(youtube, "django")
print(len(video_details_list))

79


In [30]:
video_details_list

[{'id': 'RuxrnDpahxw',
  'thumbnails': 'https://i.ytimg.com/vi/RuxrnDpahxw/hqdefault.jpg',
  'title': 'How To Download Free E-commerce Templates | E-commerce Website Using Django | Django | Hindi 🤨',
  'url': 'https://www.youtube.com/watch?v=RuxrnDpahxw'},
 {'id': 'YvzUN6eMEKg',
  'thumbnails': 'https://i.ytimg.com/vi/YvzUN6eMEKg/hqdefault.jpg',
  'title': '23.기초부터 제작하는 파이썬 장고(Python Django) 프로젝트 -   Django + 디자인 프레임워크 적용 화면 꾸미기 2',
  'url': 'https://www.youtube.com/watch?v=YvzUN6eMEKg'},
 {'id': 'Q-CLW_mzk44',
  'thumbnails': 'https://i.ytimg.com/vi/Q-CLW_mzk44/hqdefault.jpg',
  'title': '🔴 Créer un blog complet avec django de A Z  #05 Bien comprendre la structure d&#39;un projet django',
  'url': 'https://www.youtube.com/watch?v=Q-CLW_mzk44'},
 {'id': 'Qk9zcpYKLVg',
  'thumbnails': 'https://i.ytimg.com/vi/Qk9zcpYKLVg/hqdefault.jpg',
  'title': 'créer votre Netflix avec django et bootstrap',
  'url': 'https://www.youtube.com/watch?v=Qk9zcpYKLVg'},
 {'id': 'zVNdh8ZBmPc',
  'thumbnails'