In [None]:
import googleapiclient.discovery
import googleapiclient.errors

from keys import YOUTUBE_API_KEY
from typing import Dict, Any

# Name and version of the Google API that the client below is built for.
api_service_name = "youtube"
api_version = "v3"

# Shared API client used by the request helper(s) in this notebook. It is
# authenticated with the developer key imported from the local `keys` module.
youtube = googleapiclient.discovery.build(
    serviceName=api_service_name,
    version=api_version,
    developerKey=YOUTUBE_API_KEY,
)

def get_playlist(playlist_id: str, is_first_request=True, next_page_token=None) -> Dict[str, Any]:
    """Fetch one page of items from a YouTube playlist.

    Issues a single ``playlistItems().list`` request through the module-level
    ``youtube`` client, asking for the ``snippet`` part with ``maxResults`` 50.

    Args:
        playlist_id: ID of the playlist whose items are requested.
        is_first_request: When True (the default) no ``pageToken`` is sent.
            NOTE: in that case ``next_page_token`` is ignored even if given.
        next_page_token: Token sent as ``pageToken`` when ``is_first_request``
            is False.

    Returns:
        The response returned by the request's ``execute()`` call.
    """
    max_results = "50"

    # Both the first and the follow-up requests share the same parameters;
    # only follow-up requests carry a page token.
    request_params = {
        "part": "snippet",
        "playlistId": playlist_id,
        "maxResults": max_results,
    }
    if not is_first_request:
        request_params["pageToken"] = next_page_token

    return youtube.playlistItems().list(**request_params).execute()
    

In [None]:
# Four playlists of the most popular videos on YouTube of all time
# https://www.youtube.com/playlist?list=PLirAqAtl_h2r5g8xGajEwdXd3x1sZh8hC

playlist_id_list = ["PLirAqAtl_h2r5g8xGajEwdXd3x1sZh8hC", "PLirAqAtl_h2o4xCWaBsDH3BKNQs8YqLCL",
                    "PLirAqAtl_h2p57Njt3QJDtwxAPZENJrIp", "PLirAqAtl_h2rTbOXU2Oc-7WBBHmFrnyUC"]

# Accumulates the raw playlist items of every page of every playlist.
res_item_list = []

for playlist_id in playlist_id_list:

    # First page of the playlist.
    response = get_playlist(playlist_id)
    # Fall back to an empty list so a response without 'items' does not make
    # extend() fail on None.
    res_item_list.extend(response.get('items') or [])
    print(playlist_id)

    # Keep requesting pages for as long as the last response points to another one.
    while 'nextPageToken' in response:
        next_page_token = response.get('nextPageToken')
        print(f"current len: {len(res_item_list)}")
        response = get_playlist(playlist_id,
                                is_first_request=False,
                                next_page_token=next_page_token)
        res_item_list.extend(response.get('items') or [])

In [None]:
import json

# dump raw data
with open('all_raw_data.json', 'w') as f:
    json.dump(res_item_list, f, ensure_ascii=True)

# Read the dump back; the context manager closes the handle once loading is
# done instead of leaving the file open for the rest of the session.
with open('all_raw_data.json') as file:
    data = json.load(file)

# Display the first record to check that the round trip worked.
data[0]

In [None]:
# create csv
import pandas as pd

def _playlist_item_to_row(res):
    """Flatten one raw playlist item into a (video_id, video_url, video_title,
    channel_title, channel_id) tuple.

    The two channel fields fall back to '' when the item's snippet does not
    contain them.
    """
    snippet = res['snippet']
    video_id = snippet['resourceId']['videoId']
    return (
        video_id,
        'https://www.youtube.com/watch?v=' + video_id,
        snippet['title'],
        snippet.get('videoOwnerChannelTitle', ''),
        snippet.get('videoOwnerChannelId', ''),
    )


# One row per collected playlist item, in collection order.
df_items = [_playlist_item_to_row(res) for res in res_item_list]

df = pd.DataFrame(
    df_items,
    columns=['video_id', 'video_url', 'video_title', 'channel_title', 'channel_id'],
)
                  

In [None]:
from collections import Counter

# Count how many rows share each video title, to spot repeated titles.
video_title_counts = Counter(df['video_title'])
video_title_counts

In [None]:
from pathlib import Path

output_path = Path('most_popular_videos/most_popular_1999.csv')

# Keep the first row for every video_id (non-mutating, so re-running the cell
# never depends on a half-modified frame).
df = df.drop_duplicates(subset=['video_id'])
# Drop rows whose title is one of the two placeholder titles.
df = df[~df['video_title'].isin(['Private video', 'Deleted video'])]
df = df.reset_index(drop=True)

# to_csv does not create missing directories, so make sure the target exists.
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)