In [1]:
import datetime
import os
import re

import numpy as np
import pandas as pd
import requests

In [28]:
def get_playlist_id(channel_id, api_key):
    """Return the ID of the "uploads" playlist belonging to a channel.

    A single GET is issued against the ``channels`` endpoint asking for the
    ``contentDetails`` part of ``channel_id``; the value found under
    ``relatedPlaylists -> uploads`` of the first returned item is returned.

    Args:
        channel_id: ID of the channel to look up.
        api_key: API key sent as the ``key`` query parameter.

    Returns:
        The uploads playlist ID taken from the first item of the response.

    Raises:
        KeyError / IndexError: if the decoded response does not contain the
            expected ``items[0].contentDetails.relatedPlaylists.uploads`` path.
    """
    endpoint = "https://www.googleapis.com/youtube/v3/channels?"
    part = "contentDetails"
    query = {"key": api_key,
             "part": part,
             "id": channel_id}

    payload = requests.get(endpoint, query).json()
    first_channel = payload["items"][0]
    return first_channel[part]["relatedPlaylists"]["uploads"]

    
def get_video_ids(playlist_id, api_key):
    """Collect the video ID of every entry in a playlist.

    Pages of up to 50 entries are requested from the ``playlistItems``
    endpoint. The first request carries an empty ``pageToken``; each later
    request carries the ``nextPageToken`` of the previous response, and
    paging stops at the first response that has no ``nextPageToken``.

    Args:
        playlist_id: ID of the playlist to walk.
        api_key: API key sent as the ``key`` query parameter.

    Returns:
        A list of video IDs in the order the pages returned them.

    Raises:
        KeyError: if a decoded response has no ``items`` entry or an item
            lacks ``snippet -> resourceId -> videoId``.
    """
    endpoint = "https://www.googleapis.com/youtube/v3/playlistItems?"
    query = {"key": api_key,
             "part": "snippet",
             "playlistId": playlist_id,
             "maxResults": 50,  # api supports up to 50
             "pageToken": ""}
    collected = []
    while True:
        page = requests.get(endpoint, query).json()
        for entry in page["items"]:
            collected.append(entry["snippet"]["resourceId"]["videoId"])
        if "nextPageToken" not in page:
            return collected
        # Same dict is reused; only the page cursor changes between requests.
        query["pageToken"] = page["nextPageToken"]

    
def _format_published_at(published_at):
    """Reformat an API ``publishedAt`` timestamp as ``MM/DD/YYYY HH:MM:SS``.

    Both the ``...SS.ffffffZ`` and the ``...SSZ`` spelling are accepted,
    because ISO 8601 allows the fractional seconds to be left out.

    Raises:
        ValueError: if the timestamp matches neither layout.
    """
    for layout in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
        try:
            parsed = datetime.datetime.strptime(published_at, layout)
        except ValueError:
            continue
        return parsed.strftime('%m/%d/%Y %H:%M:%S')
    raise ValueError("unrecognised publishedAt timestamp: %r" % (published_at,))


def get_video_info(video_ids, api_key):
    """Fetch snippet, duration and statistics data for a list of video IDs.

    The IDs are sent to the ``videos`` endpoint in batches of 50. One row is
    built for every item a response contains, laid out as::

        [id, title, description, categoryId, publishedAt, lastAvailable,
         duration, viewCount, likeCount, dislikeCount, favoriteCount,
         commentCount]

    ``publishedAt`` and ``lastAvailable`` are ``MM/DD/YYYY HH:MM:SS`` strings
    (``lastAvailable`` is the local time at which the batch was fetched) and
    ``duration`` is whatever ``get_duration_in_seconds`` returns for the
    item's ``contentDetails.duration``.

    Rows are keyed by the ``id`` of each returned item rather than by
    position in ``video_ids``, so a requested video that is absent from the
    response is simply skipped instead of shifting IDs onto the wrong rows.
    A statistics counter that is absent from an item is reported as ``None``.

    Args:
        video_ids: sequence of video ID strings.
        api_key: API key sent as the ``key`` query parameter.

    Returns:
        A list of rows (lists) in the layout above; empty when ``video_ids``
        is empty, in which case no request is made.

    Raises:
        KeyError: if a decoded response has no ``items`` entry or an item
            lacks one of the snippet / contentDetails fields read here.
        ValueError: if a ``publishedAt`` value cannot be parsed.
    """
    video_url = "https://www.googleapis.com/youtube/v3/videos?"
    # Plain comma-separated lists: no embedded spaces end up in the query string.
    video_part = "snippet,contentDetails,statistics"
    snippet_columns = ["title", "description", "categoryId", "publishedAt"]  #, "defaultAudioLanguage"
    statistics_columns = ["viewCount", "likeCount", "dislikeCount", "favoriteCount", "commentCount"]
    batch_size = 50
    output = []

    for start in range(0, len(video_ids), batch_size):
        video_parameters = {"key": api_key,
                            "id": ",".join(video_ids[start:start + batch_size]),
                            "part": video_part}
        video_results = requests.get(video_url, video_parameters).json()
        fetched_at = datetime.datetime.now().strftime('%m/%d/%Y %H:%M:%S')

        for item in video_results["items"]:
            snippet = item["snippet"]
            row = [item["id"]]
            row.extend(snippet[column] for column in snippet_columns)
            row[-1] = _format_published_at(row[-1])
            row.append(fetched_at)
            row.append(get_duration_in_seconds(item["contentDetails"]["duration"]))
            # Counters can be missing from a response (hidden or disabled on the
            # video); keep the column and fill it with None instead of raising.
            statistics = item.get("statistics", {})
            row.extend(statistics.get(column) for column in statistics_columns)
            output.append(row)

    return output


def get_duration_in_seconds(duration):
    """Convert a duration string such as ``PT6M24S`` into whole seconds.

    For each of the unit letters ``D``, ``H``, ``M`` and ``S`` the first run
    of digits directly followed by that letter is read; a unit that does not
    occur counts as zero. Days are folded into hours, hours into minutes and
    minutes into seconds.

    Args:
        duration: duration text containing ``<digits><unit>`` groups.

    Returns:
        The total number of seconds as an ``int``.
    """
    unit_patterns = (r'[0-9]+D', r'[0-9]+H', r'[0-9]+M', r'[0-9]+S')
    amounts = []
    for pattern in unit_patterns:
        matches = re.findall(pattern, duration)
        # Strip the trailing unit letter from the first match; absent -> 0.
        amounts.append(int(matches[0][:-1]) if matches else 0)

    day_count, hour_count, minute_count, second_count = amounts
    total_hours = hour_count + day_count * 24
    total_minutes = minute_count + total_hours * 60
    return second_count + total_minutes * 60

In [30]:
# Channel IDs to pull statistics for.
arxiv = "UCNIkB2IeJ-6AmZv7bQ1oBYg" #arXiv
dunkey = "UCsvn_Po0SmunchJYOWpOxMg" #dunkey

# Column labels, in the same order as the rows built by get_video_info.
columns = ["id", "title", "description", "categoryId", "publishedAt", "lastAvailable", "duration", "viewCount", "likeCount", "dislikeCount", "favoriteCount","commentCount"]

# `api_key` is not assigned in any cell of this notebook, so on a fresh kernel
# the calls below would fail with a NameError. Read it from the environment so
# the notebook survives Restart & Run All without a key being saved in it.
api_key = os.environ.get("YOUTUBE_API_KEY")
if not api_key:
    raise RuntimeError("Set the YOUTUBE_API_KEY environment variable to a YouTube Data API key.")

uploads = get_playlist_id(dunkey, api_key)
video_ids = get_video_ids(uploads, api_key)
pd.DataFrame(get_video_info(video_ids, api_key), columns=columns)

Unnamed: 0,id,title,description,categoryId,publishedAt,lastAvailable,duration,viewCount,likeCount,dislikeCount,favoriteCount,commentCount
0,Fn3g25ZEbak,Red Dead Redemption 2 (dunkview),(minor spoilers for Chapter 5)\n\ndunk store h...,20,11/07/2018 20:30:00,11/08/2018 09:25:47,384,1596038,98021,2614,0,7465
1,G6KXLtn6OHU,Dunkey Halloween Special,Check out our new halloween shirt at https://d...,20,10/25/2018 20:00:00,11/08/2018 09:25:47,537,2626492,118119,1082,0,7804
2,L9y25i-csW8,Call of Duty 15 : Black Ops 4,This time everyone has built in juggernaut.\n\...,20,10/21/2018 19:30:01,11/08/2018 09:25:47,371,3455148,176832,1571,0,9357
3,eFAgO5WKnQA,Assassin's Creed Good Franchise : Odyssey,"I gotta admit though, I didn't see any trailin...",20,10/10/2018 19:30:00,11/08/2018 09:25:47,254,2808138,112089,2919,0,8048
4,-Wv6okd57t8,Hamston Checks Out,This is my first overwatch video.\n\ndunk stor...,20,10/02/2018 19:30:00,11/08/2018 09:25:47,218,2674253,131422,1315,0,8132
5,8o1ieehttdA,Kingdom Hearts Explained,Here is a quick recap to prepare you for Kingd...,20,09/24/2018 19:30:00,11/08/2018 09:25:47,242,2307224,135000,1555,0,14041
6,hKd3Ui8f0-c,Mario's Balloon World,Get it ? C-erious. Because his name is C-Rex.\...,20,09/14/2018 19:30:00,11/08/2018 09:25:47,336,2444332,124303,962,0,4074
7,v1SpVYmcTGM,Spider-Man,With big power comes big power.\n\ndunk store ...,20,09/10/2018 19:30:00,11/08/2018 09:25:47,245,4058549,167041,3007,0,8150
8,RPjE9riEhtA,Shenmue (dunkview),Should've been called Shenmoo and instead you ...,20,09/01/2018 19:30:02,11/08/2018 09:25:47,242,2108124,85134,1110,0,4331
9,MdRLVgRRce8,The Majestic World of Dolphins,Take a dive with Morgan Freeman into the fasci...,27,08/27/2018 19:30:01,11/08/2018 09:25:47,172,2127400,149167,5942,0,15577
