# YouTube Data API v3 usage for channel mining

# Install 
### pip install --upgrade google-api-python-client
### pip install --upgrade google-auth google-auth-oauthlib google-auth-httplib2
### pip install --upgrade oauth2client

### > Overview (概要)
#### input: a YouTube channel ID
#### output: information on each video of the target channel (pandas DataFrame)

In [1]:
import sys, os
from pprint import pprint

from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.tools import argparser

import devkey
# API key, read from the `api1` attribute of the local `devkey` module.
DEVELOPER_KEY = devkey.api1

# Service name and version of the API the client is built for.
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# Channel whose videos are collected below.
CHANNEL_ID = 'UCMsuwHzQPFMDtHaoR7_HDxg' # channel ID of the popular (and cute) Japanese YouTuber "Yukirinu"

In [3]:
# Build the API client object that every request in this notebook goes through.
YT = build(
    serviceName=YOUTUBE_API_SERVICE_NAME,
    version=YOUTUBE_API_VERSION,
    developerKey=DEVELOPER_KEY,
)

In [14]:
def get_channel_vlist( yt, cid, max_num ):
    """Return up to ``max_num`` video IDs of a channel, in the API's date order.

    Pages through ``search().list`` requesting as many results per call as
    are still needed, instead of one video per request.

    Args:
        yt: API client exposing ``search().list(...).execute()``.
        cid: ID of the channel to list.
        max_num: Maximum number of video IDs to return.

    Returns:
        A list of video ID strings. It is shorter than ``max_num`` when the
        channel has fewer videos, and empty when ``max_num`` is not positive
        or the channel has no videos.
    """
    page_size_limit = 50  # largest maxResults value accepted by search.list
    video_ids = []
    page_token = None
    while len( video_ids ) < max_num:
        params = {
            'channelId': cid,
            'maxResults': min( page_size_limit, max_num - len( video_ids ) ),
            'order': 'date',
            'type': 'video',
            'part': 'id',
        }
        if page_token:
            params['pageToken'] = page_token
        feed = yt.search().list( **params ).execute()

        items = feed.get( 'items' ) or []
        if not items:
            # An empty page ends the walk, even if a token is present, so the loop always terminates.
            break
        video_ids.extend( item['id']['videoId'] for item in items )

        # The last page carries no token; its items were already collected above.
        page_token = feed.get( 'nextPageToken' )
        if not page_token:
            break
    return video_ids[:max_num]

# Collect video IDs of the target channel, asking for at most 10.
channel_videos = get_channel_vlist( YT, CHANNEL_ID, 10 )
len( channel_videos )  # cell output: how many IDs were collected

10

In [8]:
def get_video_detail( vid ):
    """Fetch the resource of one video through the module-level ``YT`` client.

    Args:
        vid: ID of the video to look up.

    Returns:
        The first entry of the response's ``items`` list, requested with the
        ``id``, ``snippet``, ``statistics``, ``contentDetails`` and
        ``topicDetails`` parts.

    Raises:
        IndexError: if the response contains no item for ``vid``.
    """
    video_detail = YT.videos().list(part="id,snippet,statistics,contentDetails,topicDetails",id=vid).execute()
    items = video_detail.get("items") or []
    if not items:
        # IndexError is what a plain ``[0]`` on an empty list raises; the message adds the failing ID.
        raise IndexError( "no video returned for id %r" % ( vid, ) )
    return items[0]

# Fetch the detail record of every collected video, then key the records by
# their position in the list.
vlist_details = list( map( get_video_detail, channel_videos ) )
channel_video_info = dict( enumerate( vlist_details ) )  # { idx : video_details(dictionary) }

In [9]:
# Reference: https://developers.google.com/youtube/v3/docs/videos?hl=ja#resource
# Maps each part of a video resource to the fields exported as columns.
# A value of None means the part itself is exported as a single column.
info_tree = {
    'id': None,
    'snippet': [
        'title', 'description', 'liveBroadcastContent',
        'tags', 'publishedAt', 'thumbnails',
    ],
    'statistics': [
        'viewCount', 'likeCount', 'favoriteCount',
        'dislikeCount', 'commentCount',
    ],
    'contentDetails': [
        'caption', 'definition', 'dimension', 'duration', 'projection',
    ],
    # NOTE(review): the videos resource appears to spell this field `topicIds`
    # (lower-case t); as written the 'TopicIds' column looks like it stays empty.
    # Confirm before renaming, since that changes a column name.
    'topicDetails': ['TopicIds', 'relevantTopicIds'],
}

# Column names: the part name for single-value parts, otherwise its field names.
df_cols = []
for section, fields in info_tree.items():
    df_cols.extend( [ section ] if fields is None else fields )

def _video_row( video, tree ):
    """Flatten one video resource into a list of values, one per exported column.

    Args:
        video: dict for a single video (an entry of a ``videos().list`` response).
        tree: mapping of part name to the list of field names to export, or
            None when the part itself is the value.

    Returns:
        A list with one value per column, in ``tree`` order. A part or field
        missing from ``video`` yields None instead of raising, so a single
        incomplete record (for example hidden statistics) cannot abort the
        whole table.
    """
    row = []
    for parent, child in tree.items():
        if child is None:
            row.append( video.get( parent ) )
            continue
        section = video.get( parent ) or {}
        for c in child:
            value = section.get( c )
            if parent == 'snippet' and c == 'thumbnails':
                # Only the URL of the 'default' thumbnail is exported.
                value = ( ( value or {} ).get( 'default' ) or {} ).get( 'url' )
            row.append( value )
    return row


# One row per collected video, in the order the videos were stored.
rows = [ _video_row( v, info_tree ) for v in channel_video_info.values() ]

In [13]:
# Build the DataFrame: one row per video, columns named by df_cols
import pandas as pd
df = pd.DataFrame( rows, columns=df_cols )
print( df.shape )  # (number of rows, number of columns)
df.head()  # cell output: preview of the first rows

(10, 19)


Unnamed: 0,id,title,description,liveBroadcastContent,tags,publishedAt,thumbnails,viewCount,likeCount,favoriteCount,dislikeCount,commentCount,caption,definition,dimension,duration,projection,TopicIds,relevantTopicIds
0,IoPEHz9HcK4,勉強の悩みに全て答えます！ゆきりぬ先生の質問相談コーナー！！,少しでも良いと思ってもらえたら高評価おねがいします٩(๑❛ᴗ❛๑)۶ \n\n\n\nゆきり...,none,"[ゆきりぬ, YouTube, YouTuber, ゆーちゅーぶ, ゆーちゅーばー, 面白,...",2018-06-26T11:37:06.000Z,https://i.ytimg.com/vi/IoPEHz9HcK4/default.jpg,92978,2257,0,146,415,False,hd,2d,PT10M13S,rectangular,,"[/m/02jjt, /m/02jjt]"
1,BYfxhfLIKzA,ゆきりぬ毎日メイク〜芋すぎた理系女子がYouTube2年で辿り着いたメイク法〜 Everyd...,高校・大学時代に全く化粧をしなかったので、YouTubeを始めて２年間、メイクには悩みに悩ん...,none,"[ゆきりぬ, YouTube, YouTuber, ゆーちゅーぶ, ゆーちゅーばー, 面白,...",2018-06-23T13:00:58.000Z,https://i.ytimg.com/vi/BYfxhfLIKzA/default.jpg,157716,3364,0,235,719,False,hd,2d,PT17M40S,rectangular,,"[/m/019_rr, /m/019_rr]"
2,dVWPU1Mc1UU,コーラにお酒をこっそり盛られました。。。,さけりぬ半端ないって。\nYouTube出て間もないのに裏企画とか仕込んでくるもん。\nそん...,none,"[ゆきりぬ, YouTube, YouTuber, ゆーちゅーぶ, ゆーちゅーばー, 面白,...",2018-06-21T12:02:04.000Z,https://i.ytimg.com/vi/dVWPU1Mc1UU/default.jpg,289843,3381,0,811,851,False,hd,2d,PT4M16S,rectangular,,"[/m/02jjt, /m/02jjt]"
3,Oy6NmFo8paY,【ご報告】アメリカへ行くことになりました。,少しでも良いと思ってもらえたら高評価おねがいします٩(๑❛ᴗ❛๑)۶ \n\n\n\nゆきり...,none,"[ゆきりぬ, YouTube, YouTuber, ゆーちゅーぶ, ゆーちゅーばー, 面白,...",2018-06-19T11:59:39.000Z,https://i.ytimg.com/vi/Oy6NmFo8paY/default.jpg,272106,4388,0,1066,932,False,hd,2d,PT3M48S,rectangular,,"[/m/098wr, /m/02jjt, /m/02jjt]"
4,nXQwpAlDqsU,女友達に性格悪いってバラされました。,少しでも良いと思ってもらえたら高評価おねがいします٩(๑❛ᴗ❛๑)۶ \n\n\n\nゆきり...,none,"[ゆきりぬ, YouTube, YouTuber, ゆーちゅーぶ, ゆーちゅーばー, 面白,...",2018-06-17T11:27:47.000Z,https://i.ytimg.com/vi/nXQwpAlDqsU/default.jpg,255094,3636,0,549,686,False,hd,2d,PT4M48S,rectangular,,[/m/04rlf]


In [35]:
# Save as '<channel id>_<yyyymmdd>.csv', stamped with today's date
import datetime
date = datetime.date.today().strftime("%Y%m%d")
csv_path = '%s_%s.csv' % ( CHANNEL_ID, date )
df.to_csv( csv_path )

In [43]:
# Load the CSV back, using its first column as the index
csv_path = '%s_%s.csv' % ( CHANNEL_ID, date )
df_new = pd.read_csv( csv_path, index_col=0 )
df_new.head()