In [20]:
import httplib2
import os
import sys
import csv

from apiclient.discovery import build_from_document
from apiclient.errors import HttpError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import argparser, run_flow

from tqdm import tqdm_notebook as tqdm

In [21]:
# Path of the OAuth 2.0 client-secrets file handed to flow_from_clientsecrets.
CLIENT_SECRETS_FILE = "client_secrets.json"

# OAuth scope requested when the flow is built.
YOUTUBE_READ_WRITE_SSL_SCOPE = "https://www.googleapis.com/auth/youtube.force-ssl"
# Service name / version; not referenced by the cells visible in this notebook.
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

# Passed as `message=` to flow_from_clientsecrets, i.e. it is what the user sees
# when the secrets file cannot be used, so it must say what to fix and where.
MISSING_CLIENT_SECRETS_MESSAGE = (
    "Missing or invalid OAuth 2.0 client secrets.\n"
    "Expected the client secrets file at:\n"
    "    {}\n"
    "Create OAuth 2.0 credentials in the Google API Console "
    "(https://console.developers.google.com/) and save them to that path."
).format(os.path.abspath(CLIENT_SECRETS_FILE))

In [22]:
def get_statistics_views(youtube, video_id):
    """Fetch snippet metadata and statistics for one video.

    Args:
        youtube: API client exposing ``videos().list(...).execute()``.
        video_id: ID of the video to look up.

    Returns:
        dict with the keys ``channelId``, ``title``, ``description``, ``tags``,
        ``categoryId``, ``defaultLanguage``, ``viewCount``, ``likeCount``,
        ``dislikeCount``, ``favoriteCount`` and ``commentCount`` (in that
        order). A field that is absent from the response is returned as
        ``None`` instead of raising ``KeyError``.

    Raises:
        IndexError: if the response holds no item for ``video_id``.
    """
    # `part` is a comma-separated list; no whitespace between the names.
    response = youtube.videos().list(part='snippet,statistics', id=video_id).execute()

    items = response.get('items') or []
    if not items:
        raise IndexError('No video found for id {!r}'.format(video_id))

    snippet = items[0].get('snippet', {})
    statistics = items[0].get('statistics', {})

    snippet_fields = ('channelId', 'title', 'description', 'tags',
                      'categoryId', 'defaultLanguage')
    statistics_fields = ('viewCount', 'likeCount', 'dislikeCount',
                         'favoriteCount', 'commentCount')

    # Key order is kept stable because callers use info.keys() as CSV header.
    info = {field: snippet.get(field) for field in snippet_fields}
    info.update((field, statistics.get(field)) for field in statistics_fields)
    return info

In [23]:
def get_comments_with_replies(youtube, video_id, token="", thread_id=1):
    """Fetch one page of comment threads and flatten it into row dicts.

    Args:
        youtube: API client exposing ``commentThreads().list(...).execute()``.
        video_id: ID of the video whose threads are requested.
        token: page token sent as ``pageToken``.
        thread_id: number assigned to the first thread on this page; it is
            incremented once per thread.

    Returns:
        Tuple ``(rows, thread_id, token)``: the rows for this page (each
        top-level comment followed by whatever ``get_replies`` returns for it),
        the thread number to use for the next page, and the response's
        ``nextPageToken`` or ``0`` when there is none.
    """
    request = youtube.commentThreads().list(
        part="snippet",
        pageToken=token,
        videoId=video_id,
        textFormat="plainText",
        maxResults=100,
    )
    page = request.execute()

    rows = []
    for thread in page["items"]:
        top_level = thread["snippet"]["topLevelComment"]
        body = top_level["snippet"]["textDisplay"]
        likes = top_level["snippet"]["likeCount"]
        n_replies = thread["snippet"]["totalReplyCount"]

        rows.append({'text': body, 'parent': True, 'cid': thread_id,
                     'like_count': likes, 'reply_count': n_replies})

        # Replies share the parent's thread number.
        if n_replies:
            rows.extend(get_replies(top_level["id"], thread_id, children=[], token=''))

        thread_id += 1

    return rows, thread_id, page.get("nextPageToken", 0)

In [24]:
def get_replies(parentId, thread_id, children=None, token='', youtube=None):
    """Collect every reply to one top-level comment, following pagination.

    Args:
        parentId: ID of the top-level comment whose replies are fetched.
        thread_id: thread number stored as ``cid`` on every reply row.
        children: optional list to append to; it is extended in place and
            returned. A fresh list is created when omitted, so results no
            longer leak from one call into the next.
        token: page token to start from; empty means the first page.
        youtube: API client exposing ``comments().list(...).execute()``.
            When omitted, the module-level ``youtube`` is used, which is what
            this function always did before the parameter existed.

    Returns:
        ``children`` with one dict per reply (``text``, ``parent``, ``cid``,
        ``like_count``, ``reply_count``).
    """
    if youtube is None:
        # Backward compatibility: fall back to the notebook-level client.
        youtube = globals()["youtube"]
    if children is None:
        children = []

    while True:
        request_args = {
            "part": "snippet",
            "parentId": parentId,
            # Same text format as the top-level comments, so the CSV does not
            # mix plain text with HTML-formatted replies.
            "textFormat": "plainText",
            "maxResults": 100,
        }
        if token:
            request_args["pageToken"] = token

        results = youtube.comments().list(**request_args).execute()

        for item in results["items"]:
            snippet = item["snippet"]
            children.append({'text': snippet["textDisplay"], 'parent': False,
                             'cid': thread_id, 'like_count': snippet['likeCount'],
                             'reply_count': 0})

        token = results.get("nextPageToken")
        if not token:
            return children

In [25]:
def comments_to_csv(youtube, video_id, filename, total):
    """Write every comment and reply of a video to a CSV file.

    Pages are fetched with ``get_comments_with_replies`` until it returns a
    falsy page token; each page is written as soon as it arrives.

    Args:
        youtube: API client forwarded to ``get_comments_with_replies``.
        video_id: ID of the video whose comments are exported.
        filename: path of the CSV file to create (overwritten if present).
        total: expected number of rows, used only to size the progress bar.

    Errors are reported with ``print`` rather than raised (best-effort export,
    as before); the progress bar is closed in every case.
    """
    # Fixed header: matches the row dicts and also works when a video has no
    # comments at all (previously the header was read from the first row).
    fieldnames = ['text', 'parent', 'cid', 'like_count', 'reply_count']

    pbar = None
    try:
        pbar = tqdm(total=total)

        # newline='' is required by the csv module; utf-8 keeps emoji and
        # non-ASCII comment text writable regardless of the platform default.
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

            token, thread_id = "", 1
            while True:
                comments, thread_id, token = get_comments_with_replies(
                    youtube, video_id, token=token, thread_id=thread_id)
                writer.writerows(comments)
                pbar.update(len(comments))
                if not token:
                    break

    except Exception as e:
        print(e)
        print("Fix me, mamma!")
    finally:
        if pbar is not None:
            pbar.close()

In [26]:
# Build the OAuth flow from the client-secrets file; MISSING_CLIENT_SECRETS_MESSAGE
# is what the user is shown if that file cannot be used.
flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE, scope=YOUTUBE_READ_WRITE_SSL_SCOPE,
        message=MISSING_CLIENT_SECRETS_MESSAGE)

# Cached credentials from a previous run, if any.
storage = Storage("main.py-oauth2.json")
credentials = storage.get()

# Without this step a first run (or an invalidated token) crashes below on
# `credentials.authorize`. parse_args([]) keeps argparse away from the
# kernel's own sys.argv, which would otherwise be parsed by run_flow.
if credentials is None or credentials.invalid:
    credentials = run_flow(flow, storage, argparser.parse_args([]))

# Build the API client from the local discovery document.
with open("youtube-v3-discoverydocument.json", "r") as f:
    doc = f.read()

youtube = build_from_document(doc, http=credentials.authorize(httplib2.Http()))

In [27]:
# Video to export (original note: Gillette commercial).
video_id = "koPmuEyP3a0"

In [28]:
# Fetch the video's metadata and write it as a one-row CSV.
info = get_statistics_views(youtube, video_id)
filename = '{}_info.csv'.format(video_id)

# newline='' is required by the csv module (the description field can contain
# line breaks); utf-8 keeps non-ASCII titles writable on every platform.
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:

    writer = csv.DictWriter(csvfile, fieldnames=list(info.keys()))
    writer.writeheader()
    writer.writerow(info)

# commentCount only sizes the progress bar, so fall back to 0 if it is unavailable.
comments_to_csv(youtube, video_id, '{}.csv'.format(video_id),
                total=int(info.get('commentCount') or 0))