https://www.pingshiuanchua.com/blog/post/using-youtube-api-to-analyse-youtube-comments-on-python

## Extract YouTube Comments

### Method 1: HTTP requests to the REST API

In [None]:
import configparser
from urllib.parse import urlencode
import requests

import time
import json


In [None]:
# Load the API key from the local 'key.properties' file:
# section [google], option 'python.playground'.
config = configparser.ConfigParser()
config.read('key.properties')
# `key` is used as the default API key by every helper defined below.
key = config['google']['python.playground']

**Get Video IDs**

In [None]:
def html_get_videoid(part, maxResults, q, key=key, channel_title='周杰倫 Jay Chou'):
    """Search YouTube over HTTP and return a video ID from one channel.

    Sends a GET request to the YouTube Data API v3 ``search`` endpoint and
    scans the returned items for entries whose ``snippet.channelTitle``
    equals ``channel_title``.

    Args:
        part: Value for the ``part`` query parameter. The response must
            contain ``snippet``, because the channel title is read from it.
        maxResults: Value for the ``maxResults`` query parameter.
        q: Search query string.
        key: API key sent as the ``key`` query parameter.
        channel_title: Exact channel title a result must carry to be
            selected.

    Returns:
        The ``videoId`` of the last matching item, or ``None`` when the
        HTTP response status is not OK.

    Raises:
        LookupError: If the request succeeded but no item from
            ``channel_title`` carries a ``videoId``.
    """
    response = requests.get('https://www.googleapis.com/youtube/v3/search?' +
                            urlencode({'part': part,
                                       'maxResults': maxResults,
                                       'q': q,
                                       'key': key}))

    search_info = response.json()

    if not response.ok:
        # Report the failure instead of returning None without a trace.
        print('Failed to get status, response:', search_info)
        return None

    video_id = None
    # Iterate over the items actually returned: the API may send fewer than
    # maxResults, so indexing by range(maxResults) can run off the end.
    for item in search_info.get('items', []):
        if item.get('snippet', {}).get('channelTitle') != channel_title:
            continue
        # Non-video results (channels, playlists) have no 'videoId' key.
        candidate = item.get('id', {}).get('videoId')
        if candidate is not None:
            # Later matches overwrite earlier ones, so the last match wins.
            video_id = candidate

    if video_id is None:
        print('Failed to get status, response:', search_info)
        raise LookupError(
            'no video from channel %r in the search results' % (channel_title,))
    return video_id

In [None]:
# Search for the video via the HTTP implementation and keep the returned ID.
video_id = html_get_videoid(part='snippet', maxResults=5, q='Jay Chou Mojito', key=key)

In [None]:
# Show the video ID returned by the search.
video_id

**Get statistics data**

In [None]:
def html_get_statistics_data(video_id, key=key):
    """Fetch the statistics of one video over HTTP.

    Sends a GET request to the YouTube Data API v3 ``videos`` endpoint with
    ``part=statistics`` and reads the counters of the first returned item.

    Args:
        video_id: Value for the ``id`` query parameter.
        key: API key sent as the ``key`` query parameter.

    Returns:
        A tuple ``(view_count, like_count, dislike_count, comment_count)``
        taken from the item's ``statistics`` object; a counter that the
        response does not contain is returned as ``None``. Returns ``None``
        (not a tuple) when the HTTP response status is not OK.

    Raises:
        IndexError: If the response contains no items.
        KeyError: If the response has no ``items`` or the first item has no
            ``statistics`` object.
    """
    response = requests.get('https://www.googleapis.com/youtube/v3/videos?' +
                            urlencode({'part': 'statistics',
                                       'id': video_id,
                                       'key': key}))
    statistics_data = response.json()

    if not response.ok:
        # Report the failure instead of returning None without a trace.
        print('Failed to get status, response:', statistics_data)
        return None

    try:
        statistics = statistics_data['items'][0]['statistics']
    except (IndexError, KeyError):
        print('Failed to get status, response:', statistics_data)
        raise

    # Individual counters are optional in the response (for example
    # 'dislikeCount' is not returned for public requests), so read them
    # with .get() instead of failing with KeyError.
    view_count = statistics.get('viewCount')
    like_count = statistics.get('likeCount')
    dislike_count = statistics.get('dislikeCount')
    comment_count = statistics.get('commentCount')
    return view_count, like_count, dislike_count, comment_count
    
    

In [None]:
# Fetch the four counters for the video found above (HTTP implementation).
view_count, like_count, dislike_count, comment_count = html_get_statistics_data(video_id=video_id, key=key)

In [None]:
# Print each counter on its own line as "<name>: <value>".
print(f'view_count: {view_count}')
print(f'like_count: {like_count}')
print(f'dislike_count: {dislike_count}')
print(f'comment_count: {comment_count}')


**Get comments**

In [None]:
def html_get_comments(part, videoId, key=key):
    """Download all comment-thread pages of a video over HTTP.

    Requests the YouTube Data API v3 ``commentThreads`` endpoint and keeps
    following ``nextPageToken`` until a response no longer carries one.

    Args:
        part: Value for the ``part`` query parameter.
        videoId: Value for the ``videoId`` query parameter.
        key: API key sent as the ``key`` query parameter.

    Returns:
        A copy of the first page's response dict whose ``items`` list has
        the items of every following page appended. If the first request
        fails, the error response dict is returned as received. If a later
        page fails, paging stops and the items collected so far are
        returned.
    """
    time.sleep(1)

    url = 'https://www.googleapis.com/youtube/v3/commentThreads?'
    base_params = {'part': part, 'videoId': videoId, 'key': key}

    response = requests.get(url + urlencode(base_params))
    resp = response.json()
    resp_tot = resp.copy()
    if not response.ok:
        # The error payload has no 'items'; make the failure visible.
        print('Failed to get comments, response:', resp)
        return resp_tot

    page_token = resp.get('nextPageToken')
    if page_token:
        print("resp['nextPageToken']:", page_token)

    while page_token:
        print('page_token:', page_token)
        response_tk = requests.get(
            url + urlencode({**base_params, 'pageToken': page_token}))
        resp_tk = response_tk.json()
        if not response_tk.ok:
            # Keep the pages already collected instead of crashing with a
            # KeyError on the missing 'items' of an error payload.
            print('Failed to get comments page, response:', resp_tk)
            break

        page_items = resp_tk.get('items', [])
        print("len(resp_tk['items']):", len(page_items))
        resp_tot['items'].extend(page_items)

        # A response without 'nextPageToken' is the last page.
        page_token = resp_tk.get('nextPageToken')
        if page_token:
            print('page_token:', page_token)
            print('===')

    return resp_tot


In [None]:
# Download every comment thread of the video (HTTP implementation).
# NOTE(review): the video ID is hard-coded here rather than taken from
# `video_id` computed above — presumably the same video; confirm.
comments_json = html_get_comments(part='snippet', videoId='-biOGdYiF-I', key=key)

In [None]:
# Number of comment threads collected across all pages.
len(comments_json['items'])

In [None]:
# Save the merged response to 'comments_json.json' as JSON text.
with open('comments_json.json', 'w') as out_file:
    out_file.write(json.dumps(comments_json))

### Method 2: Google API Python client

**Get Video IDs**

In [None]:
import os
import googleapiclient.discovery

In [None]:
def py_get_videoid(part, maxResults, q, key=key, channel_title='周杰倫 Jay Chou'):
    """Search YouTube with the Google API client and return a video ID.

    Builds a ``youtube`` v3 service object, runs ``search().list`` and scans
    the returned items for entries whose ``snippet.channelTitle`` equals
    ``channel_title``.

    Args:
        part: Passed as ``part`` to ``search().list``. The response must
            contain ``snippet``, because the channel title is read from it.
        maxResults: Passed as ``maxResults`` to ``search().list``.
        q: Search query string.
        key: API key passed as ``developerKey`` when building the service.
        channel_title: Exact channel title a result must carry to be
            selected.

    Returns:
        The ``videoId`` of the last matching item.

    Raises:
        LookupError: If no item from ``channel_title`` carries a
            ``videoId``.
    """
    api_service_name = 'youtube'
    api_version = 'v3'

    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey=key)

    request = youtube.search().list(
        part=part,
        maxResults=maxResults,
        q=q
    )
    search_info = request.execute()

    video_id = None
    # Iterate over the items actually returned: the API may send fewer than
    # maxResults, so indexing by range(maxResults) can run off the end.
    for item in search_info.get('items', []):
        if item.get('snippet', {}).get('channelTitle') != channel_title:
            continue
        # Non-video results (channels, playlists) have no 'videoId' key.
        candidate = item.get('id', {}).get('videoId')
        if candidate is not None:
            # Later matches overwrite earlier ones, so the last match wins.
            video_id = candidate

    if video_id is None:
        print('Failed to get status, response:', search_info)
        raise LookupError(
            'no video from channel %r in the search results' % (channel_title,))
    return video_id

In [None]:
# NOTE(review): this sets OAUTHLIB_INSECURE_TRANSPORT for the whole process,
# yet the helpers in this section only pass an API key (developerKey).
# Presumably a leftover from an OAuth sample — confirm it is needed.
os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

# Search for the video via the client-library implementation.
video_id = py_get_videoid(part='snippet', maxResults=5, q='Jay Chou Mojito', key=key)

In [None]:
# Show the video ID returned by the search.
video_id

**Get statistics data**

In [None]:
def py_get_statistics_data(video_id, key=key):
    """Fetch the statistics of one video with the Google API client.

    Builds a ``youtube`` v3 service object, runs ``videos().list`` with
    ``part='statistics'`` and reads the counters of the first returned item.

    Args:
        video_id: Passed as ``id`` to ``videos().list``.
        key: API key passed as ``developerKey`` when building the service.

    Returns:
        A tuple ``(view_count, like_count, dislike_count, comment_count)``
        taken from the item's ``statistics`` object; a counter that the
        response does not contain is returned as ``None``.

    Raises:
        IndexError: If the response contains no items.
        KeyError: If the response has no ``items`` or the first item has no
            ``statistics`` object.
    """
    api_service_name = 'youtube'
    api_version = 'v3'

    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey=key)

    request = youtube.videos().list(
        part='statistics',
        id=video_id
    )
    # Bind the result to the name the code below reads; it was previously
    # stored as `response`, leaving `statistics_data` undefined.
    statistics_data = request.execute()

    try:
        statistics = statistics_data['items'][0]['statistics']
    except (IndexError, KeyError):
        print('Failed to get status, response:', statistics_data)
        raise

    # Individual counters are optional in the response (for example
    # 'dislikeCount' is not returned for public requests), so read them
    # with .get() instead of failing with KeyError.
    view_count = statistics.get('viewCount')
    like_count = statistics.get('likeCount')
    dislike_count = statistics.get('dislikeCount')
    comment_count = statistics.get('commentCount')
    return view_count, like_count, dislike_count, comment_count


In [None]:
# Fetch the four counters for the video found above (client-library implementation).
view_count, like_count, dislike_count, comment_count = py_get_statistics_data(video_id=video_id, key=key)

In [None]:
# Print each counter on its own line as "<name>: <value>".
print(f'view_count: {view_count}')
print(f'like_count: {like_count}')
print(f'dislike_count: {dislike_count}')
print(f'comment_count: {comment_count}')


**Get comments**

In [None]:
def py_get_comments(part, videoId, key=key):
    """Download all comment-thread pages of a video with the Google API client.

    Builds a ``youtube`` v3 service object, runs ``commentThreads().list``
    and keeps requesting the next page while the latest response carries a
    ``nextPageToken``.

    Args:
        part: Passed as ``part`` to ``commentThreads().list``.
        videoId: Passed as ``videoId`` to ``commentThreads().list``.
        key: API key passed as ``developerKey`` when building the service.

    Returns:
        A shallow copy of the first page's response dict whose ``items``
        list has the items of every following page appended.
    """
    time.sleep(1)

    youtube = googleapiclient.discovery.build(
        'youtube', 'v3', developerKey=key)

    first_page = youtube.commentThreads().list(
        part=part,
        videoId=videoId
    ).execute()
    merged = first_page.copy()

    # Single-page result: nothing more to fetch.
    if 'nextPageToken' not in first_page:
        return merged

    token = first_page['nextPageToken']
    print("resp['nextPageToken']:", first_page['nextPageToken'])
    while token:
        print('page_token:', token)
        page = youtube.commentThreads().list(
            part=part,
            pageToken=token,
            videoId=videoId
        ).execute()
        print("len(resp_tk['items']):", len(page['items']))
        for thread in page['items']:
            merged['items'].append(thread)
        # A page without a token is the last one.
        if 'nextPageToken' not in page:
            break
        token = page['nextPageToken']
        print('page_token:', token)
        print('===')

    return merged


In [None]:
# Download every comment thread with the client-library implementation.
# This cell previously called html_get_comments, so py_get_comments was
# never exercised in the Method 2 section.
comments_json = py_get_comments(part='snippet', videoId='-biOGdYiF-I', key=key)

In [None]:
# Number of comment threads collected across all pages.
len(comments_json['items'])

In [None]:
# Save the merged response to 'comments_json.json' as JSON text. This is the
# same file name used in the Method 1 section, so that file is overwritten.
with open('comments_json.json', 'w') as out_file:
    out_file.write(json.dumps(comments_json))