# Youtube配信のコメントデータを拾うテスト

**下記リンクを参考に、必要なクラス、メソッド等を簡単に纏めたメモ**
- https://github.com/shughes-uk/python-youtubechat/blob/master/youtubechat/ytchat.py
- https://starhoshi.hatenablog.com/entry/2018/01/07/015121

(old)
- https://blog.sky-net.pw/article/86
- https://github.com/youtube/api-samples/blob/master/python/list_broadcasts.py

## 注意
- 他人の放送からコメントデータを拾う場合は、`LiveBroadcasts: list`で`ChatID`を拾うことはできない。[参考](https://stackoverflow.com/questions/34688099/how-do-i-use-livebroadcastslist-from-youtube-api)
    - 代わりに、`VideoID -> Videos: list -> activeLiveChatId -> LiveChatMessages: list`という手順で取得するしか無い。


- 放送中のデータは、最新コメントから遡って最大75件しか取得できない。(公式では最大2000件とあるが、実際は75件に制限されている)

## 準備

In [1]:
# This Python file uses the following encoding: utf-8

import cgi
import logging
import sys
import threading
import time
from datetime import datetime, timedelta
from json import dumps, loads
from pprint import pformat

import dateutil.parser
import httplib2
from oauth2client import client
from oauth2client.file import Storage

import webbrowser


PY3 = sys.version_info[0] == 3
if PY3:
    from urllib.parse import urlencode
    from queue import Queue
else:
    from Queue import Queue
    from urllib import urlencode


In [2]:
def _json_request(http, url, method='GET', headers=None, body=None):
    resp, content = http.request(url, method, headers=headers, body=body)
    content_type, content_type_params = cgi.parse_header(resp.get('content-type', 'application/json; charset=UTF-8'))
    charset = content_type_params.get('charset', 'UTF-8')
    data = loads(content.decode(charset))
    if 'error' in data:
        error = data['error']
        raise YoutubeLiveChatError(error['message'], error.get('code'), error.get('errors'))
    return resp, data

In [3]:
class LiveChatApi(object):
    """Small client for the YouTube Data API v3 video and live-chat endpoints."""

    def __init__(self, http):
        """Store the HTTP client used for every request.

        The ``http`` argument is obtained as follows:
        1. storage = Storage(credential_file)
        2. credentials = storage.get()
        3. http = credentials.authorize(httplib2.Http())
        """
        self.http = http
        self.logger = logging.getLogger("liveChat_api")

    def get_live_chat_id(self, video_id):
        """Return the ``activeLiveChatId`` of the video ``video_id``.

        Queries the ``videos`` endpoint with ``part=liveStreamingDetails``.

        Raises:
            IndexError: if the response's ``items`` list is empty.
            KeyError: if the response lacks ``items``, ``liveStreamingDetails``
                or ``activeLiveChatId``.
        """
        url = "https://www.googleapis.com/youtube/v3/videos?"
        query = urlencode({'part': 'liveStreamingDetails', 'id': video_id})
        _, video_data = _json_request(self.http, url + query)
        return video_data['items'][0]['liveStreamingDetails']['activeLiveChatId']

    def get_all_messages(self, livechatId):
        """Fetch chat messages page by page and merge them into one response.

        The first response is returned with the ``items`` of every following
        page appended to its own ``items`` list. Paging stops as soon as the
        number of collected items reaches the first page's
        ``pageInfo.totalResults``, a page comes back empty, or a response
        carries no ``nextPageToken``.

        Before each follow-up request the method sleeps for the
        ``pollingIntervalMillis`` of the previous response.
        """
        data = self.live_chat_messages_list(livechatId, maxResults=2000)
        total_items = data['pageInfo']['totalResults']
        page = data
        while len(data['items']) < total_items:
            pageToken = page.get('nextPageToken')
            if not pageToken:
                # Without a token the same page would be fetched again.
                break
            # Float division keeps sub-second intervals on Python 2 as well.
            time.sleep(page.get('pollingIntervalMillis', 0) / 1000.0)
            page = self.live_chat_messages_list(livechatId, maxResults=2000, pageToken=pageToken)
            if not page['items']:
                break
            data['items'].extend(page['items'])
        return data

    def live_chat_messages_list(self,
                                livechatId,
                                part='snippet,authorDetails',
                                maxResults=200,
                                pageToken=None,
                                profileImageSize=None):
        """Request one page from the ``liveChat/messages`` endpoint.

        Args:
            livechatId: value sent as the ``liveChatId`` query parameter.
            part: value sent as the ``part`` query parameter.
            maxResults: value sent as the ``maxResults`` query parameter.
            pageToken: sent as ``pageToken`` only when truthy.
            profileImageSize: sent as ``profileImageSize`` only when truthy.

        Returns:
            The parsed JSON response.
        """
        # A list of pairs keeps the parameter order stable; urlencode escapes
        # every value, matching how get_live_chat_id builds its query string.
        params = [('liveChatId', livechatId)]
        if pageToken:
            params.append(('pageToken', pageToken))
        if profileImageSize:
            params.append(('profileImageSize', profileImageSize))
        params.append(('part', part))
        params.append(('maxResults', maxResults))
        url = 'https://www.googleapis.com/youtube/v3/liveChat/messages?' + urlencode(params)
        _, data = _json_request(self.http, url)
        return data

## 認証

In [None]:
# Python 3 has no raw_input; alias it to input so the prompt below works on
# both major versions. Probing the name directly avoids inspecting
# __builtins__, which is a module in __main__ but a plain dict in other modules.
try:
    raw_input
except NameError:
    raw_input = input

# Build the OAuth flow from the client secrets file, requesting both YouTube
# scopes.
flow = client.flow_from_clientsecrets(
    'client_secrets.json',
    scope=['https://www.googleapis.com/auth/youtube', 'https://www.googleapis.com/auth/youtube.force-ssl'],
    redirect_uri='urn:ietf:wg:oauth:2.0:oob')

# Open the consent page in the browser and read the resulting code from stdin.
auth_uri = flow.step1_get_authorize_url()
webbrowser.open(auth_uri)
auth_code = raw_input("auth code: ")  # paste the authorization code here

# Exchange the code for credentials and store them in the "oauth_creds" file.
credentials = flow.step2_exchange(auth_code)
http_auth = credentials.authorize(httplib2.Http())
storage = Storage("oauth_creds")
storage.put(credentials)

## コメントデータ取得

In [5]:
# Initial settings: load the stored credentials and build an authorized client.
credential_file = "oauth_creds"
storage = Storage(credential_file)
credentials = storage.get()
if credentials is None:
    # Fail with a clear message instead of an AttributeError on the next line.
    raise RuntimeError(
        "No stored credentials in '{0}'; run the authorization step first.".format(credential_file))
http = credentials.authorize(httplib2.Http())

# Look up the live chat attached to the video, then fetch its messages.
livechat = LiveChatApi(http)
video_id = "7ja9BBrqbJg"
chat_id = livechat.get_live_chat_id(video_id)
chat_data = livechat.get_all_messages(chat_id)

In [6]:
chat_data

{'kind': 'youtube#liveChatMessageListResponse',
 'etag': '"DuHzAJ-eQIiCIp7p4ldoVcVAOeY/OWq99JlhK41I2wmpVVUooZQjv4Y"',
 'nextPageToken': 'GM3FpLvel9wCIK6467Tfl9wC',
 'pollingIntervalMillis': 30000,
 'offlineAt': '2018-07-11T19:03:51.000Z',
 'pageInfo': {'totalResults': 75, 'resultsPerPage': 75},
 'items': [{'kind': 'youtube#liveChatMessage',
   'etag': '"DuHzAJ-eQIiCIp7p4ldoVcVAOeY/8LBQv66EqyflltPq3gR5Jh_d8Ag"',
   'id': 'LCC.Cg8KDQoLN2phOUJCcnFiSmcSOgoaQ0lteHV0SE5sOXdDRlVrTFpBb2Q4MklKYlESHENMM1EzY1hObDl3Q0ZRdnNXQW9kbUg4SFpBLTA',
   'snippet': {'type': 'textMessageEvent',
    'liveChatId': 'Cg0KCzdqYTlCQnJxYkpn',
    'authorChannelId': 'UC5NtcWICb8ggcWVDi5ZieeQ',
    'publishedAt': '2018-07-11T17:48:20.202Z',
    'hasDisplayContent': True,
    'displayMessage': '音量(0)お久しぶりという名のこんにちわ',
    'textMessageDetails': {'messageText': '音量(0)お久しぶりという名のこんにちわ'}},
   'authorDetails': {'channelId': 'UC5NtcWICb8ggcWVDi5ZieeQ',
    'channelUrl': 'http://www.youtube.com/channel/UC5NtcWICb8ggcWVDi5ZieeQ'