In [48]:
# import sys
# !{sys.executable} -m pip install python-dotenv
# !{sys.executable} -m pip install google-api-python-client

In [49]:
# !pip install iteration_utilities

In [50]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
import re
from tqdm import tqdm
from iteration_utilities import unique_everseen
import csv

In [51]:
load_dotenv()
api_key = os.getenv("API_KEY")

In [52]:
# create a service object for interacting with the YouTube Data API
youtube = build("youtube", "v3", developerKey=api_key, cache_discovery=False)

In [53]:
# TEST
# link = "https://www.youtube.com/watch?v=36N1Bz7qW0A"
# request = youtube.commentThreads().list(part='id,replies,snippet', videoId=extract_video_id(link))
# response = request.execute()
# print(response)

In [54]:
def extract_video_id(url):
    video_id = None
    if "youtu.be" in url:
        video_id = url.split("/")[-1]
    else:
        query_string = url.split("?")[-1]
        parameters = query_string.split("&")
        for param in parameters:
            if param.startswith("v="):
                video_id = param[2:]
                break

    return video_id

In [55]:
comments = []

In [56]:
def make_csv(comments, channelID=None):
    header = comments[0].keys()

    if channelID:
#         filename = f'comments_{channelID}_{today}.csv'
        filename = f'comments.csv'
    else:
#         filename = f'comments_{today}.csv'
        filename = f'comments.csv'

    with open(filename, 'a', encoding='utf8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(comments)

In [57]:
def process_comments(response_items, csv_output=False):

    for res in response_items:

        # loop through the replies
        if 'replies' in res.keys():
            for reply in res['replies']['comments']:
                comment = reply['snippet']
                comment['commentId'] = reply['id']
                comments.append(comment)
        else:
            comment = {}
            comment['snippet'] = res['snippet']['topLevelComment']['snippet']
            comment['snippet']['parentId'] = None
            comment['snippet']['commentId'] = res['snippet']['topLevelComment']['id']

            comments.append(comment['snippet'])

    if csv_output:
         make_csv(comments)
    
    print(f'Finished processing {len(comments)} comments.')
    return comments

In [58]:
def comment_threads(videoID, to_csv=False):
    
    comments_list = []
    
    request = youtube.commentThreads().list(
        part='id,replies,snippet',
        videoId=videoID,
    )
    response = request.execute()
    comments_list.extend(process_comments(response['items']))

    # if there is nextPageToken, then keep calling the API
    while response.get('nextPageToken', None):
        request = youtube.commentThreads().list(
            part='id,replies,snippet',
            videoId=videoID,
            pageToken=response['nextPageToken']
        )
        response = request.execute()
        comments_list.extend(process_comments(response['items']))

    comments_list = list(unique_everseen(comments_list))

    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
    
    if to_csv:
#         make_csv(comments_list, videoID)
        make_csv(comments_list)
    return comments_list

In [59]:
video_ids = [
    '36N1Bz7qW0A'
]

In [60]:
%%time
if __name__ == '__main__':
    # get comments
#     response = comment_threads(videoID='7Kt6ouYqacQ', to_csv=True)
#     print(response)
    for video_id in tqdm(video_ids, desc='Processing videos'):
        response = comment_threads(videoID=video_id, to_csv=True)

Processing videos:   0%|                                                                         | 0/1 [00:00<?, ?it/s]

Finished processing 23 comments.
Finished processing 45 comments.
Finished processing 65 comments.
Finished processing 85 comments.
Finished processing 105 comments.
Finished processing 125 comments.
Finished processing 148 comments.


Processing videos: 100%|█████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.89s/it]

Finished fetching comments for 36N1Bz7qW0A. 148 comments found.
CPU times: total: 438 ms
Wall time: 1.9 s



