In [1]:
# import sys
# !{sys.executable} -m pip install python-dotenv
# !{sys.executable} -m pip install google-api-python-client
# !{sys.executable} -m pip install iteration-utilities

In [2]:
# Code Link - https://github.com/onlyphantom/youtube_api_python/blob/main/yt_public.py

In [3]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
from iteration_utilities import unique_everseen
import csv
from tqdm import tqdm

In [4]:
comments = []

In [5]:
def process_comments(response_items, csv_output=False):

    for res in response_items:

        # loop through the replies
        if 'replies' in res.keys():
            for reply in res['replies']['comments']:
                comment = reply['snippet']
                comment['commentId'] = reply['id']
                comments.append(comment)
        else:
            comment = {}
            comment['snippet'] = res['snippet']['topLevelComment']['snippet']
            comment['snippet']['parentId'] = None
            comment['snippet']['commentId'] = res['snippet']['topLevelComment']['id']

            comments.append(comment['snippet'])

    if csv_output:
         make_csv(comments)
    
#     print(f'Finished processing {len(comments)} comments.')
    return comments

In [6]:
def make_csv(comments, channelID=None):
    header = comments[0].keys()

    if channelID:
#         filename = f'comments_{channelID}_{today}.csv'
        filename = f'comments.csv'
    else:
#         filename = f'comments_{today}.csv'
        filename = f'comments.csv'

    with open(filename, 'a', encoding='utf8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(comments)

In [7]:
load_dotenv()
API_KEY = os.getenv("API_KEY")

In [8]:
#create a service object for interacting with the YouTube Data API
youtube = build("youtube", "v3", developerKey=API_KEY, cache_discovery=False)

In [9]:
def comment_threads(videoID, to_csv=False):
    
    comments_list = []
    
    request = youtube.commentThreads().list(
        part='id,replies,snippet',
        videoId=videoID,
    )
    response = request.execute()
    comments_list.extend(process_comments(response['items']))

    # if there is nextPageToken, then keep calling the API
    while response.get('nextPageToken', None):
        request = youtube.commentThreads().list(
            part='id,replies,snippet',
            videoId=videoID,
            pageToken=response['nextPageToken']
        )
        response = request.execute()
        comments_list.extend(process_comments(response['items']))

    comments_list = list(unique_everseen(comments_list))

    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
    
    if to_csv:
#         make_csv(comments_list, videoID)
        make_csv(comments_list)
    return comments_list

In [10]:
# video_ids = [
#     'EoArJKQ6t18'
# ]
video_ids = [
    '-WrRhys0mHU',
    's-ZPAcc9i4w',
    'deNOEjZtSfI'
]


In [None]:
%%time
if __name__ == '__main__':
    # get comments
#     response = comment_threads(videoID='7Kt6ouYqacQ', to_csv=True)
#     print(response)
    for video_id in tqdm(video_ids, desc='Processing videos'):
        response = comment_threads(videoID=video_id, to_csv=True)

Processing videos:   5%|███                                                             | 1/21 [01:11<23:52, 71.60s/it]

Finished fetching comments for yE5c23LEhZw. 3588 comments found.


Processing videos:  10%|██████                                                         | 2/21 [03:17<32:48, 103.62s/it]

Finished fetching comments for FMGH8r_Ih5I. 5768 comments found.


Processing videos:  14%|█████████▏                                                      | 3/21 [03:56<22:11, 73.95s/it]

Finished fetching comments for B2NAMXMHQqU. 6272 comments found.


Processing videos:  19%|████████████▏                                                   | 4/21 [04:30<16:33, 58.45s/it]

Finished fetching comments for VIqyhPZah7k. 6652 comments found.
Finished fetching comments for PSnKTv27104. 6757 comments found.


Processing videos:  24%|███████████████▏                                                | 5/21 [04:42<11:07, 41.71s/it]

Finished fetching comments for MSy5ZF05PQQ. 11346 comments found.


Processing videos:  29%|█████████████████▍                                           | 6/21 [28:14<2:06:54, 507.60s/it]

Finished fetching comments for hJdl9E2GRYw. 12613 comments found.


Processing videos:  33%|████████████████████▎                                        | 7/21 [51:39<3:06:50, 800.73s/it]

Finished fetching comments for 7Jcvd_G8uT4. 12969 comments found.


Processing videos:  38%|███████████████████████▏                                     | 8/21 [57:48<2:23:43, 663.32s/it]

Finished fetching comments for ihb6GxDy2Z4. 13050 comments found.


Processing videos:  43%|██████████████████████████▏                                  | 9/21 [59:23<1:37:09, 485.78s/it]

Finished fetching comments for 5fTBWT2aO_E. 13880 comments found.


Processing videos:  48%|███████████████████████████▌                              | 10/21 [1:15:05<1:54:52, 626.59s/it]

Finished fetching comments for -9TIZROu-fY. 14031 comments found.


Processing videos:  52%|██████████████████████████████▍                           | 11/21 [1:17:27<1:19:42, 478.23s/it]

Finished fetching comments for 0OWV1FWABDA. 19688 comments found.


Processing videos:  57%|████████████████████████████████▌                        | 12/21 [3:18:06<6:20:13, 2534.85s/it]

Finished fetching comments for FYoMO1s6jco. 20593 comments found.


Processing videos:  62%|███████████████████████████████████▎                     | 13/21 [3:43:26<4:56:59, 2227.43s/it]

Finished fetching comments for 6G749ZVOrsk. 20598 comments found.


Processing videos:  67%|██████████████████████████████████████                   | 14/21 [3:44:05<3:02:45, 1566.51s/it]

Finished fetching comments for kwI1tiiOxXE. 20686 comments found.


Processing videos:  71%|████████████████████████████████████████▋                | 15/21 [3:47:18<1:55:14, 1152.39s/it]

Finished fetching comments for umabahhE8d4. 20740 comments found.


Processing videos:  76%|████████████████████████████████████████████▏             | 16/21 [3:49:14<1:10:03, 840.63s/it]

Finished fetching comments for pYJkf5CIviQ. 20767 comments found.


Processing videos:  81%|████████████████████████████████████████████████▌           | 17/21 [3:50:56<41:13, 618.34s/it]

Finished fetching comments for R98JLQXIwYo. 20858 comments found.


Processing videos:  86%|███████████████████████████████████████████████████▍        | 18/21 [3:56:37<26:45, 535.08s/it]

In [None]:
'''
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
from iteration_utilities import unique_everseen
import csv
comments = []
def make_csv(comments, channelID=None):
    header = comments[0].keys()

    if channelID:
#         filename = f'comments_{channelID}_{today}.csv'
        filename = f'comments.csv'
    else:
#         filename = f'comments_{today}.csv'
        filename = f'comments.csv'

    with open(filename, 'w', encoding='utf8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(comments)
def comment_threads(videoID, to_csv=False):
    
    comments_list = []
    
    request = youtube.commentThreads().list(
        part='id,replies,snippet',
        videoId=videoID,
    )
    response = request.execute()
    comments_list.extend(process_comments(response['items']))

    # if there is nextPageToken, then keep calling the API
    while response.get('nextPageToken', None):
        request = youtube.commentThreads().list(
            part='id,replies,snippet',
            videoId=videoID,
            pageToken=response['nextPageToken']
        )
        response = request.execute()
        comments_list.extend(process_comments(response['items']))

    comments_list = list(unique_everseen(comments_list))

    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
    
    if to_csv:
#         make_csv(comments_list, videoID)
        make_csv(comments_list)
    return comments_list
if __name__ == '__main__':
    # get comments
    response = comment_threads(videoID='G4EVRUFttG0', to_csv=True)
    print(response)

'''