In [1]:
# import sys
# !{sys.executable} -m pip install python-dotenv
# !{sys.executable} -m pip install google-api-python-client
# !{sys.executable} -m pip install iteration-utilities

In [2]:
# Code Link - https://github.com/onlyphantom/youtube_api_python/blob/main/yt_public.py

In [3]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
from iteration_utilities import unique_everseen
import csv
from tqdm import tqdm

In [4]:
comments = []

In [5]:
def process_comments(response_items, csv_output=False):

    for res in response_items:

        # loop through the replies
        if 'replies' in res.keys():
            for reply in res['replies']['comments']:
                comment = reply['snippet']
                comment['commentId'] = reply['id']
                comments.append(comment)
        else:
            comment = {}
            comment['snippet'] = res['snippet']['topLevelComment']['snippet']
            comment['snippet']['parentId'] = None
            comment['snippet']['commentId'] = res['snippet']['topLevelComment']['id']

            comments.append(comment['snippet'])

    if csv_output:
         make_csv(comments)
    
#     print(f'Finished processing {len(comments)} comments.')
    return comments

In [6]:
def make_csv(comments, channelID=None):
    header = comments[0].keys()

    if channelID:
#         filename = f'comments_{channelID}_{today}.csv'
        filename = f'comments.csv'
    else:
#         filename = f'comments_{today}.csv'
        filename = f'comments.csv'

    with open(filename, 'a', encoding='utf8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(comments)

In [7]:
load_dotenv()
API_KEY = os.getenv("API_KEY")

In [8]:
#create a service object for interacting with the YouTube Data API
youtube = build("youtube", "v3", developerKey=API_KEY, cache_discovery=False)

In [9]:
def comment_threads(videoID, to_csv=False):
    
    comments_list = []
    
    request = youtube.commentThreads().list(
        part='id,replies,snippet',
        videoId=videoID,
    )
    response = request.execute()
    comments_list.extend(process_comments(response['items']))

    # if there is nextPageToken, then keep calling the API
    while response.get('nextPageToken', None):
        request = youtube.commentThreads().list(
            part='id,replies,snippet',
            videoId=videoID,
            pageToken=response['nextPageToken']
        )
        response = request.execute()
        comments_list.extend(process_comments(response['items']))

    comments_list = list(unique_everseen(comments_list))

    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
    
    if to_csv:
#         make_csv(comments_list, videoID)
        make_csv(comments_list)
    return comments_list

In [10]:
video_ids = [
    "rlKvtJlQc8U",
    "WGiEH-xrW74", "6peIIVkeY2w", "SB7cw4ilTOk", "gPzPcRszfuQ", "ePBUJJW90_4",
    "RClNT_u-6w8", "w5SKwRMCvQE", "PoKw7TrdYIQ", "4VPsaq0MBxE", "0jmZYk_Fc0g",
    "Mb8s2QSlT6E", "PHnpWVSr8tY", "MH4A82UQZn4", "ZbDwquYgaPM", "f8sYK4Ca6j4",
    "M7yv_BksSGk", "zUzRyHTK3dU", "OnjWjPchNeg", "t_2mmSp_KmA", "9Nw2YebWCz8",
    "b5Dk2HSfQYw", "jiXKeqeLULY", "lqN1uj52K-M", "-96XmT3gcr0", "_RJ8hbw7jJ0",
    "5WfO6yjCvtw", "j_6_CtqhidU", "Yi5_dl7Tfps", "y9PgmL3cYiU", "09CgkWyqRO8",
    "wuaAbcN13KE", "OUqqWRfCE0w", "CmOqJCsFEMo", "xncAlCvIqeI", "JrPhT1ggbR4",
    "JDz9SLo10pM", "BldgfZ1nfIY", "0rBn-_aVMuY", "GnUjvTZWGDQ", "BNZyfikdO9I",
    "pE3A75jHWsw", "P1ttJgHR0uY", "jNYyApxcpOU", "L8SuDRlpKBM", "jjQiurszvLs",
    "I_N04KkoYPk", "tO--TeUZ0Ik", "vFLbvgoJqzo", "6pTdreSJmtE", 
    "HsGa76se5Ns", "saJ7uiVwtEI", "QsW-ygDmhK8", "L_AVwEDNr70", "XTVOPH8AHEM",
    "LJ2U78nOi7c", "ZmblPQ1XR7k", "A6-yGHgrst4", "KfyYF5_pDzY", "xKSwVOzsOFk",
    "EglsC8Xix8s", "s8vtfwT2GW8", "txR94UXx-5U", "AxGRuCTxmkM"
]


In [None]:
%%time
if __name__ == '__main__':
    # get comments
#     response = comment_threads(videoID='7Kt6ouYqacQ', to_csv=True)
#     print(response)
    for video_id in tqdm(video_ids, desc='Processing videos'):
        response = comment_threads(videoID=video_id, to_csv=True)

Processing videos:   0%|                                                                        | 0/72 [00:00<?, ?it/s]

Finished fetching comments for 7Kt6ouYqacQ. 5379 comments found.


Processing videos:   3%|█▋                                                         | 2/72 [35:25<23:02:53, 1185.34s/it]

Finished fetching comments for H8BN9VMhYx0. 10355 comments found.


Processing videos:   4%|██▌                                                         | 3/72 [36:42<13:01:08, 679.26s/it]

Finished fetching comments for ydT_tJ-PL_c. 10608 comments found.
Finished fetching comments for ckvuixGufDM. 12658 comments found.


Processing videos:   6%|███▎                                                        | 4/72 [50:49<14:04:54, 745.51s/it]

Finished fetching comments for Rk9uyBqvC0k. 12915 comments found.


Processing videos:   7%|████▏                                                       | 5/72 [53:52<10:06:11, 542.86s/it]

Finished fetching comments for OoSxfvbQk88. 13369 comments found.


Processing videos:   8%|█████                                                        | 6/72 [58:10<8:10:34, 445.98s/it]

Finished fetching comments for yC5g-FKCRZA. 14812 comments found.


Processing videos:  10%|█████▋                                                    | 7/72 [1:14:33<11:13:05, 621.32s/it]

Finished fetching comments for B39p_2_RUKo. 14873 comments found.


Processing videos:  11%|██████▌                                                    | 8/72 [1:15:11<7:44:50, 435.78s/it]

In [None]:
'''
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
from iteration_utilities import unique_everseen
import csv
comments = []
def make_csv(comments, channelID=None):
    header = comments[0].keys()

    if channelID:
#         filename = f'comments_{channelID}_{today}.csv'
        filename = f'comments.csv'
    else:
#         filename = f'comments_{today}.csv'
        filename = f'comments.csv'

    with open(filename, 'w', encoding='utf8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(comments)
def comment_threads(videoID, to_csv=False):
    
    comments_list = []
    
    request = youtube.commentThreads().list(
        part='id,replies,snippet',
        videoId=videoID,
    )
    response = request.execute()
    comments_list.extend(process_comments(response['items']))

    # if there is nextPageToken, then keep calling the API
    while response.get('nextPageToken', None):
        request = youtube.commentThreads().list(
            part='id,replies,snippet',
            videoId=videoID,
            pageToken=response['nextPageToken']
        )
        response = request.execute()
        comments_list.extend(process_comments(response['items']))

    comments_list = list(unique_everseen(comments_list))

    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
    
    if to_csv:
#         make_csv(comments_list, videoID)
        make_csv(comments_list)
    return comments_list
if __name__ == '__main__':
    # get comments
    response = comment_threads(videoID='G4EVRUFttG0', to_csv=True)
    print(response)

'''