In [1]:
# import sys
# !{sys.executable} -m pip install python-dotenv
# !{sys.executable} -m pip install google-api-python-client
# !{sys.executable} -m pip install iteration-utilities

In [2]:
# Code Link - https://github.com/onlyphantom/youtube_api_python/blob/main/yt_public.py

In [3]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
from iteration_utilities import unique_everseen
import csv
from tqdm import tqdm

In [4]:
comments = []

In [5]:
def process_comments(response_items, csv_output=False):

    for res in response_items:

        # loop through the replies
        if 'replies' in res.keys():
            for reply in res['replies']['comments']:
                comment = reply['snippet']
                comment['commentId'] = reply['id']
                comments.append(comment)
        else:
            comment = {}
            comment['snippet'] = res['snippet']['topLevelComment']['snippet']
            comment['snippet']['parentId'] = None
            comment['snippet']['commentId'] = res['snippet']['topLevelComment']['id']

            comments.append(comment['snippet'])

    if csv_output:
         make_csv(comments)
    
#     print(f'Finished processing {len(comments)} comments.')
    return comments

In [6]:
def make_csv(comments, channelID=None):
    header = comments[0].keys()

    if channelID:
#         filename = f'comments_{channelID}_{today}.csv'
        filename = f'comments_july6.csv'
    else:
#         filename = f'comments_{today}.csv'
        filename = f'comments_july6.csv'

    with open(filename, 'a', encoding='utf8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(comments)

In [7]:
load_dotenv()
API_KEY = os.getenv("API_KEY")

In [8]:
#create a service object for interacting with the YouTube Data API
youtube = build("youtube", "v3", developerKey=API_KEY, cache_discovery=False)

In [9]:
def comment_threads(videoID, to_csv=False):
    
    comments_list = []
    
    request = youtube.commentThreads().list(
        part='id,replies,snippet',
        videoId=videoID,
    )
    response = request.execute()
    comments_list.extend(process_comments(response['items']))

    # if there is nextPageToken, then keep calling the API
    while response.get('nextPageToken', None):
        request = youtube.commentThreads().list(
            part='id,replies,snippet',
            videoId=videoID,
            pageToken=response['nextPageToken']
        )
        response = request.execute()
        comments_list.extend(process_comments(response['items']))

    comments_list = list(unique_everseen(comments_list))

    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
    
    if to_csv:
#         make_csv(comments_list, videoID)
        make_csv(comments_list)
    return comments_list

In [10]:
video_ids = [
    'jV78iR4Vi68',
    'U62U2cgPagI',
    'TB7yAjRgO-c',
    'BTUQ8dzMPDQ',
    'nl2CLw4PLFA',
    'x0TheaRQ6ZE',
    'gIxQOlxb2iY',
    'GvxUyrWqmFg',
    'olmhYxH8_HM'
]

In [11]:
if __name__ == '__main__':
    # get comments
#     response = comment_threads(videoID='Pemat3Cp_NQ', to_csv=True)
#     print(response)
    for video_id in tqdm(video_ids, desc='Processing videos'):
        response = comment_threads(videoID=video_id, to_csv=True)

Processing videos:  11%|███████▏                                                         | 1/9 [00:00<00:03,  2.14it/s]

Finished fetching comments for jV78iR4Vi68. 6 comments found.


Processing videos:  22%|██████████████▍                                                  | 2/9 [00:04<00:16,  2.30s/it]

Finished fetching comments for U62U2cgPagI. 268 comments found.


Processing videos:  33%|█████████████████████▋                                           | 3/9 [00:04<00:08,  1.37s/it]

Finished fetching comments for TB7yAjRgO-c. 287 comments found.


Processing videos:  44%|████████████████████████████▉                                    | 4/9 [00:05<00:05,  1.19s/it]

Finished fetching comments for BTUQ8dzMPDQ. 351 comments found.


Processing videos:  56%|████████████████████████████████████                             | 5/9 [00:05<00:03,  1.03it/s]

Finished fetching comments for nl2CLw4PLFA. 378 comments found.


Processing videos:  67%|███████████████████████████████████████████▎                     | 6/9 [00:11<00:08,  2.74s/it]

Finished fetching comments for x0TheaRQ6ZE. 819 comments found.


Processing videos:  78%|██████████████████████████████████████████████████▌              | 7/9 [00:12<00:03,  1.95s/it]

Finished fetching comments for gIxQOlxb2iY. 832 comments found.


Processing videos:  89%|█████████████████████████████████████████████████████████▊       | 8/9 [00:12<00:01,  1.51s/it]

Finished fetching comments for GvxUyrWqmFg. 865 comments found.


Processing videos: 100%|█████████████████████████████████████████████████████████████████| 9/9 [00:15<00:00,  1.67s/it]

Finished fetching comments for olmhYxH8_HM. 1002 comments found.





In [12]:
'''
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
from iteration_utilities import unique_everseen
import csv
comments = []
def make_csv(comments, channelID=None):
    header = comments[0].keys()

    if channelID:
#         filename = f'comments_{channelID}_{today}.csv'
        filename = f'comments.csv'
    else:
#         filename = f'comments_{today}.csv'
        filename = f'comments.csv'

    with open(filename, 'w', encoding='utf8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(comments)
def comment_threads(videoID, to_csv=False):
    
    comments_list = []
    
    request = youtube.commentThreads().list(
        part='id,replies,snippet',
        videoId=videoID,
    )
    response = request.execute()
    comments_list.extend(process_comments(response['items']))

    # if there is nextPageToken, then keep calling the API
    while response.get('nextPageToken', None):
        request = youtube.commentThreads().list(
            part='id,replies,snippet',
            videoId=videoID,
            pageToken=response['nextPageToken']
        )
        response = request.execute()
        comments_list.extend(process_comments(response['items']))

    comments_list = list(unique_everseen(comments_list))

    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
    
    if to_csv:
#         make_csv(comments_list, videoID)
        make_csv(comments_list)
    return comments_list
if __name__ == '__main__':
    # get comments
    response = comment_threads(videoID='G4EVRUFttG0', to_csv=True)
    print(response)

'''

'\nimport os\nfrom dotenv import load_dotenv\nfrom googleapiclient.discovery import build\nfrom iteration_utilities import unique_everseen\nimport csv\ncomments = []\ndef make_csv(comments, channelID=None):\n    header = comments[0].keys()\n\n    if channelID:\n#         filename = f\'comments_{channelID}_{today}.csv\'\n        filename = f\'comments.csv\'\n    else:\n#         filename = f\'comments_{today}.csv\'\n        filename = f\'comments.csv\'\n\n    with open(filename, \'w\', encoding=\'utf8\', newline=\'\') as f:\n        writer = csv.DictWriter(f, fieldnames=header, extrasaction=\'ignore\')\n        writer.writeheader()\n        writer.writerows(comments)\ndef comment_threads(videoID, to_csv=False):\n    \n    comments_list = []\n    \n    request = youtube.commentThreads().list(\n        part=\'id,replies,snippet\',\n        videoId=videoID,\n    )\n    response = request.execute()\n    comments_list.extend(process_comments(response[\'items\']))\n\n    # if there is nextPag