In [13]:
# import sys
# !{sys.executable} -m pip install python-dotenv
# !{sys.executable} -m pip install google-api-python-client
# !{sys.executable} -m pip install iteration-utilities

In [14]:
# Code Link - https://github.com/onlyphantom/youtube_api_python/blob/main/yt_public.py

In [15]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
from iteration_utilities import unique_everseen
import csv
from tqdm import tqdm

In [16]:
comments = []

In [17]:
def process_comments(response_items, csv_output=False):

    for res in response_items:

        # loop through the replies
        if 'replies' in res.keys():
            for reply in res['replies']['comments']:
                comment = reply['snippet']
                comment['commentId'] = reply['id']
                comments.append(comment)
        else:
            comment = {}
            comment['snippet'] = res['snippet']['topLevelComment']['snippet']
            comment['snippet']['parentId'] = None
            comment['snippet']['commentId'] = res['snippet']['topLevelComment']['id']

            comments.append(comment['snippet'])

    if csv_output:
         make_csv(comments)
    
#     print(f'Finished processing {len(comments)} comments.')
    return comments

In [18]:
def make_csv(comments, channelID=None):
    header = comments[0].keys()

    if channelID:
#         filename = f'comments_{channelID}_{today}.csv'
        filename = f'comments_july6.csv'
    else:
#         filename = f'comments_{today}.csv'
        filename = f'comments_july6.csv'

    with open(filename, 'a', encoding='utf8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(comments)

In [19]:
load_dotenv()
API_KEY = os.getenv("API_KEY")

In [20]:
#create a service object for interacting with the YouTube Data API
youtube = build("youtube", "v3", developerKey=API_KEY, cache_discovery=False)

In [21]:
def comment_threads(videoID, to_csv=False):
    
    comments_list = []
    
    request = youtube.commentThreads().list(
        part='id,replies,snippet',
        videoId=videoID,
    )
    response = request.execute()
    comments_list.extend(process_comments(response['items']))

    # if there is nextPageToken, then keep calling the API
    while response.get('nextPageToken', None):
        request = youtube.commentThreads().list(
            part='id,replies,snippet',
            videoId=videoID,
            pageToken=response['nextPageToken']
        )
        response = request.execute()
        comments_list.extend(process_comments(response['items']))

    comments_list = list(unique_everseen(comments_list))

    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
    
    if to_csv:
#         make_csv(comments_list, videoID)
        make_csv(comments_list)
    return comments_list

In [22]:
video_ids = [
    "HsGa76se5Ns",
    "saJ7uiVwtEI",
    "QsW-ygDmhK8",
    "L_AVwEDNr70",
    "XTVOPH8AHEM",
    "LJ2U78nOi7c",
    "ZmblPQ1XR7k",
    "A6-yGHgrst4",
    "KfyYF5_pDzY",
    "xKSwVOzsOFk",
    "EglsC8Xix8s",
    "s8vtfwT2GW8",
    "txR94UXx-5U"
]

In [23]:
%%time
if __name__ == '__main__':
    # get comments
#     response = comment_threads(videoID='7Kt6ouYqacQ', to_csv=True)
#     print(response)
    for video_id in tqdm(video_ids, desc='Processing videos'):
        response = comment_threads(videoID=video_id, to_csv=True)

Processing videos:   8%|████▉                                                           | 1/13 [00:00<00:09,  1.26it/s]

Finished fetching comments for HsGa76se5Ns. 43 comments found.


Processing videos:  15%|█████████▊                                                      | 2/13 [00:01<00:10,  1.06it/s]

Finished fetching comments for saJ7uiVwtEI. 97 comments found.


Processing videos:  23%|██████████████▊                                                 | 3/13 [00:02<00:06,  1.59it/s]

Finished fetching comments for QsW-ygDmhK8. 102 comments found.


Processing videos:  31%|███████████████████▋                                            | 4/13 [00:02<00:04,  2.13it/s]

Finished fetching comments for L_AVwEDNr70. 103 comments found.


Processing videos:  38%|████████████████████████▌                                       | 5/13 [00:03<00:04,  1.60it/s]

Finished fetching comments for XTVOPH8AHEM. 156 comments found.


Processing videos:  46%|█████████████████████████████▌                                  | 6/13 [00:04<00:06,  1.16it/s]

Finished fetching comments for LJ2U78nOi7c. 247 comments found.


Processing videos:  54%|██████████████████████████████████▍                             | 7/13 [00:43<01:19, 13.28s/it]

Finished fetching comments for ZmblPQ1XR7k. 2184 comments found.
Finished fetching comments for A6-yGHgrst4. 2197 comments found.


Processing videos:  69%|████████████████████████████████████████████▎                   | 9/13 [00:46<00:28,  7.21s/it]

Finished fetching comments for KfyYF5_pDzY. 2249 comments found.


Processing videos:  77%|████████████████████████████████████████████████▍              | 10/13 [00:48<00:16,  5.40s/it]

Finished fetching comments for xKSwVOzsOFk. 2280 comments found.


Processing videos:  85%|█████████████████████████████████████████████████████▎         | 11/13 [01:03<00:16,  8.36s/it]

Finished fetching comments for EglsC8Xix8s. 2742 comments found.


Processing videos:  92%|██████████████████████████████████████████████████████████▏    | 12/13 [01:52<00:20, 20.81s/it]

Finished fetching comments for s8vtfwT2GW8. 3836 comments found.


Processing videos: 100%|███████████████████████████████████████████████████████████████| 13/13 [02:13<00:00, 10.29s/it]

Finished fetching comments for txR94UXx-5U. 4206 comments found.
CPU times: total: 1min 7s
Wall time: 2min 13s





In [24]:
'''
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
from iteration_utilities import unique_everseen
import csv
comments = []
def make_csv(comments, channelID=None):
    header = comments[0].keys()

    if channelID:
#         filename = f'comments_{channelID}_{today}.csv'
        filename = f'comments.csv'
    else:
#         filename = f'comments_{today}.csv'
        filename = f'comments.csv'

    with open(filename, 'w', encoding='utf8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(comments)
def comment_threads(videoID, to_csv=False):
    
    comments_list = []
    
    request = youtube.commentThreads().list(
        part='id,replies,snippet',
        videoId=videoID,
    )
    response = request.execute()
    comments_list.extend(process_comments(response['items']))

    # if there is nextPageToken, then keep calling the API
    while response.get('nextPageToken', None):
        request = youtube.commentThreads().list(
            part='id,replies,snippet',
            videoId=videoID,
            pageToken=response['nextPageToken']
        )
        response = request.execute()
        comments_list.extend(process_comments(response['items']))

    comments_list = list(unique_everseen(comments_list))

    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
    
    if to_csv:
#         make_csv(comments_list, videoID)
        make_csv(comments_list)
    return comments_list
if __name__ == '__main__':
    # get comments
    response = comment_threads(videoID='G4EVRUFttG0', to_csv=True)
    print(response)

'''

'\nimport os\nfrom dotenv import load_dotenv\nfrom googleapiclient.discovery import build\nfrom iteration_utilities import unique_everseen\nimport csv\ncomments = []\ndef make_csv(comments, channelID=None):\n    header = comments[0].keys()\n\n    if channelID:\n#         filename = f\'comments_{channelID}_{today}.csv\'\n        filename = f\'comments.csv\'\n    else:\n#         filename = f\'comments_{today}.csv\'\n        filename = f\'comments.csv\'\n\n    with open(filename, \'w\', encoding=\'utf8\', newline=\'\') as f:\n        writer = csv.DictWriter(f, fieldnames=header, extrasaction=\'ignore\')\n        writer.writeheader()\n        writer.writerows(comments)\ndef comment_threads(videoID, to_csv=False):\n    \n    comments_list = []\n    \n    request = youtube.commentThreads().list(\n        part=\'id,replies,snippet\',\n        videoId=videoID,\n    )\n    response = request.execute()\n    comments_list.extend(process_comments(response[\'items\']))\n\n    # if there is nextPag