In [14]:
# import sys
# !{sys.executable} -m pip install python-dotenv
# !{sys.executable} -m pip install google-api-python-client

In [15]:
# !pip install iteration_utilities

In [1]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
import re
from tqdm import tqdm
from iteration_utilities import unique_everseen
import csv

In [2]:
load_dotenv()
api_key = os.getenv("API_KEY")

In [3]:
# create a service object for interacting with the YouTube Data API
youtube = build("youtube", "v3", developerKey=api_key, cache_discovery=False)

In [4]:
# TEST
# link = "https://www.youtube.com/watch?v=36N1Bz7qW0A"
# request = youtube.commentThreads().list(part='id,replies,snippet', videoId=extract_video_id(link))
# response = request.execute()
# print(response)

In [13]:
def extract_video_id(url):
    video_id = None
    if "youtu.be" in url:
        video_id = url.split("/")[-1]
    else:
        query_string = url.split("?")[-1]
        parameters = query_string.split("&")
        for param in parameters:
            if param.startswith("v="):
                video_id = param[2:]
                break

    return video_id

In [6]:
comments = []

In [7]:
def make_csv(comments, channelID=None):
    header = comments[0].keys()

    if channelID:
#         filename = f'comments_{channelID}_{today}.csv'
        filename = f'comments.csv'
    else:
#         filename = f'comments_{today}.csv'
        filename = f'comments.csv'

    with open(filename, 'a', encoding='utf8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(comments)

In [8]:
def process_comments(response_items, csv_output=False):

    for res in response_items:

        # loop through the replies
        if 'replies' in res.keys():
            for reply in res['replies']['comments']:
                comment = reply['snippet']
                comment['commentId'] = reply['id']
                comments.append(comment)
        else:
            comment = {}
            comment['snippet'] = res['snippet']['topLevelComment']['snippet']
            comment['snippet']['parentId'] = None
            comment['snippet']['commentId'] = res['snippet']['topLevelComment']['id']

            comments.append(comment['snippet'])

    if csv_output:
         make_csv(comments)
    
    print(f'Finished processing {len(comments)} comments.')
    return comments

In [9]:
def comment_threads(videoID, to_csv=False):
    
    comments_list = []
    
    request = youtube.commentThreads().list(
        part='id,replies,snippet',
        videoId=videoID,
    )
    response = request.execute()
    comments_list.extend(process_comments(response['items']))

    # if there is nextPageToken, then keep calling the API
    while response.get('nextPageToken', None):
        request = youtube.commentThreads().list(
            part='id,replies,snippet',
            videoId=videoID,
            pageToken=response['nextPageToken']
        )
        response = request.execute()
        comments_list.extend(process_comments(response['items']))

    comments_list = list(unique_everseen(comments_list))

    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
    
    if to_csv:
#         make_csv(comments_list, videoID)
        make_csv(comments_list)
    return comments_list

In [17]:
video_ids = [
    "9g3692xrYog",
    "-L-xfr3pyMM",
    "tC0TWLQwxlg",
    "XAD-xUbncew",
    "xDCtlAGjA0c",
    "YFpYNRX67hk",
    "Cp48NX8clAA",
    "ThvNwfGwaOE",
    "S3WE134o-mo",
    "ikTln_LOADg",
    "d5FKIh21kVQ",
    "fOoMDPcy-ao",
    "AfS_X2bfAJg",
    "YVxSNRhVypA",
    "ttDpr-6_GBI",
    "ur4uq7_ewUQ",
    "5gay09DQdQI",
    "f-RaA60FUrw",
    "TwqmICVZ7xQ",
    "WrsZJxpZcH0",
    "2yTT_gsiXL0",
    "_ShclHTKJoY"
]

In [18]:
%%time
if __name__ == '__main__':
    # get comments
#     response = comment_threads(videoID='7Kt6ouYqacQ', to_csv=True)
#     print(response)
    for video_id in tqdm(video_ids, desc='Processing videos'):
        response = comment_threads(videoID=video_id, to_csv=True)

Processing videos:   0%|                                                                        | 0/22 [00:00<?, ?it/s]

Finished processing 20 comments.
Finished processing 40 comments.
Finished processing 60 comments.
Finished processing 81 comments.


Processing videos:   5%|██▉                                                             | 1/22 [00:01<00:37,  1.77s/it]

Finished processing 97 comments.
Finished fetching comments for 9g3692xrYog. 97 comments found.
Finished processing 117 comments.
Finished processing 140 comments.


Processing videos:   9%|█████▊                                                          | 2/22 [00:02<00:23,  1.17s/it]

Finished processing 150 comments.
Finished fetching comments for -L-xfr3pyMM. 150 comments found.
Finished processing 170 comments.
Finished processing 190 comments.
Finished processing 210 comments.
Finished processing 230 comments.
Finished processing 251 comments.
Finished processing 275 comments.


Processing videos:  14%|████████▋                                                       | 3/22 [00:04<00:27,  1.44s/it]

Finished processing 284 comments.
Finished fetching comments for tC0TWLQwxlg. 284 comments found.
Finished processing 304 comments.
Finished processing 324 comments.
Finished processing 344 comments.
Finished processing 365 comments.
Finished processing 385 comments.
Finished processing 405 comments.
Finished processing 426 comments.


Processing videos:  18%|███████████▋                                                    | 4/22 [00:06<00:30,  1.67s/it]

Finished processing 431 comments.
Finished fetching comments for XAD-xUbncew. 431 comments found.
Finished processing 451 comments.
Finished processing 471 comments.
Finished processing 491 comments.
Finished processing 511 comments.
Finished processing 531 comments.
Finished processing 552 comments.
Finished processing 573 comments.
Finished processing 593 comments.
Finished processing 614 comments.


Processing videos:  23%|██████████████▌                                                 | 5/22 [00:09<00:34,  2.04s/it]

Finished processing 625 comments.
Finished fetching comments for xDCtlAGjA0c. 625 comments found.


Processing videos:  27%|█████████████████▍                                              | 6/22 [00:09<00:23,  1.44s/it]

Finished processing 633 comments.
Finished fetching comments for YFpYNRX67hk. 633 comments found.
Finished processing 653 comments.
Finished processing 661 comments.


Processing videos:  32%|████████████████████▎                                           | 7/22 [00:09<00:17,  1.16s/it]

Finished fetching comments for Cp48NX8clAA. 661 comments found.
Finished processing 681 comments.
Finished processing 687 comments.


Processing videos:  36%|███████████████████████▎                                        | 8/22 [00:10<00:13,  1.06it/s]

Finished fetching comments for ThvNwfGwaOE. 687 comments found.


Processing videos:  41%|██████████████████████████▏                                     | 9/22 [00:10<00:09,  1.33it/s]

Finished processing 704 comments.
Finished fetching comments for S3WE134o-mo. 704 comments found.


Processing videos:  45%|████████████████████████████▋                                  | 10/22 [00:11<00:07,  1.51it/s]

Finished processing 720 comments.
Finished fetching comments for ikTln_LOADg. 720 comments found.
Finished processing 740 comments.


Processing videos:  50%|███████████████████████████████▌                               | 11/22 [00:11<00:07,  1.52it/s]

Finished processing 746 comments.
Finished fetching comments for d5FKIh21kVQ. 746 comments found.
Finished processing 766 comments.
Finished processing 786 comments.
Finished processing 806 comments.
Finished processing 811 comments.


Processing videos:  55%|██████████████████████████████████▎                            | 12/22 [00:12<00:08,  1.21it/s]

Finished fetching comments for fOoMDPcy-ao. 811 comments found.
Finished processing 831 comments.
Finished processing 851 comments.
Finished processing 853 comments.


Processing videos:  59%|█████████████████████████████████████▏                         | 13/22 [00:13<00:07,  1.21it/s]

Finished fetching comments for AfS_X2bfAJg. 853 comments found.
Finished processing 873 comments.
Finished processing 891 comments.


Processing videos:  64%|████████████████████████████████████████                       | 14/22 [00:14<00:05,  1.34it/s]

Finished fetching comments for YVxSNRhVypA. 891 comments found.


Processing videos:  68%|██████████████████████████████████████████▉                    | 15/22 [00:14<00:04,  1.58it/s]

Finished processing 892 comments.
Finished fetching comments for ttDpr-6_GBI. 892 comments found.


Processing videos:  73%|█████████████████████████████████████████████▊                 | 16/22 [00:15<00:03,  1.86it/s]

Finished processing 912 comments.
Finished fetching comments for ur4uq7_ewUQ. 912 comments found.


Processing videos:  77%|████████████████████████████████████████████████▋              | 17/22 [00:15<00:02,  2.13it/s]

Finished processing 927 comments.
Finished fetching comments for 5gay09DQdQI. 927 comments found.
Finished processing 947 comments.


Processing videos:  82%|███████████████████████████████████████████████████▌           | 18/22 [00:15<00:02,  1.99it/s]

Finished processing 964 comments.
Finished fetching comments for f-RaA60FUrw. 964 comments found.
Finished processing 984 comments.
Finished processing 1004 comments.
Finished processing 1024 comments.
Finished processing 1044 comments.
Finished processing 1064 comments.
Finished processing 1084 comments.
Finished processing 1105 comments.
Finished processing 1129 comments.
Finished processing 1151 comments.
Finished processing 1181 comments.
Finished processing 1209 comments.
Finished processing 1240 comments.
Finished processing 1260 comments.
Finished processing 1298 comments.
Finished processing 1334 comments.


Processing videos:  86%|██████████████████████████████████████████████████████▍        | 19/22 [00:21<00:05,  1.94s/it]

Finished processing 1345 comments.
Finished fetching comments for TwqmICVZ7xQ. 1345 comments found.
Finished processing 1365 comments.
Finished processing 1385 comments.
Finished processing 1408 comments.


Processing videos:  91%|█████████████████████████████████████████████████████████▎     | 20/22 [00:22<00:03,  1.77s/it]

Finished processing 1417 comments.
Finished fetching comments for WrsZJxpZcH0. 1417 comments found.
Finished processing 1437 comments.
Finished processing 1457 comments.
Finished processing 1477 comments.
Finished processing 1500 comments.
Finished processing 1529 comments.
Finished processing 1549 comments.
Finished processing 1581 comments.
Finished processing 1620 comments.
Finished processing 1629 comments.


Processing videos:  95%|████████████████████████████████████████████████████████████▏  | 21/22 [00:26<00:02,  2.43s/it]

Finished fetching comments for 2yTT_gsiXL0. 1629 comments found.
Finished processing 1649 comments.
Finished processing 1669 comments.
Finished processing 1689 comments.
Finished processing 1709 comments.
Finished processing 1729 comments.
Finished processing 1749 comments.
Finished processing 1769 comments.
Finished processing 1790 comments.
Finished processing 1810 comments.
Finished processing 1836 comments.


Processing videos: 100%|███████████████████████████████████████████████████████████████| 22/22 [00:31<00:00,  1.45s/it]

Finished processing 1852 comments.
Finished fetching comments for _ShclHTKJoY. 1852 comments found.
CPU times: total: 8.33 s
Wall time: 32 s





In [14]:
def extract_shorts_id(url):
    if 'youtube.com/shorts/' in url:
        return url.split('youtube.com/shorts/')[1].strip('/')
    else:
        return None

In [11]:
# smriti irani
links = [
    'https://www.youtube.com/watch?v=9g3692xrYog',
    'https://www.youtube.com/watch?v=-L-xfr3pyMM', 
    'https://www.youtube.com/watch?v=tC0TWLQwxlg',
    'https://www.youtube.com/watch?v=XAD-xUbncew',
    'https://www.youtube.com/watch?v=xDCtlAGjA0c'
]
short_links = [
    "https://www.youtube.com/shorts/YFpYNRX67hk",
    "https://www.youtube.com/shorts/Cp48NX8clAA",
    "https://www.youtube.com/shorts/ThvNwfGwaOE",
    "https://www.youtube.com/shorts/S3WE134o-mo",
    "https://www.youtube.com/shorts/ikTln_LOADg",
    "https://www.youtube.com/shorts/d5FKIh21kVQ",
    "https://www.youtube.com/shorts/fOoMDPcy-ao",
    "https://www.youtube.com/shorts/AfS_X2bfAJg",
    "https://www.youtube.com/shorts/YVxSNRhVypA",
    "https://www.youtube.com/shorts/ttDpr-6_GBI",
    "https://www.youtube.com/shorts/ur4uq7_ewUQ",
    "https://www.youtube.com/shorts/5gay09DQdQI",
    "https://www.youtube.com/shorts/f-RaA60FUrw",
    "https://www.youtube.com/shorts/TwqmICVZ7xQ",
    "https://www.youtube.com/shorts/WrsZJxpZcH0",
    "https://www.youtube.com/shorts/2yTT_gsiXL0",
    "https://www.youtube.com/shorts/_ShclHTKJoY"
]

In [15]:
for l in short_links:
#     link = extract_video_id(l)
    link = extract_shorts_id(l)
    print(link)

YFpYNRX67hk
Cp48NX8clAA
ThvNwfGwaOE
S3WE134o-mo
ikTln_LOADg
d5FKIh21kVQ
fOoMDPcy-ao
AfS_X2bfAJg
YVxSNRhVypA
ttDpr-6_GBI
ur4uq7_ewUQ
5gay09DQdQI
f-RaA60FUrw
TwqmICVZ7xQ
WrsZJxpZcH0
2yTT_gsiXL0
_ShclHTKJoY
