In [7]:
# Placeholder value: replace it with your own YouTube Data API key before
# running the cells below, which read this variable.
api_key = "ADD_YOUR_YOUTUBE_API_KEY_HERE"

In [5]:
import re

# An ID is exactly 11 characters from this alphabet; the look-ahead rejects
# longer runs instead of silently truncating them to a different ID.
_VIDEO_ID = r'([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])'

# The look-behind stops look-alike hosts such as "notyoutube.com" from
# matching, while still allowing an optional scheme and any subdomain
# (www., m., music., ...), because "/" and "." may precede the host name.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # Watch page: the "v" parameter may appear anywhere in the query string
        r'(?<![\w-])youtube\.com/watch\?(?:[^#\s]*[&;])?v=' + _VIDEO_ID,
        # Shortened URL
        r'(?<![\w-])youtu\.be/' + _VIDEO_ID,
        # Path-based formats: embeds, legacy /v/, shorts and live streams
        r'(?<![\w-])youtube(?:-nocookie)?\.com/(?:embed|v|shorts|live)/' + _VIDEO_ID,
    )
)


def extract_video_id(url):
    """
    Extracts the video ID from a YouTube URL.

    Supported formats are watch pages (with the ``v`` parameter in any
    position), ``youtu.be`` short links, and ``/embed/``, ``/v/``,
    ``/shorts/`` and ``/live/`` paths. The scheme and subdomain are optional.

    Parameters:
    - url (str): The YouTube URL.

    Returns:
    - str: The extracted video ID or None if the URL is invalid
      (including non-string input such as None).
    """
    # The docstring promises None for invalid input, so do not let re raise
    # TypeError on non-string values.
    if not isinstance(url, str):
        return None

    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)

    return None

In [6]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import json

# Developer key handed to the API client. It is read from the `api_key`
# variable, so that variable must already be defined when this cell runs.
DEVELOPER_KEY = api_key
# Service name and version passed to googleapiclient's build().
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

def get_video_comments(service, page_limit=20, **kwargs):
    """
    Collects top-level comments by paging through commentThreads().list().

    Parameters:
    - service: API client exposing commentThreads().list(**kwargs).execute().
    - page_limit (int): Maximum number of result pages to request.
    - **kwargs: Arguments forwarded to commentThreads().list(), e.g.
      part, videoId, textFormat.

    Returns:
    - list: (author display name, comment text) tuples in the order the API
      returned them.
    """
    comments = []
    request_args = dict(kwargs)
    pages_fetched = 0

    # Request a page only when it will actually be consumed, so exactly
    # min(page_limit, available pages) requests are made and none is discarded.
    while pages_fetched < page_limit:
        response = service.commentThreads().list(**request_args).execute()
        pages_fetched += 1
        if not response:
            break

        # A response without an 'items' key is treated as an empty page.
        for item in response.get('items', []):
            snippet = item['snippet']['topLevelComment']['snippet']
            # Optionally, you can also fetch the author's channel ID via
            # snippet['authorChannelId']['value']
            comments.append((snippet['authorDisplayName'], snippet['textDisplay']))

        # No token means this was the last page of comments.
        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
        request_args['pageToken'] = next_page_token

    return comments

def youtube_build():
    """
    Builds an API client from the module-level service settings.

    Returns:
    - The object returned by build() for YOUTUBE_API_SERVICE_NAME at
      YOUTUBE_API_VERSION, created with DEVELOPER_KEY as the developer key.
    """
    client = build(
        serviceName=YOUTUBE_API_SERVICE_NAME,
        version=YOUTUBE_API_VERSION,
        developerKey=DEVELOPER_KEY,
    )
    return client

def main():
    """
    Downloads the comments of a fixed set of videos into JSON files.

    For each entry in the name-to-URL table, the video ID is extracted, the
    comments are fetched with get_video_comments() (using its default page
    limit), and the result is written to "<name>.json" in the current
    working directory. Entries whose URL yields no video ID, or whose fetch
    raises HttpError, are reported on stdout and skipped.
    """
    names_to_urls = {
        "lex-interview-tucker": "https://www.youtube.com/watch?v=f_lRdkH_QoY",
        "tucker-interview-putin": "https://www.youtube.com/watch?v=fOCWBhuDdDo",
        "nawalny-60-minutes": "https://www.youtube.com/watch?v=O6zdi6Q6p7o",
        "putin-dw-news": "https://www.youtube.com/watch?v=iNnA-CJh_e8",
        "cnn-ukraine": "https://www.youtube.com/watch?v=ndod_uYWBVg",
        "cnn-ukraine2": "https://www.youtube.com/watch?v=nTuRqpyMu_4",

        "putin-fox-reelection": "https://www.youtube.com/watch?v=asMTuoCmSsA",
        "putin-austrian-ministier": "https://www.youtube.com/watch?v=1J1PYVDf9z4",
        "putin-challenges-west": "https://www.youtube.com/watch?v=jhJAcA32Q8k",
        "putin-interview-nbc": "https://www.youtube.com/watch?v=m6pJd6O_NT0",
        "trump-putin-documentary-part-1": "https://www.youtube.com/watch?v=455vMrEm958",
        "trump-putin-documentary-part-2": "https://www.youtube.com/watch?v=300xF8SxWyM"
    }

    # The client does not depend on the video, so build it once instead of
    # once per loop iteration.
    youtube = youtube_build()

    for name, url in names_to_urls.items():
        video_id = extract_video_id(url)
        if video_id is None:
            # Do not send a request with videoId=None; report and move on.
            print(f"Skipping {name}: could not extract a video ID from {url}")
            continue

        # Keep the try body limited to the call that can raise HttpError.
        try:
            comments = get_video_comments(youtube, part='snippet', videoId=video_id, textFormat='plainText')
        except HttpError as e:
            print(f"An HTTP error {e.resp.status} occurred:\n{e.content}")
            continue

        # json.dump escapes non-ASCII by default, so the bytes written are
        # unchanged; the explicit encoding just removes the locale dependency.
        with open(f"{name}.json", "w", encoding="utf-8") as f:
            json.dump(comments, f)

# Start the download only when this code runs as the main module (script or
# notebook cell), not when it is imported.
if __name__ == '__main__':
    main()