In [3]:
# 1. Install the downloader tool (Run this once)
!pip install yt-dlp

import csv
from yt_dlp import YoutubeDL
import pandas as pd

# --- CONFIGURATION ---
# URL of the YouTube video whose comments will be downloaded.
# Replace it with the URL of the video you want to analyze.
video_url = "https://www.youtube.com/watch?v=dlHu-gAyglw"

# --- THE DOWNLOADER SCRIPT ---
def download_youtube_comments(
    url, filename="youtube_comments.csv", max_comments="all"
):
    """Download the comments of a YouTube video and save them to a CSV file.

    Only comment metadata is fetched through yt-dlp; the video itself is
    never downloaded. Progress, errors and a preview of the saved data are
    printed rather than returned or raised, which suits interactive
    (notebook) use.

    Args:
        url: URL of the YouTube video to read comments from.
        filename: Path of the CSV file to write. Defaults to
            "youtube_comments.csv" in the current working directory.
        max_comments: Upper bound on the number of comments to fetch,
            either "all" or a number (e.g. 500 for a faster run). It is
            passed to yt-dlp's YouTube extractor as a string.

    Returns:
        None. When at least one comment is retrieved, a CSV with the columns
        "author", "text", "timestamp" and "likes" is written to *filename*.
    """
    print(f"Starting download for: {url}")

    # Options tailored for extracting comment text only (no video/audio).
    opts = {
        "getcomments": True,
        "skip_download": True,  # Don't download the video file
        "quiet": True,  # Less clutter in output
        "no_warnings": True,
        "ignore_no_formats_error": True,
        "extractor_args": {
            "youtube": {
                # Extractor arguments are given as lists of strings.
                "max_comments": [str(max_comments)],
                # Intended to mimic a browser client to avoid blocking.
                "player_client": ["web_safari"],
            }
        },
    }

    info = None
    with YoutubeDL(opts) as yt:
        # Keep the try body minimal: only the extraction call is expected to
        # fail (network errors, private or removed videos, ...).
        try:
            info = yt.extract_info(url, download=False)
        except Exception as e:
            # Notebook-level boundary: report the problem and fall through
            # to the "no comments" message instead of showing a traceback.
            print(f"Error: {e}")

    # `info` stays None when extraction failed, and the "comments" entry may
    # be missing or None; treat all of these cases as "no comments".
    raw_comments = []
    if info:
        raw_comments = info.get("comments") or []
        print(f"Found {len(raw_comments)} comments. Processing...")

    # Keep only the fields needed for the downstream analysis.
    comments_data = [
        {
            "author": c.get("author"),
            "text": c.get("text"),  # Main input for NLP
            # Posting time as reported by yt-dlp (appears to be Unix epoch
            # seconds -- confirm before using it for time-series work).
            "timestamp": c.get("timestamp"),
            "likes": c.get("like_count"),  # Proxy for sentiment/agreement
        }
        for c in raw_comments
    ]

    if not comments_data:
        print("No comments found or video is private/restricted.")
        return

    # --- SAVE TO CSV ---
    df = pd.DataFrame(comments_data)

    # "utf-8-sig" writes a UTF-8 byte-order mark so that spreadsheet tools
    # such as Excel detect the encoding and render emojis and other
    # non-ASCII text correctly.
    df.to_csv(filename, index=False, encoding="utf-8-sig")

    print(f"\nSUCCESS! Downloaded {len(df)} comments.")
    print(f"File saved as: {filename}")
    print("First 5 rows:")
    print(df.head())

# Run the download for the configured video. The function prints its
# progress and a preview of the saved data.
download_youtube_comments(video_url)

Starting download for: https://www.youtube.com/watch?v=dlHu-gAyglw
Found 1 comments. Processing...

SUCCESS! Downloaded 1 comments.
File saved as: youtube_comments.csv
First 5 rows:
              author                                               text  \
0  @YuttaSihingGusti  thanks for your informatin. its work for me. g...   

    timestamp  likes  
0  1769904000      0  
