This notebook follows the tutorial at https://scrapingking.medium.com/scrape-youtube-comments-using-python-and-selenium-43a7b39d80c3 to scrape YouTube comments with Selenium and BeautifulSoup.


In [1]:
import os
from pathlib import Path
import sys
import time
from typing import Optional, Literal, Final, List

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.common.by import By

In [2]:
# Address of the YouTube video whose comments are scraped in this notebook.
vid_url: Final[str] = "https://www.youtube.com/watch?v=ilwMM-CEO6w"

In [3]:
def get_chrome_driver(
    driver_exe: Optional[os.PathLike] = None,
) -> Optional[WebDriver]:
    """Create a Selenium Chrome webdriver from a chromedriver executable.

    ``driver_exe`` is first tried as a path to an existing file (absolute or
    relative to the working directory). If no such file exists, every
    directory listed in the environment variable 'PATH' is searched for it.

    Args:
        driver_exe: Path or bare name of the executable
            (default="chromedriver"). Plain strings are accepted as well as
            path-like objects.

    Returns:
        Chrome webdriver if executable found, else None
    """
    exe_name = "chromedriver" if driver_exe is None else os.fspath(driver_exe)

    chrome_driver_path: Optional[Path] = None
    if Path(exe_name).is_file():
        # The argument already points at a file; use it as given.
        chrome_driver_path = Path(exe_name)
    else:
        # Use startswith() rather than a substring test: "darwin" (macOS)
        # also contains "win" and must not be treated as Windows.
        if sys.platform.startswith("win") and not exe_name.lower().endswith(".exe"):
            exe_name += ".exe"

        # os.pathsep is the platform's PATH separator; a missing PATH is
        # treated as an empty search list instead of raising.
        for directory in os.environ.get("PATH", "").split(os.pathsep):
            candidate = Path(directory) / exe_name
            # is_file() so that a directory sharing the name is not mistaken
            # for the executable.
            if candidate.is_file():
                chrome_driver_path = candidate
                break

    if chrome_driver_path is None:
        return None

    # Selenium 4 deprecated passing the executable path positionally to
    # webdriver.Chrome; the location is supplied through a Service object.
    # Imported here so the lookup above works without touching Selenium.
    from selenium.webdriver.chrome.service import Service

    return webdriver.Chrome(
        service=Service(executable_path=str(chrome_driver_path))
    )


def get_driver(
    driver_type: Literal["chrome"] = "chrome",
    driver_exe: Optional[os.PathLike] = None,
) -> Optional[WebDriver]:
    """Build the Selenium webdriver for the requested browser.

    Only 'chrome' is currently handled; any other value yields None.

    Args:
        driver_type: Name of the browser driver to create, e.g. 'chrome'
        driver_exe: PathLike executable forwarded to the browser-specific
            factory

    Returns:
        Webdriver if the type is supported and its executable is found,
        else None
    """
    # Guard clause: bail out early on anything that is not Chrome.
    if driver_type != "chrome":
        return None
    return get_chrome_driver(driver_exe)


def get_yt_comments(
    vid_url: str,
    pages: int = 7,
    min_sleep_sec: float = 2.0
) -> List[str]:
    """Obtain Youtube comments as a list of unicode strings.

    Youtube loads comments lazily as the user scrolls down the page, so the
    page is scrolled once per requested comment page, pausing after each
    scroll so the newly requested batch has time to load. The browser is
    always closed before the function returns or raises.

    Args:
        vid_url: URL of video on Youtube
        pages: Number of comment pages to load
        min_sleep_sec: Wait time between comment page scrolling

    Returns:
        Comments list; empty if the page exposes no comments section

    Raises:
        TypeError: If no web driver could be created
    """
    driver = get_driver()
    if driver is None:
        raise TypeError("Unable to get web driver!")

    comments_html: str = ""
    try:
        driver.get(vid_url)
        driver.maximize_window()

        # Scroll to first comments page
        time.sleep(2 * min_sleep_sec)
        driver.execute_script("window.scrollTo(0, 1000);")

        for _ in range(pages - 1):
            time.sleep(min_sleep_sec)
            # Scroll to the document's current bottom. A fixed offset stops
            # moving the viewport once the page has grown past it, so no
            # further comment batches would be requested.
            driver.execute_script(
                "window.scrollTo(0, document.documentElement.scrollHeight);"
            )

        # Give the batch requested by the last scroll time to load before
        # the HTML is captured.
        time.sleep(min_sleep_sec)

        sections: List[WebElement] = driver.find_elements(
            by=By.XPATH,
            value='//*[@id="comments"]'
        )
        if sections:
            # Use the first match rather than demanding exactly one element,
            # which would fail with an opaque unpacking error otherwise.
            comments_html = sections[0].get_attribute("innerHTML") or ""
    finally:
        # Release the browser process even when scraping fails midway.
        driver.quit()

    if not comments_html:
        return []

    # parse the HTML content with BeautifulSoup
    soup: BeautifulSoup = BeautifulSoup(markup=comments_html, features="html.parser")
    return [
        comment.text
        for comment in soup.find_all(
            name="yt-formatted-string",
            attrs={"class": "style-scope ytd-comment-renderer"}
        )
    ]


# Scrape the comments of the video configured above, using the default
# number of comment pages and wait time.
video_comments: List[str] = get_yt_comments(vid_url=vid_url)

  driver = webdriver.Chrome(chrome_driver_path)


In [4]:
# Display the scraped comments; the notebook renders the list below.
video_comments

['This guy is the most at ease and calming man I have ever heard and has a great sense of humor. Best physics communicator on YouTube.',
 "I know I'm a couple years late but your commitment to nearly free, open access, high quality science education is truly commendable.",
 'Yay! Thank you SO MUCH for doing this - making time from home amidst the pandemic. It’s such a comfort in these crazy times. I’ve always loved your presenting style and ALL your videos, and this is like sitting down to a home cooked roast dinner!! Comforting and great for my endorphin levels. Well until we became vegetarians at least, but my Wife does a mean veggie lasagna . You are the best!!! Fermilab ROCKS!!!',
 'Even in these dark times, educators going to educate.',
 "These videos are way better than most other science videos I come across. PBS Spacetime is also great but they come at you a mile a minute. Don's pacing is perfect for me. I can't wait for the next video!",
 'I\'ve been watching your "in front of