In [1]:
import csv
import time

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

In [3]:
def scrape_youtube_comments(video_url, max_attempts=10, scroll_pause_time=2, scroll_increment=500):
    """
    Scrape comment text from a YouTube video page by scrolling it in Chrome.

    A Chrome window is opened, the page is scrolled down in steps of
    ``scroll_increment`` pixels, and after every step the text of each element
    matching the comment CSS classes is collected. Scrolling stops once
    ``max_attempts`` consecutive steps end without the page growing while the
    viewport is already at the bottom of the page (or could not be moved).

    Args:
        video_url: URL of the YouTube video
        max_attempts: Maximum number of consecutive scroll attempts that end at
            the bottom of the page with no new content before giving up
        scroll_pause_time: Time to pause after each scroll step (seconds)
        scroll_increment: Pixels to scroll each time

    Returns:
        List of unique, non-empty comment texts in the order first seen

    Raises:
        Any exception raised by the browser while loading or scrolling the
        page; the browser is closed before the exception propagates.
    """
    # Compound class selector: both classes must be present on the element.
    # By.CLASS_NAME is meant for a single class name, so spell it as CSS.
    # NOTE(review): the selector is not scoped to the comment section, so it
    # may also match other text using these classes -- confirm against the page.
    comment_selector = (
        ".yt-core-attributed-string"
        ".yt-core-attributed-string--white-space-pre-wrap"
    )

    # Set up Chrome options
    chrome_options = Options()
    chrome_options.add_argument("--disable-notifications")
    chrome_options.add_argument("--disable-infobars")

    # Initialize the driver
    driver = webdriver.Chrome(options=chrome_options)

    try:
        # Everything that touches the live browser happens inside the try so
        # that a failure at any point still reaches driver.quit().
        driver.maximize_window()

        # Load the video
        driver.get(video_url)
        time.sleep(3)

        all_comments = []       # preserves first-seen order
        seen_comments = set()   # fast duplicate check
        attempts = 0

        # Initial scroll to trigger comments section
        driver.execute_script("window.scrollTo(0, 700)")
        time.sleep(3)

        last_height = driver.execute_script("return document.documentElement.scrollHeight")

        # Main scrolling loop
        while attempts < max_attempts:
            # Scroll down gradually
            position_before = driver.execute_script("return window.pageYOffset")
            driver.execute_script(f"window.scrollTo(0, {position_before + scroll_increment})")
            time.sleep(scroll_pause_time)

            # Extract comments; a failed extraction is reported but must not
            # abort the scroll loop (best effort).
            try:
                for element in driver.find_elements(By.CSS_SELECTOR, comment_selector):
                    try:
                        comment_text = element.text
                    except StaleElementReferenceException:
                        # Element was replaced while the page re-rendered
                        continue
                    if not comment_text or comment_text in seen_comments:
                        continue
                    seen_comments.add(comment_text)
                    all_comments.append(comment_text)
                    preview = f"{comment_text[:50]}..." if len(comment_text) > 50 else comment_text
                    print(f"New comment added: {preview}")
            except Exception as e:
                print(f"Error extracting comments: {str(e)}")

            # Decide whether this step made progress
            new_height = driver.execute_script("return document.documentElement.scrollHeight")
            position_after = driver.execute_script("return window.pageYOffset")
            viewport_height = driver.execute_script("return window.innerHeight")

            # 1px tolerance absorbs fractional scroll offsets
            at_bottom = position_after + viewport_height >= new_height - 1
            # Also covers a non-positive scroll_increment, which never advances
            stalled = position_after <= position_before

            if new_height != last_height:
                attempts = 0  # Reset counter if new content was loaded
            elif at_bottom or stalled:
                # An unchanged height only means "nothing more to load" when
                # there is no already-loaded content left to scroll through.
                attempts += 1
                print(f"No new content detected ({attempts}/{max_attempts})")

            last_height = new_height

        print(f"\nFinished scrolling! Total unique comments collected: {len(all_comments)}")
        return all_comments

    finally:
        # Ensure browser is closed even if an exception occurs
        driver.quit()

# Example: save comments to a text file

In [15]:
def save_comments_to_file(comments, filename="youtube_comments.txt"):
    """
    Save comments to a plain-text file.

    Each comment is written as a ``Comment #N:`` header line (numbered from 1)
    followed by the comment text and a blank line. The file is opened in
    write mode, so an existing file is overwritten.

    Args:
        comments: Iterable of comment strings to save
        filename: Path of the text file to write
    """
    with open(filename, "w", encoding="utf-8") as f:
        for i, comment in enumerate(comments, 1):
            f.write(f"Comment #{i}:\n{comment}\n\n")
    print(f"Comments saved to {filename}")


# Example usage
if __name__ == "__main__":
    video_url = "https://www.youtube.com/watch?v=ZTiUoKSAHaU"

    # Keep the returned list so it can be printed and saved below
    collected_comments = scrape_youtube_comments(video_url)

    # Print all collected comments
    print("\nAll Comments:")
    for i, comment in enumerate(collected_comments, 1):
        print(f"{i}. {comment}")

    # Save the comments to a file
    save_comments_to_file(collected_comments)

New comment added: You saw how I condensed 1000+ hours of System Desi...
New comment added: While many are charging ₹1.6 lakhs for a similar r...
New comment added: Notes - 

1st Month - Programming Language + Compl...
New comment added: 1:40 -1st Month - Programming Language + Complexit...
New comment added: Big request! Please make a roadmap for engineering...
New comment added: trust me if you have never been topper giy you can...
New comment added: Great content 
New comment added: Thanks For this
New comment added: Well structured video.
New comment added: Thank you mam
New comment added: Now i have to crack Google just to See you daily.
New comment added: Thank you 
New comment added: Thanks for providing such a valuable session from ...
New comment added: But i believe your past experience got you into bi...
New comment added: Di can you guide on job hunt and how to reach recr...
New comment added: Great
New comment added: Useful content, By the way you're looking gorgeous...
Ne

# Example: save comments to a CSV file

In [None]:
def save_comments_to_csv(comments, filename="youtube_comments.csv"):
    """
    Save comments to a CSV file.

    Writes a header row (``Comment Number``, ``Comment Text``) followed by one
    row per comment, numbered from 1. The file is opened in write mode, so an
    existing file is overwritten.

    Args:
        comments: Iterable of comments to save
        filename: Path of the CSV file to write
    """
    # newline="" hands line-ending control to the csv module, which keeps
    # comments containing embedded newlines intact inside quoted fields.
    with open(filename, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["Comment Number", "Comment Text"])
        writer.writerows(enumerate(comments, 1))
    print(f"Comments saved to {filename}")

# Demo: scrape one video, list what was found, then export it as CSV
if __name__ == "__main__":
    video_url = "https://www.youtube.com/watch?v=4A_kmjtsJ7c"

    # Run the scraper and keep the resulting list for the steps below
    collected_comments = scrape_youtube_comments(video_url)

    # Numbered listing of every collected comment
    print("\nAll Comments:")
    for i, comment in enumerate(collected_comments, start=1):
        print("{}. {}".format(i, comment))

    # Write the same list to the default CSV file
    save_comments_to_csv(collected_comments)