In [5]:
# For Fetching Comments 
from googleapiclient.discovery import build 
# For filtering comments 
import re 
# For filtering comments with just emojis 
import emoji
# Analyze the sentiments of the comment
#from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# For visualization 
import matplotlib.pyplot as plt
import csv
from datetime import datetime
from dotenv import load_dotenv
import os
import pandas as pd
from bertopic import BERTopic
import numpy

In [4]:
API_KEY = os.getenv('API_KEY') # Put in your API Key
 
youtube = build('youtube', 'v3', developerKey=API_KEY)


In [5]:

def get_video_details(video_id):
    try:
        video_response = youtube.videos().list(
            part='snippet',
            id=video_id
        ).execute()
        
        if 'items' in video_response and video_response['items']:
            return video_response['items'][0]['snippet']
        else:
            print(f"Error: No video found with ID {video_id}")
            return None
    except HttpError as e:
        print(f"An HTTP error occurred for video ID {video_id}: {str(e)}")
        return None

def get_video_comments(video_id, max_results=100):
    comments = []
    try:
        request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=max_results
        )
        response = request.execute()

        for item in response['items']:
            comment = item['snippet']['topLevelComment']['snippet']
            comments.append({
                'video_id': video_id,
                'text': comment['textDisplay'],
                'likeCount': comment['likeCount'],
                'publishedAt': comment['publishedAt']
            })

        return comments
    except HttpError as e:
        print(f"An HTTP error occurred for video ID {video_id}: {str(e)}")
        return []

def process_videos_from_csv(input_file, output_file):
    all_comments = []

    with open(input_file, 'r', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            video_id = row['video_url'].split('v=')[-1].strip()
            video_details = get_video_details(video_id)
            
            if video_details:
                print(f"Fetching comments for video: {video_details['title']}")
                comments = get_video_comments(video_id)
                for comment in comments:
                    comment['video_title'] = video_details['title']
                all_comments.extend(comments)
                print(f"Fetched {len(comments)} comments")
            else:
                print(f"Could not fetch details for video ID: {video_id}")

    if all_comments:
        with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['video_id', 'video_title', 'text', 'likeCount', 'publishedAt']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            
            writer.writeheader()
            for comment in all_comments:
                writer.writerow(comment)
        
        print(f"Processed comments for {len(set([c['video_id'] for c in all_comments]))} videos. Results saved to {output_file}")
    else:
        print("No comments were fetched.")

# Usage
input_file = 'inp_files/Election 2024 YT Videos - Sheet1.csv'  # Replace with your input CSV file name
output_file = 'inp_files/output_video_comments.csv'  # Replace with your desired output file name

process_videos_from_csv(input_file, output_file)

Error: No video found with ID Alljw46hEwc
Could not fetch details for video ID: Alljw46hEwc
Error: No video found with ID U_-TvkO6bwU
Could not fetch details for video ID: U_-TvkO6bwU
Fetching comments for video: Full Debate: Harris vs. Trump in 2024 ABC News Presidential Debate | WSJ
Fetched 100 comments
Error: No video found with ID OQgE0ETV81s&t=28s
Could not fetch details for video ID: OQgE0ETV81s&t=28s
Fetching comments for video: LIVE: Donald Trump speaks after presidential debate in Arizona
Fetched 100 comments
Fetching comments for video: US Presidential Debate 2024: Top Highlights | Kamala Harris Vs Donald Trump | Best 9 Minutes
Fetched 100 comments
Fetching comments for video: Harris v Trump: highlights of the US presidential election debate
Fetched 100 comments
Fetching comments for video: FULL VIDEO | 2024 Presidential Debate from ABC News
Fetched 100 comments
Fetching comments for video: Donald Trump says no more Kamala Harris debates | BBC News
Fetched 100 comments
Fetchi

In [6]:
df = pd.read_csv(output_file)
df

Unnamed: 0,video_id,video_title,text,likeCount,publishedAt
0,VgsC_aBquUE,Full Debate: Harris vs. Trump in 2024 ABC News...,"Read analysis from the debate here: <a href=""h...",1004,2024-09-11T14:03:41Z
1,VgsC_aBquUE,Full Debate: Harris vs. Trump in 2024 ABC News...,Vote for kamala!!!!,0,2024-09-27T02:05:43Z
2,VgsC_aBquUE,Full Debate: Harris vs. Trump in 2024 ABC News...,"&#39;Excuse me, excuse me, I have to respond. ...",0,2024-09-27T02:03:12Z
3,VgsC_aBquUE,Full Debate: Harris vs. Trump in 2024 ABC News...,He hates hel. Lol,0,2024-09-27T01:40:49Z
4,VgsC_aBquUE,Full Debate: Harris vs. Trump in 2024 ABC News...,Has anyone seen spot? He&#39;s missing. My nei...,0,2024-09-27T01:32:47Z
...,...,...,...,...,...
1901,wh2txoPfUS4,2024 presidential race: Polls show tight race ...,"Well folks, polls are not factual, nor are the...",0,2024-09-23T21:29:19Z
1902,wh2txoPfUS4,2024 presidential race: Polls show tight race ...,Trump might win Texas but he won&#39;t win the...,0,2024-09-23T21:28:59Z
1903,wh2txoPfUS4,2024 presidential race: Polls show tight race ...,This is apparently the only pole that shows tr...,3,2024-09-23T21:17:20Z
1904,wh2txoPfUS4,2024 presidential race: Polls show tight race ...,"Only a fool would support Donald Trump,<br><br...",1,2024-09-23T21:02:45Z


In [7]:
df['text'][25]

'I love jubilee'

Adding the topic model **experiments below:**

In [8]:
comments = df['text'][300:1000]
len(comments)

700

In [9]:
topic_model = BERTopic(embedding_model="all-MiniLM-L6-v2")

In [10]:
topics, probs = topic_model.fit_transform(comments)

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


: 