In [67]:
from pytube import YouTube, Channel
import whisper
from moviepy.editor import AudioFileClip
import os
from googleapiclient.discovery import build
from datetime import datetime, timedelta, timezone
import pandas as pd
import pytz
import numpy as np
import re

#### Create Data directories

In [2]:
def create_dirs(path):
    directory_path = path

    # Check if the directory exists
    if not os.path.exists(directory_path):
        # If it doesn't exist, create it
        os.makedirs(directory_path)
        print("Directory created:", directory_path)
    else:
        print("Directory already exists:", directory_path)

In [3]:
create_dirs('data')
create_dirs(os.path.join('data','audio'))
create_dirs(os.path.join('data','text'))

Directory already exists: data
Directory created: data\audio
Directory already exists: data\text


#### Generate Video URLs

In [329]:
def generate_video_urls(channel_name):

    import requests
    # Set your API key
    # API_KEY = "AIzaSyBTuh0ypeNqiDVhNIvn5ofhuIvx8qmt-r4" aryama.s1008@gmail.com
    API_KEY = "AIzaSyATIBuk9YgT0xznkIsLVGnsZTknMrVGrgE" #other email

    # Create a YouTube API service
    youtube = build('youtube', 'v3', developerKey=API_KEY)

    # Set the YouTube channel ID
    search_channel_name = channel_name
    channel_id= requests.get(f'https://www.googleapis.com/youtube/v3/search?part=id&q={search_channel_name}&type=channel&key={API_KEY}').json()['items'][0]['id']['channelId']
    print(f"Channel ID for {search_channel_name}: {channel_id}")

    # Number of videos to fetch
    NUM_VIDEOS = 50

    # Fetch the latest videos for the channel
    request = youtube.search().list(
        part='snippet',
        
        channelId= channel_id,
        order='date',

        maxResults=NUM_VIDEOS,
        type='video',
        fields="items(id,snippet(publishedAt,channelId,channelTitle,title))"
    )
    response = request.execute()

    # we can also take the time right now and then fetch the videos released 24 hrs prior
    # tz='US/Eastern'
    # a= datetime.now()
    # a= a.astimezone(timezone(tz))

    # take the latest date when the video was released and fetch the videos released 24 hrs prior
    tz=pytz.timezone('US/Eastern')
    latest_date= datetime.strptime(response['items'][0]['snippet']['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
    latest_date= latest_date.astimezone(tz) #converting datetime to EST timezone

    video_urls = []
    selected_response = []
    video_ids = []
    for item in response['items']:
        t= datetime.strptime(item['snippet']['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
        t= t.astimezone(tz)

        # if latest_date - t <= timedelta(days= 2):
        if (latest_date - t).days==0:
            video_urls.append('https://www.youtube.com/watch?v=' + item['id']['videoId'])
            selected_response.append(item)
            video_ids.append(item['id']['videoId'])

            

    return video_urls, selected_response, video_ids


In [330]:
ursl, res, id=generate_video_urls('Bloomberg Television')

Channel ID for Bloomberg Television: UCIALMKvObZNtJ6AmdCLP7Lg


In [316]:
print(len(ursl), len(res), len(id))

31 31 31


In [346]:
res[0]['snippet']['title']

'Dell, Retail Earnings, FOMC Minutes | What We&#39;re Watching'

In [336]:
# list the channels taht you want to use to fetch the videos
list_channels= ['Yahoo finance', 'Bloomberg television', 'World economics Forum' ]

# creating a dataframe to collect metadat for the fetched videos
final_df = pd.DataFrame(columns= ['video_urls', 'response_metadata','video_ids'])

for channel in list_channels:
    urls, response, ids= generate_video_urls(channel)
    df= pd.DataFrame(columns= ['video_urls', 'response_metadata','video_ids'])
    df.video_urls= urls
    df.response_metadata= response
    df.video_ids= ids
    final_df = pd.concat([final_df,df])

final_df = final_df.reset_index(drop = True)

Channel ID for Yahoo finance: UCEAZeUIeJs0IjQiqTCdVSIg
Channel ID for Bloomberg television: UCIALMKvObZNtJ6AmdCLP7Lg
Channel ID for World economics Forum: UCw-kH-Od73XDAt7qtH9uBYA


In [347]:
final_df['channel_name']= final_df.response_metadata.apply(lambda x: x['snippet']['channelTitle'])
# final_df['video_title']= final_df.response_metadata.apply(lambda x: x['snippet']['title'])

In [77]:
final_df.to_pickle('final_df.pkl')

#### Download Youtube Videos

In [40]:
final_df = pd.read_pickle('final_df.pkl')

In [28]:
# Download audio function and converts mp4 video to mp3 audio

def download_audio(url, output_path='data\\audio'):

    # Handle unaccepted special characters to save the audio file and replace spaces with underscore
    def _handle_filenames(filename):
        import re
        new_string = re.sub('[^A-z0-9 -.]', '', filename).lower().replace(" ", "_")
        return new_string

    # Create a YouTube object with the URL
    yt = YouTube(url)
    filename= yt.title

    # Get the audio stream with the highest quality
    audio_stream = yt.streams.get_audio_only()

    # if the audio file exists do not download
    if os.path.exists(os.path.join(output_path, audio_stream.default_filename)):
        print(f'File already exists : {os.path.join(output_path, audio_stream.default_filename)}')
        mp3_filename = _handle_filenames(f"{filename}.mp3")
        mp3_file_path = os.path.join(output_path, mp3_filename)
        print(mp3_file_path)
        return mp3_file_path

    # Download the audio stream
    temp_file = audio_stream.download(output_path=output_path)

    # Load the downloaded file
    audio_clip = AudioFileClip(temp_file)

    # Set the filename for the MP3 file
    mp3_filename = filename if filename.endswith('.mp3') else f"{filename}.mp3"

    mp3_filename = _handle_filenames(mp3_filename)
    
    print(mp3_filename)
    mp3_file_path = os.path.join(output_path, mp3_filename)

    # Convert to MP3 and save

    audio_clip.write_audiofile(mp3_file_path, codec="libmp3lame")

    # Remove the temporary file
    # os.remove(temp_file)

    print(f"Downloaded and converted to MP3: {mp3_file_path}")
    return mp3_file_path
# Example usage


In [29]:
# extracts the video title from the video urls in the dataframe

def get_video_title(url):
    # Create a YouTube object with the URL
    yt = YouTube(url)
    return yt.title


In [61]:
# extracts puclishedAt date from the video urls in the dataframe

def get_date(item):
    tz=pytz.timezone('US/Eastern')
    t= datetime.strptime(item['snippet']['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
    t= t.astimezone(tz)
    return str(t.date())



In [None]:
final_df['mp3_file_path'] = np.nan
for i,url in enumerate(final_df.video_urls):
    try:
        final_df.loc[i, 'video_title'] = get_video_title(url)
        file_path = download_audio(url= url)
        final_df.loc[i, 'mp3_file_path']= file_path
    except Exception as e:
        print(repr(e))
        continue

In [None]:
final_df['publishedAt'] = np.nan

# Download all the audio files given the video urls in the dataframe
for i, d in enumerate(final_df.response_metadata):
    final_df.loc[i, 'publishedAt'] = get_date(d)


In [76]:
from IPython.display import Audio
Audio(final_df.mp3_final_path[0]) # To check whether the mp3 file plays

#### Transcribe : OpenAI - Whisper 

In [53]:
def transcribe_audio(audio_file):

    model = whisper.load_model("small")  # You can choose other models like 'tiny', 'small', 'medium', 'large'
    result = model.transcribe(audio_file)
   
    return result

In [74]:
# final_df= final_df.dropna(how= 'any')
# final_df= final_df.reset_index(drop=True)
# final_df[final_df['mp3_file_path'].isna()]
# final_df

In [75]:

final_df['txt_file_path'] = np.nan
for i in range(final_df.shape[0]):
    result= transcribe_audio(audio_file= final_df.loc[i, 'mp3_file_path'])
    # print(f'Transcription of file {final_df.loc[i, 'mp3_file_path']} complete')

    base_path = os.path.join('data', 'text')
    filename = (final_df.loc[i, 'video_title'])
    filename = re.sub('[^A-z0-9 -.]', '', filename).lower().replace(" ", "_")
    filename = filename + '.txt'
    file_path = os.path.join(base_path, filename)

    final_df.loc[i, 'txt_file_path']= file_path
    print(i)   
    
    with open(file_path, "w") as file:
        file.write(result['text'])
    print(f'Text file saved to  : {file_path}')



0
Text file saved to  : data\text\nvidia's_long-term_growth_is_uncertain_strategist.txt
1
Text file saved to  : data\text\outgoing_boeing_ceo_to_stay_on_board_as_safety_concerns_linger.txt
2
Text file saved to  : data\text\booking_holdings_versus_disney_why_only_one_is_a_buy.txt
3
Text file saved to  : data\text\stock_market_today_dow_closes_above_40,000_for_first_time_to_cap_winning_week_for_stocks__may_17.txt
4
Text file saved to  : data\text\tesla_supercharger_network_it_makes_economic_sense_to_bring_people_back,_analyst_explains.txt
5
Text file saved to  : data\text\microsoft_to_step_up_subscription,_ai_game_analyst.txt
6
Text file saved to  : data\text\stock_market_why_247_trading_may_be_inevitable.txt
7
Text file saved to  : data\text\white_house_press_secretary_karine_jean-pierre_holds_briefing.txt
8
Text file saved to  : data\text\biden_and_trump_agree_to_debate_what_to_expect_#shorts.txt
9
Text file saved to  : data\text\gas_an_extreme_hurricane_season_could_push_prices_to_$4.

In [76]:
final_df

Unnamed: 0,video_urls,response_metadata,video_ids,channel_name,video_title,mp3_file_path,publishedAt,txt_file_path
0,https://www.youtube.com/watch?v=Kxr3c3ZHlAM,"{'id': {'kind': 'youtube#video', 'videoId': 'K...",Kxr3c3ZHlAM,Yahoo Finance,Nvidia's long-term growth is uncertain: Strate...,data\audio\nvidia's_long-term_growth_is_uncert...,2024-05-17,data\text\nvidia's_long-term_growth_is_uncerta...
1,https://www.youtube.com/watch?v=RB5SSca3LEI,"{'id': {'kind': 'youtube#video', 'videoId': 'R...",RB5SSca3LEI,Yahoo Finance,Outgoing Boeing CEO to stay on board as safety...,data\audio\outgoing_boeing_ceo_to_stay_on_boar...,2024-05-17,data\text\outgoing_boeing_ceo_to_stay_on_board...
2,https://www.youtube.com/watch?v=2yJS4GaZbPk,"{'id': {'kind': 'youtube#video', 'videoId': '2...",2yJS4GaZbPk,Yahoo Finance,Booking Holdings versus Disney: Why only one i...,data\audio\booking_holdings_versus_disney_why_...,2024-05-17,data\text\booking_holdings_versus_disney_why_o...
3,https://www.youtube.com/watch?v=2nO2VTkC_d0,"{'id': {'kind': 'youtube#video', 'videoId': '2...",2nO2VTkC_d0,Yahoo Finance,"Stock market today: Dow closes above 40,000 fo...",data\audio\stock_market_today_dow_closes_above...,2024-05-17,data\text\stock_market_today_dow_closes_above_...
4,https://www.youtube.com/watch?v=Anr6OYhgAfM,"{'id': {'kind': 'youtube#video', 'videoId': 'A...",Anr6OYhgAfM,Yahoo Finance,@tesla Supercharger network: ‘It makes economi...,data\audio\tesla_supercharger_network_it_makes...,2024-05-17,data\text\tesla_supercharger_network_it_makes_...
...,...,...,...,...,...,...,...,...
74,https://www.youtube.com/watch?v=SEXiFHN3Mbk,"{'id': {'kind': 'youtube#video', 'videoId': 'S...",SEXiFHN3Mbk,Bloomberg Television,TSMC's 42% Surge Leads to Weighting Limits for...,data\audio\tsmc's_42%_surge_leads_to_weighting...,2024-05-17,data\text\tsmc's_42%_surge_leads_to_weighting_...
75,https://www.youtube.com/watch?v=UHU0_73b-X8,"{'id': {'kind': 'youtube#video', 'videoId': 'U...",UHU0_73b-X8,Bloomberg Television,Xi Jinping and Vladimir Putin Vow to Cooperate...,data\audio\xi_jinping_and_vladimir_putin_vow_t...,2024-05-17,data\text\xi_jinping_and_vladimir_putin_vow_to...
76,https://www.youtube.com/watch?v=TAr3DuUnLSg,"{'id': {'kind': 'youtube#video', 'videoId': 'T...",TAr3DuUnLSg,Bloomberg Television,Biden Blocks Release of Interview Tapes on Cla...,data\audio\biden_blocks_release_of_interview_t...,2024-05-17,data\text\biden_blocks_release_of_interview_ta...
77,https://www.youtube.com/watch?v=0f8nNqcCNEU,"{'id': {'kind': 'youtube#video', 'videoId': '0...",0f8nNqcCNEU,Bloomberg Television,China Regulators Set to Discuss Property Aid W...,data\audio\china_regulators_set_to_discuss_pro...,2024-05-17,data\text\china_regulators_set_to_discuss_prop...


### Sandbox- I am using this part to experiment. No need to read it.

In [None]:
# print(yt.title)
# print(yt.publish_date)
# print(yt.check_availability())
# print(yt.rating)
# # print(yt.streaming_data)
# print(yt.author)

# base_path= 'data\\text'
# filename= (os.path.basename(audio_file_path).split('.mp3')[0]) + '.txt'
# os.path.join(base_path, filename)


In [296]:
import requests
# Set your API key
API_KEY = "AIzaSyBTuh0ypeNqiDVhNIvn5ofhuIvx8qmt-r4"

# Create a YouTube API service
youtube = build('youtube', 'v3', developerKey=API_KEY)

# Set the YouTube channel ID
# CHANNEL_ID = 'UCEAZeUIeJs0IjQiqTCdVSIg'
# search_channel_name = 'YahooFinance'
search_channel_name = 'Epicurious'
channel_id= requests.get(f'https://www.googleapis.com/youtube/v3/search?part=id&q={search_channel_name}&type=channel&key={API_KEY}').json()['items'][0]['id']['channelId']
print(f"Channel ID for {search_channel_name}: {channel_id}")

# Number of videos to fetch
NUM_VIDEOS = 50 #The maxResults parameter specifies the maximum number of items that should be returned in the result set. Acceptable values are 0 to 50, inclusive. The default value is 5.

# Fetch the latest videos for the channel
request = youtube.search().list(
    part='snippet',
    
    channelId= channel_id,
    # q= 'world economy news',
    order='date',

    maxResults=NUM_VIDEOS,
    type='video',
    fields="items(id,snippet(publishedAt,channelId,channelTitle,title,description))"
)
response = request.execute()



Channel ID for Epicurious: UCcjhYlL1WRBjKaJsMH_h7Lg


In [153]:
request1 = youtube.channels().list(
    part='snippet')
response1 = request.execute()
response1['items'][0]

{'id': {'kind': 'youtube#video', 'videoId': 'PDiwMcAUibI'},
 'snippet': {'publishedAt': '2024-05-17T18:54:56Z',
  'channelId': 'UCIALMKvObZNtJ6AmdCLP7Lg',
  'title': 'Bonds and Streaming | Bloomberg Surveillance | May 17, 2024',
  'description': 'Watch Tom and Paul LIVE every day on YouTube: http://bit.ly/3vTiACF. Bloomberg Surveillance hosted by Tom Keene and Paul ...',
  'channelTitle': 'Bloomberg Television'}}

In [199]:
response['items'][4]

{'id': {'kind': 'youtube#video', 'videoId': '9fmT5mGKVJc'},
 'snippet': {'publishedAt': '2023-09-10T15:12:04Z',
  'channelId': 'UCcjhYlL1WRBjKaJsMH_h7Lg',
  'title': 'Mayo Over Butter For Grilled Cheese',
  'description': "Professional chef and culinary instructor Frank Proto can't hear the haters—he knows mayonnaise is superior to butter when it ...",
  'channelTitle': 'Epicurious'}}

In [127]:
for item in response['items']:
 print(item['snippet']['title'])

Financial World War Coming: Global Elite&#39;s Plan – &#39;You&#39;ll Own Nothing &amp; They&#39;ll Own You,&#39; Carol Roth
UN reports improved prospects for global economy, forecasts 2.7% growth in 2024
UN forecasts improved prospects for the world economy
The Economy Is In For A Soft Landing 🛬  #foryou #news #money #world #finance #economy
Live: Nifty Hits 22,500 Led By Autos &amp; Metals| M&amp;M, Kaynes &amp; Dixon Tech In Focus| Closing Bell
Discussion | India&#39;s economy is forecast to expand by 6.9% in 2024 and 6.6 per cent in 2025
Japanese economy shrinks amid weak consumer spendingーNHK WORLD-JAPAN NEWS
Nirmala Sitharaman LIVE | CII Annual Business Summit 2024 |Delhi |India |Economy |Finance
United Nation Raises India&#39;s 2024 Growth Projection To 6.9% From 6.2% | Indian Economy | GDP | UN
Goldilocks&#39; Comments and Global Economic News Thursday Evening 5 16 24
Biden&#39;s tariff move risks global trade &amp; growth, says IMF | World Business Watch | WION
Insights from A

In [299]:
datetime.strptime(response['items'][0]['snippet']['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")

datetime.datetime(2024, 5, 15, 16, 0, 7)

In [308]:
# tz='US/Eastern'
tz=pytz.timezone('US/Eastern')
latest_date= datetime.strptime(response['items'][0]['snippet']['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
latest_date= latest_date.astimezone(tz)

# a= datetime.now()
# a= a.astimezone(timezone(tz))

print("latest date: ", latest_date)
video_urls= []
for item in response['items']:
    # print(item['id']['videoId'])

    t= datetime.strptime(item['snippet']['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
    t= t.astimezone(tz)

    # if latest_date - t <= timedelta(days= 1):
    if (latest_date - t).days==0:
    # if (a - t).days==0:
        video_urls.append('https://www.youtube.com/watch?v=' + item['id']['videoId'])
        # print('https://www.youtube.com/watch?v=' + item['id']['videoId']) 
        print(t)


latest date:  2024-05-15 17:00:07-04:00
2024-05-15 17:00:07-04:00


In [85]:
a= datetime.today()
b= datetime.strptime(response['items'][0]['snippet']['publishTime'], "%Y-%m-%dT%H:%M:%SZ")
(a - b).days

-1

In [79]:
from pytz import timezone
print(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))

latest_date= datetime.strptime(response['items'][0]['snippet']['publishTime'], "%Y-%m-%dT%H:%M:%SZ")
print(latest_date)
print(latest_date.astimezone(timezone("US/Eastern")))

# print(latest_date.astimezone(timezone.utc).astimezone(timezone(-5)))

2024-05-17 13:48:15
2024-05-17 18:15:01
2024-05-17 19:15:01-04:00


In [103]:
a= datetime.now()
a

datetime.datetime(2024, 5, 17, 14, 0, 53, 842403)

In [109]:
tz='US/Eastern'
latest_date= datetime.strptime(response['items'][0]['snippet']['publishTime'], "%Y-%m-%dT%H:%M:%SZ")
latest_date= latest_date.astimezone(timezone(tz))

a= datetime.now()
a= a.astimezone(timezone(tz))

print("latest date: ", latest_date)
video_urls= []
for item in response['items']:
    # print(item['id']['videoId'])

    t= datetime.strptime(item['snippet']['publishTime'], "%Y-%m-%dT%H:%M:%SZ")
    t= t.astimezone(timezone(tz))

    # if latest_date - t <= timedelta(days= 1):
    # if (latest_date - t).days==0:
    if (a - t).days==0:
        video_urls.append('https://www.youtube.com/watch?v=' + item['id']['videoId'])
        # print('https://www.youtube.com/watch?v=' + item['id']['videoId']) 
        print(t)


latest date:  2024-05-17 17:00:04-04:00
2024-05-17 14:00:51-04:00
2024-05-17 13:30:29-04:00
2024-05-17 11:21:34-04:00
2024-05-17 09:30:10-04:00
2024-05-16 23:06:25-04:00
2024-05-16 23:00:14-04:00
2024-05-16 17:55:26-04:00
2024-05-16 17:56:21-04:00
2024-05-16 16:23:07-04:00
2024-05-16 16:00:27-04:00
2024-05-16 15:26:35-04:00


##### generate description

In [246]:
request = youtube.videos().list(part="snippet,statistics", id= '9fmT5mGKVJc')
response = request.execute()
# video_info = response["items"][0]["snippet"]
# title = video_info["title"]
# description = video_info["description"]
# view_count = response["items"][0]["statistics"]["viewCount"]
response['items'][0]['statistics']['commentCount']

'190'

In [202]:
def get_video_details(video_id):
    request = youtube.videos().list(part="snippet,statistics", id=video_id)
    response = request.execute()
    video_info = response["items"][0]["snippet"]
    title = video_info["title"]
    description = video_info["description"]
    view_count = response["items"][0]["statistics"]["viewCount"]
    # Add any other relevant information you need
    return title, description, view_count

# youtube = youtube_authenticate()
video_id = '9fmT5mGKVJc'
title, description, view_count = get_video_details(video_id)
print(f"Title: {title}\nDescription: {description}\nView Count: {view_count}")


Title: Mayo Over Butter For Grilled Cheese
Description: Professional chef and culinary instructor Frank Proto can't hear the haters—he knows mayonnaise is superior to butter when it comes to making a grilled cheese sandwich.

Start your free trial and access over 50,000 expertly-tested recipes from Epicurious, Bon Appétit and more on the Epicurious app. https://apps.apple.com/app/apple-store/id312101965?pt=45076&ct=EpiVideoDescriptionYT&mt=8

Still haven’t subscribed to Epicurious on YouTube? ►► http://bit.ly/epiyoutubesub

ABOUT EPICURIOUS
Browse thousands of recipes and videos from Bon Appétit, Gourmet, and more. Find inventive cooking ideas, ingredients, and restaurant menus from the world’s largest food archive.
View Count: 193378


##### generate comments

In [247]:
video_response = youtube.commentThreads().list(
    part='snippet,replies',
    videoId='9fmT5mGKVJc',
    order= 'time').execute()

request = youtube.videos().list(part="snippet,statistics", id= '9fmT5mGKVJc')

c=[]
comment_count= int(response['items'][0]['statistics']['commentCount'])

if comment_count >0:
    for item in video_response['items']:

        c.append(item['snippet']['topLevelComment']['snippet']['textDisplay'])




# while video_response:
#     for item in video_response['items']:
#         comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
#         reply_count = item['snippet']['totalReplyCount']
#         if reply_count > 0:
#             for reply in item['replies']['comments']:
#                 reply_text = reply['snippet']['textDisplay']
#                 # Process comment and reply as needed
#                 print(comment, reply_text, end='\n\n')

#     if 'nextPageToken' in video_response:
#         video_response = youtube.commentThreads().list(
#             part='snippet,replies',
#             videoId=video_id,
#             pageToken=video_response['nextPageToken']
#         ).execute()
#     else:
#         break
c[:10]

['Try using honey mustard for a grilled ham &amp; cheese.  I love it.',
 'Whefes the finished project lol',
 'My brother and sister both prefer Mayo, but I like butter best. That said, if I don’t have any softened butter I will use Mayo in a pinch. It’s better than tearing the bread with cold butter or trying to soften, but not melt, the butter, lol.',
 'Golden Gully: You MADMAN',
 'Remember, you&#39;re entitled to your opinion no matter how wrong it is.',
 'Try some unsalted butter...',
 'Mayo is for freaks, butter is king in everything',
 'eww',
 'real pros know you put butter on the pan, melt it and then you put the mayo on top so it doesn&#39;t burn the mayo and actually cooks in the butter or alternatively in oil',
 'First it can burn because eggs can burn and 2 when have you eaten hot mayo and described it as a better flavor than butter.']

In [245]:
video_response['items'][0]['snippet']

{'channelId': 'UCcjhYlL1WRBjKaJsMH_h7Lg',
 'videoId': '9fmT5mGKVJc',
 'topLevelComment': {'kind': 'youtube#comment',
  'etag': 'S4bAiYTKoKle2k0WK7hBjPh18BQ',
  'id': 'UgxRq4rvMs8lCozqn0R4AaABAg',
  'snippet': {'channelId': 'UCcjhYlL1WRBjKaJsMH_h7Lg',
   'videoId': '9fmT5mGKVJc',
   'textDisplay': 'Try using honey mustard for a grilled ham &amp; cheese.  I love it.',
   'textOriginal': 'Try using honey mustard for a grilled ham & cheese.  I love it.',
   'authorDisplayName': '@augustusmilligan9244',
   'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AIdro_nFvFqHAp0qG0MhsFJ7UWCuCqxkXaIQu7e4bzX1OEs=s48-c-k-c0x00ffffff-no-rj',
   'authorChannelUrl': 'http://www.youtube.com/@augustusmilligan9244',
   'authorChannelId': {'value': 'UCdu7AIHb0osnJ3oydDcPU5g'},
   'canRate': True,
   'viewerRating': 'none',
   'likeCount': 0,
   'publishedAt': '2024-04-08T11:57:45Z',
   'updatedAt': '2024-04-08T11:57:45Z'}},
 'canReply': True,
 'totalReplyCount': 0,
 'isPublic': True}

In [240]:
video_response['items'][2]['snippet']['topLevelComment']

{'kind': 'youtube#comment',
 'etag': 'd1BO4RPTmP1D_D2brKMq0h0TgsI',
 'id': 'UgxDIe_5mRkCSU3nJo94AaABAg',
 'snippet': {'channelId': 'UCcjhYlL1WRBjKaJsMH_h7Lg',
  'videoId': '9fmT5mGKVJc',
  'textDisplay': 'My brother and sister both prefer Mayo, but I like butter best. That said, if I don’t have any softened butter I will use Mayo in a pinch. It’s better than tearing the bread with cold butter or trying to soften, but not melt, the butter, lol.',
  'textOriginal': 'My brother and sister both prefer Mayo, but I like butter best. That said, if I don’t have any softened butter I will use Mayo in a pinch. It’s better than tearing the bread with cold butter or trying to soften, but not melt, the butter, lol.',
  'authorDisplayName': '@LovesWaterfalls',
  'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AIdro_lpQF0QZRX67dT4-zKD5dhQYzDvhI6pZ3z6dYP0wXU0JD4=s48-c-k-c0x00ffffff-no-rj',
  'authorChannelUrl': 'http://www.youtube.com/@LovesWaterfalls',
  'authorChannelId': {'value': 'UCs1piMgonaq

In [237]:
from googleapiclient.discovery import build

# Initialize YouTube API with your API key


def get_comments(video_id, comments=[], token=''):
    video_response = youtube.commentThreads().list(
        part='snippet',
        videoId=video_id,
        pageToken=token
    ).execute()
    for item in video_response['items']:
        comment = item['snippet']['topLevelComment']
        text = comment['snippet']['textDisplay']
        comments.append(text)
        if 'nextPageToken' in video_response:
            return get_comments(video_id, comments, video_response['nextPageToken'])
        else:
            return comments

video_id = '9fmT5mGKVJc'
comment_threads = get_comments(video_id)
print(len(comment_threads))  # Total number of comments
print(comment_threads)

7
['Try using honey mustard for a grilled ham &amp; cheese.  I love it.', 'Salty with sweetness and a nice vinegary zip MMM MHM', 'Team Butter.<br>It&#39;s not about the browning, which you can still beautifully achieve with butter, it&#39;s about the flavour.<br><br>Butter tastes better than mayo. Always has.', 'no way this comment gets 100k likes..', 'Better to use natural butter than industrial mayo. Unless you make your own mayo.', 'Mayo is only more convenient. Easier to use. But Flavor sir?', 'I do that trick for Burger buns, it really does elevate your  toasted bread.']


In [207]:
comment_threads[2]

'Team Butter.<br>It&#39;s not about the browning, which you can still beautifully achieve with butter, it&#39;s about the flavour.<br><br>Butter tastes better than mayo. Always has.'