In [1]:
import googleapiclient.discovery
import os
import pandas as pd
from dotenv import load_dotenv

# Pull variables from a local .env file (if one exists) into the environment
load_dotenv()

# Read the YouTube Data API key from the `api_key` environment variable
api_key = os.getenv('api_key')
if not api_key:
    # Fail fast: without a key the client would be built unauthenticated and
    # only break later with a much less obvious credential/HTTP error.
    raise RuntimeError(
        "YouTube API key not found: set `api_key` in the environment or in a .env file"
    )

# Shared YouTube Data API v3 client used by the helper functions below
youtube = googleapiclient.discovery.build('youtube', 'v3', developerKey = api_key)

# Username of the channel whose video comments are collected
username = 'mrbeast6000'

# Function to get channel id by username
def getChannelId(username):
    """Resolve a YouTube username to its channel id.

    Args:
        username: Value sent as the ``forUsername`` filter of ``channels.list``.

    Returns:
        The ``id`` of the first channel in the response.

    Raises:
        ValueError: If the response contains no channel for ``username``.
    """
    request = youtube.channels().list(
        part = 'snippet',
        forUsername = username
    )
    response = request.execute()

    # A lookup with no match can come back without an 'items' key or with an
    # empty list; report that clearly instead of a bare KeyError/IndexError.
    items = response.get('items') or []
    if not items:
        raise ValueError(f"No YouTube channel found for username {username!r}")

    return items[0]['id']

# Get the id of a channel's uploads playlist
def getUploadsId(channel_id):
    """Return the id of the uploads playlist for a channel.

    Requests the ``contentDetails`` part for ``channel_id`` and reads
    ``relatedPlaylists.uploads`` from the first item of the response.
    """
    channel_lookup = youtube.channels().list(part = 'contentDetails', id = channel_id)
    first_match = channel_lookup.execute()['items'][0]
    return first_match['contentDetails']['relatedPlaylists']['uploads']

# Get video ids
def getVideoIds(uploads_id, max_videos = 50):
    """Collect video ids and titles from a playlist.

    Args:
        uploads_id: Id of the playlist to read (e.g. the value returned by
            getUploadsId).
        max_videos: Upper bound on the number of videos returned. The default
            of 50 matches a single full page of results; pass None to follow
            every page of the playlist.

    Returns:
        Dict with two parallel lists, 'videoId' and 'videoTitle', in the
        order the API returned the playlist items.
    """
    page_size = 50  # playlistItems.list allows at most 50 results per request

    video_ids = []
    video_titles = []
    page_token = None

    while max_videos is None or len(video_ids) < max_videos:
        remaining = page_size if max_videos is None else max_videos - len(video_ids)
        params = {
            'part': 'snippet',
            'playlistId': uploads_id,
            'maxResults': min(page_size, remaining),
        }
        # Only send pageToken on follow-up requests so the first request is
        # exactly the single-page call this function has always made.
        if page_token:
            params['pageToken'] = page_token
        response = youtube.playlistItems().list(**params).execute()

        # Slice defensively so the result never exceeds max_videos
        for video in response.get('items', [])[:remaining]:
            video_ids.append(video['snippet']['resourceId']['videoId'])
            video_titles.append(video['snippet']['title'])

        page_token = response.get('nextPageToken')
        if not page_token:
            break  # last page reached

    # Return as dict
    return {'videoId': video_ids, 'videoTitle': video_titles}

# Get comments
def getComments(video_id, n_comments):
    """Fetch up to ``n_comments`` top-level comments for a video.

    ``commentThreads.list`` only accepts ``maxResults`` between 1 and 100, so
    larger requests are served by following ``nextPageToken`` page by page
    until enough comments are collected or the video has no more.

    Args:
        video_id: Id of the video whose comment threads are requested.
        n_comments: Maximum number of comments to return. Values <= 0 yield
            empty lists without calling the API.

    Returns:
        Dict of four parallel lists: 'commentId', 'text', 'likeCount' and
        'publishDate', one entry per top-level comment.
    """
    page_limit = 100  # documented per-request maximum for commentThreads.list

    # Comments
    comment_id = []
    text = []
    like_count = []
    publish_date = []

    page_token = None
    while len(comment_id) < n_comments:
        remaining = n_comments - len(comment_id)
        params = {
            'part': 'snippet',
            'videoId': video_id,
            'maxResults': min(page_limit, remaining),
        }
        if page_token:
            params['pageToken'] = page_token
        response = youtube.commentThreads().list(**params).execute()

        # Slice defensively so the result never exceeds n_comments
        for item in response.get('items', [])[:remaining]:
            top_level = item['snippet']['topLevelComment']['snippet']
            comment_id.append(item['id'])
            text.append(top_level['textDisplay'])
            like_count.append(top_level['likeCount'])
            publish_date.append(top_level['publishedAt'])

        page_token = response.get('nextPageToken')
        if not page_token:
            break  # no further pages for this video

    return {'commentId': comment_id, 'text': text, 'likeCount': like_count, 'publishDate': publish_date}

In [2]:
# Resolve the configured username to its channel id
mr_beast_channelId = getChannelId(username)
# Find the playlist that holds that channel's uploads
uploads = getUploadsId(mr_beast_channelId)
# Dict with parallel 'videoId' / 'videoTitle' lists for videos in that playlist
videos = getVideoIds(uploads)

In [11]:
# Fetch comments for every video. Each entry appended here is the dict that
# getComments returns for one video (parallel lists keyed by field name),
# so comment_list ends up with one dict per video, not one row per comment.
comment_list = []
for video_id in videos['videoId']:
    # 1000 is the number of comments requested per video
    comments = getComments(video_id, 1000)
    comment_list.append(comments)


In [None]:
# Gather the text of the comments fetched for every video into one flat list
comment_list = []
for video_id in videos['videoId']:  # getVideoIds returns the key 'videoId'
    comments = getComments(video_id, 1000)
    # getComments returns a dict of parallel lists; iterating the dict itself
    # would yield its key names, so take the comment texts explicitly.
    comment_list.extend(comments['text'])

# One row per comment, in a single 'comments' column
comment_df = pd.DataFrame(comment_list, columns = ['comments'])

comment_df.to_csv('comments.csv', index = False)