Notebook for collecting audience usernames for a channel

In [1]:
# EDIT THIS:
CHANNEL = "TYLER1LOL"

# Limit calls to stay under API quota
COMMENT_CAP = 50 # This gets multiplied by 100
VIDEO_CAP = 1 # This gets multiplied by 25

In [2]:
import os
import csv
from googleapiclient.discovery import build
from dotenv import load_dotenv

In [3]:
# Load YOUTUBE_API_KEY from .env file
load_dotenv()  
API_KEY = os.getenv('YOUTUBE_API_KEY')
youtube = build("youtube", "v3", developerKey=API_KEY)

In [4]:
# Get all usernames from a video's comments, including replies
def get_video_usernames(video_id, cap):
    usernames = []
    next_page_token = None
    count = 0
    
    while count < cap:
        count = count + 1

        request = youtube.commentThreads().list(
            part="snippet,replies",
            videoId=video_id,
            maxResults=100,
            pageToken=next_page_token
        )
        response = request.execute()
        
        for item in response.get("items", []):
            usernames.append(item["snippet"]["topLevelComment"]["snippet"]["authorDisplayName"])
            
            if 'replies' in item:
                for reply in item['replies']['comments']:
                    usernames.append(reply["snippet"]["authorDisplayName"])
        
        next_page_token = response.get("nextPageToken")
        
        if not next_page_token:
            break
    
    return usernames

In [14]:
# Append usernames from video to channel audience csv
def append_usernames_to_csv(file_path, usernames):
    with open(file_path, mode='a', newline='') as file:
        writer = csv.writer(file)
        for username in usernames:
            try:
                writer.writerow([username])
            except UnicodeEncodeError:
                print(f"Occurence of non UTF-8 character.")

In [6]:
def get_channel_id_by_custom_url(url):
    request = youtube.search().list(
        part='snippet',
        q=url,
        type='channel'
    )
    response = request.execute()
    
    if 'items' in response and response['items']:
        channel_id = response['items'][0]['id']['channelId']
        return channel_id
    else:
        return None

#custom_url = 'RedLetterMedia'
#channel_id = get_channel_id_by_custom_url(custom_url)
#print(f'Channel ID: {channel_id}')

In [7]:
def get_video_ids(channel_id, cap):
    video_ids = []
    next_page_token = None
    count = 0
    
    while count < cap:
        count = count + 1
        
        playlists_response = youtube.channels().list(
            part='contentDetails',
            id=channel_id
        ).execute()
        
        playlist_id = playlists_response['items'][0]['contentDetails']['relatedPlaylists']['uploads']
        
        playlist_items_response = youtube.playlistItems().list(
            part='contentDetails',
            playlistId=playlist_id,
            maxResults=25,
            pageToken=next_page_token
        ).execute()
        
        video_ids.extend(item['contentDetails']['videoId'] for item in playlist_items_response['items'])
        
        next_page_token = playlist_items_response.get('nextPageToken')
        if not next_page_token:
            break
    
    return video_ids

#video_ids = get_video_ids(channel_id)
#print(len(video_ids))

In [15]:
# Get audience for CHANNEL (a youtube channel's custom URL)

# create empty csv for channel
file_path = f"Channels/{CHANNEL}.csv"
with open(file_path, mode='w', encoding='utf-8') as file:
        pass

channel_id = get_channel_id_by_custom_url(CHANNEL)

video_ids = get_video_ids(channel_id, VIDEO_CAP)
num_videos = len(video_ids)
print(f"{num_videos} videos found.")

count = 1
for video_id in video_ids:
    usernames  = get_video_usernames(video_id, COMMENT_CAP)
    print(f"{count}/{num_videos}, {video_id}: {len(usernames)}")
    count = count + 1
    append_usernames_to_csv(file_path, usernames)

826 videos found.
21gl2hgjo7A: 3288
YieefGRusWQ: 5078
BOZ99Zw8dmw: 3089
5Ub2uT2z14w: 989
Pu0sIzDAH_Y: 1683
vkqXLLg0VZw: 1815
X-6WBWmoVEY: 12505
pygCruE-EJs: 3212
Occurence of non UTF-8 character.
1g9_uw5XjyM: 1148
MwO5fGL2MeY: 8730
TD1qwkCOqRo: 2633
dcqYEq5ANFA: 1841
Qzt5zmqu-ac: 1985
k2x1yP2cpE8: 2569
YrZhTkvSIXY: 2137
l-BxuvnTHEE: 2813
EqDwu-0m_mA: 2323
OOh4nSYKQ2Y: 1376
uWhCZmPpYy0: 5073
8pgmrrrupu4: 3816
EU1Ds06m2xw: 3821
-jiZMaMa8gs: 1100
bK6r8b_8jWI: 1711
yzJqarYU5Io: 3014
y3ZLDfB2N3g: 1640
L9zXv4TfMUg: 5957
kL8Ol0C76dQ: 3312
qxCz-SAmpDA: 2275
B1A9fjoqTn8: 3099
Occurence of non UTF-8 character.
ie_KKpWropk: 1935
GL9vi4hTQQU: 3026
OXq8cam0QOU: 3921
661UNW658VU: 863
HZAXlcTjCmI: 1700
7TVUTgoG1yI: 2262
fYgNqg6V8do: 2337
eQFdsYVD4eI: 1684
pLpxYMgQUT4: 1781
tCGwPwGi7tA: 1693
v48zbCMhyBA: 4006
FxRA_VAxlMA: 1640
kVgdih1RAHI: 2809
Q6LDZSi-lzU: 3679
mqwTELBPQOc: 2894
GpBerLYqT78: 2308
TmDh6LW-fAI: 2227
H7jeLfmzhpg: 3089
cPub1mjfPHk: 1412
SR9DIY_wh2Q: 1556
mOMJe51OYgo: 1709
hxM44sp2_MI: 24

HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet%2Creplies&videoId=7PeEAqARU0E&maxResults=100&pageToken=Z2V0X25ld2VzdF9maXJzdC0tQ2dnSWdBUVZGN2ZST0JJRkNJZ2dHQUFTQlFpZElCZ0JFZ1VJcUNBWUFCSUZDSWtnR0FBU0JRaUhJQmdBR0FBaURRb0xDSm0td3VvRkVOQ1cybm8%3D&key=AIzaSyD1V09USoaBvY9-DAarMDF4eT_lmsnLbFY&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">