Notebook for collecting audience usernames for a channel

In [1]:
# Limit calls to stay under API quota
COMMENT_CAP = 50 # This gets multiplied by 100
VIDEO_CAP = 1 # This gets multiplied by 25

In [2]:
import os
import csv
from googleapiclient.discovery import build
from dotenv import load_dotenv

In [3]:
# Load YOUTUBE_API_KEY from .env file
load_dotenv()  
API_KEY = os.getenv('YOUTUBE_API_KEY')
youtube = build("youtube", "v3", developerKey=API_KEY)

In [4]:
# Get all usernames from a video's comments, including replies
def get_video_usernames(video_id, cap):
    usernames = []
    next_page_token = None
    count = 0
    
    while count < cap:
        count = count + 1

        try:
            request = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token
            )
            response = request.execute()
        except Exception as e:
            # HttpError when comments are disabled.
            print(e)
            return usernames
        
        for item in response.get("items", []):
            usernames.append(item["snippet"]["topLevelComment"]["snippet"]["authorDisplayName"])
            
            if 'replies' in item:
                for reply in item['replies']['comments']:
                    usernames.append(reply["snippet"]["authorDisplayName"])
        
        next_page_token = response.get("nextPageToken")
        
        if not next_page_token:
            break
    
    return usernames

In [5]:
# Append usernames from video to channel audience csv
def append_usernames_to_csv(file_path, usernames):
    with open(file_path, mode='a', newline='') as file:
        writer = csv.writer(file)
        for username in usernames:
            try:
                writer.writerow([username])
            except UnicodeEncodeError:
                print(f"Occurence of non UTF-8 character.")

In [6]:
def get_channel_id_by_custom_url(url):
    request = youtube.search().list(
        part='snippet',
        q=url,
        type='channel'
    )
    response = request.execute()
    
    if 'items' in response and response['items']:
        channel_id = response['items'][0]['id']['channelId']
        return channel_id
    else:
        return None

In [7]:
def get_video_ids(channel_id, cap):
    video_ids = []
    next_page_token = None
    count = 0
    
    while count < cap:
        count = count + 1
        
        playlists_response = youtube.channels().list(
            part='contentDetails',
            id=channel_id
        ).execute()
        
        playlist_id = playlists_response['items'][0]['contentDetails']['relatedPlaylists']['uploads']
        
        playlist_items_response = youtube.playlistItems().list(
            part='contentDetails',
            playlistId=playlist_id,
            maxResults=25,
            pageToken=next_page_token
        ).execute()
        
        video_ids.extend(item['contentDetails']['videoId'] for item in playlist_items_response['items'])
        
        next_page_token = playlist_items_response.get('nextPageToken')
        if not next_page_token:
            break
    
    return video_ids

In [8]:
# Get audience for CHANNEL (a youtube channel's custom URL)
def get_audience(channel):
        # create empty csv for channel
        file_path = f"Channels/{channel}.csv"
        with open(file_path, mode='w', encoding='utf-8') as file:
                pass

        channel_id = get_channel_id_by_custom_url(channel)

        video_ids = get_video_ids(channel_id, VIDEO_CAP)
        num_videos = len(video_ids)
        print(f"{num_videos} videos found.")

        count = 1
        for video_id in video_ids:
                usernames  = get_video_usernames(video_id, COMMENT_CAP)
                print(f"{count}/{num_videos}, {video_id}: {len(usernames)}")
                count = count + 1
                append_usernames_to_csv(file_path, usernames)

In [9]:
with open('channels_to_run.txt', 'r') as file:
    channels = file.readlines()

for channel in channels:
    channel = channel.strip()
    if channel:
        print(f"---- {channel} ----")
        get_audience(channel)

---- ZullietheWitch ----
25 videos found.
1/25, 15oxX3C-PYM: 799
2/25, 0T--MrFL97c: 922
3/25, 0Aijnyn1NEA: 731
4/25, nHjsz9oXh94: 739
5/25, 0KoGwQjKlEA: 782
6/25, TW61dH1m57Y: 547
7/25, P7t-gXCD98o: 1716
Occurence of non UTF-8 character.
8/25, G3CtGilQkpY: 650
9/25, 9oGgq3E4Tbg: 1113
10/25, itipQQjJHnI: 1006
11/25, X4xE5amx3aI: 1421
Occurence of non UTF-8 character.
12/25, JbL2aQubNlw: 684
Occurence of non UTF-8 character.
13/25, yxy20EpQGB4: 961
14/25, m3YAIq-gNYo: 1137
15/25, SQDBseP14lE: 948
16/25, AC6ja5B--30: 603
17/25, cxQD_tGucv0: 1141
18/25, 1m3Cf0Qf39I: 929
Occurence of non UTF-8 character.
19/25, 4dUrFFGrHYE: 519
Occurence of non UTF-8 character.
20/25, nRUX5MhccVE: 930
Occurence of non UTF-8 character.
21/25, ScJcgXtKls8: 858
22/25, glzoAUIiA4c: 1028
23/25, ILJBtBOQ2P8: 1290
24/25, FECFxyNpLbs: 505
25/25, mJ-AAPVvALY: 495
---- kiwami-japan ----
25 videos found.
1/25, Q1VWUL0G0uU: 5685
Occurence of non UTF-8 character.
Occurence of non UTF-8 character.
Occurence of non UTF-8 