In this notebook, I scrape data about the top YouTubers in Taiwan and save it as JSON files for further processing.

# Set up YouTube API keys & logging configurations

In [1]:
import requests
import os
import json
import pickle
import warnings
import random
import pandas as pd
from datetime import datetime
from functools import reduce
import concurrent.futures
import functools
import logging

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# API keys are taken from environment variables of the same name so that real keys never
# have to be stored in the notebook. An unset variable yields "", and empty keys are skipped
# when a spider rotates to another key.
# NOTE(review): the key numbering does not follow the account order below (KEY_3 belongs to
# the first account, KEY_1 to the third) -- confirm this is intentional.

# First GCP Account
YOUTUBE_API_KEY_3 = os.environ.get("YOUTUBE_API_KEY_3", "")

# Second GCP Account
YOUTUBE_API_KEY_2 = os.environ.get("YOUTUBE_API_KEY_2", "")

# Third GCP Account
YOUTUBE_API_KEY_1 = os.environ.get("YOUTUBE_API_KEY_1", "")

# Fourth GCP Account
YOUTUBE_API_KEY_4 = os.environ.get("YOUTUBE_API_KEY_4", "")

# Keys in the order in which spiders fall back to them when a quota is exhausted
YOUTUBE_API_KEY_LIST = [
    YOUTUBE_API_KEY_1,
    YOUTUBE_API_KEY_2,
    YOUTUBE_API_KEY_3,
    YOUTUBE_API_KEY_4
]

# Base name (without extension) of the pickle file that stores the whole spider cluster
CLUSTER_FILENAME = "youtube_spider_cluster"

# Logging Configurations
logger = logging.getLogger(__name__)
formatter = logging.Formatter('%(asctime)s:%(name)s:%(thread)d:%(threadName)s:%(message)s')

# set logging level
logger.setLevel(logging.INFO)

# `logging.getLogger` returns the same object every time this cell is run, so handlers from
# a previous run must be detached first; otherwise every message is emitted several times.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# Log messages contain channel titles in Chinese, so the file encoding is fixed to UTF-8
# instead of depending on the platform default.
file_handler = logging.FileHandler('data-scraping.log', encoding='utf-8')
stream_handler = logging.StreamHandler()

# set logging format
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
# set logging format
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

# Define Spiders for scraping
There are two classes associated with this task.
* `YoutubeSpider`: Scrape and gather all data from a given YouTube channel
* `YoutubeSpiderCluster`: Manage resources for and gather results from Youtube spiders

In [2]:
###########################################################################################
##  -----------------------  Define YoutubeSpiderCluster Class ------------------------  ##
###########################################################################################

class YoutubeSpiderCluster():
    """Manage resources for and gather results from `YoutubeSpider` objects (one spider per channel).

    Typical use: `load_channel_ids` -> `scrape_youtuber_channels` -> `scrape_youtuber_videos`.
    The whole cluster (including every spider and the data it holds) is pickled to
    `<cluster_save_path>/<CLUSTER_FILENAME>.pkl` as a checkpoint.
    """

    def __init__(self, api_key):
        """Set up save locations and empty state.

        Args:
            api_key: YouTube Data API key handed to every spider as its initial key.
        """
        # General Path Set-ups
        self.base_url = "https://www.googleapis.com/youtube/v3/"
        self.cluster_save_path = "youtube_data"
        self.channel_save_path = "youtube_data/channels"
        self.video_save_path = "youtube_data/videos"
        self.comment_save_path = "youtube_data/comments"
        self.api_key: str = api_key

        # Saved Crawlers (with data stored within each)
        self.youtube_spider_cluster: list[YoutubeSpider] = []

        # Administrative data
        self.youtube_channel_ids: list[str] = []
        # Start with an empty table so that scraping before loading is a no-op, not an AttributeError
        self.top_taiwan_youtubers: pd.DataFrame = pd.DataFrame(columns=["Youtuber", "Link"])

    def load_channel_ids(self, load_path, col_names=None, nrows=None, skiprows=None):
        """Get channel ids for all top Taiwan youtubers from the `load_path` argument

        The file is read as a header-less CSV. The channel id is taken to be the last path
        segment of each value in the "Link" column.

        Args:
            load_path: path of the CSV file.
            col_names: column names to assign; must contain "Youtuber" and "Link".
                Defaults to ["Youtuber", "Link"].
            nrows: number of rows to read (None reads all rows).
            skiprows: rows to skip at the start of the file.
        """
        if not col_names:
            col_names = ["Youtuber", "Link"]

        youtubers = pd.read_csv(load_path, header=None, names=col_names, nrows=nrows, skiprows=skiprows)

        # A row without a link cannot be scraped; dropping it here keeps names and ids aligned
        youtubers = youtubers.dropna(subset=["Link"]).reset_index(drop=True)

        self.top_taiwan_youtubers = youtubers
        # Strip whitespace and a trailing "/" first, otherwise the "last segment" would be empty
        self.youtube_channel_ids = (
            youtubers["Link"]
            .astype(str)
            .str.strip()
            .str.rstrip("/")
            .str.rsplit("/", n=1)
            .str[-1]
            .tolist()
        )

    def scrape_youtuber_channels(self):
        """Scrape Channel information & Save to Disk as json files (using multi-threading)

        One spider is created per loaded channel. Every spider that finished without raising
        is stored in `self.youtube_spider_cluster` (including spiders whose channel features
        were not available); afterwards the cluster is pickled to disk.
        """
        youtubers_and_ids = list(zip(self.top_taiwan_youtubers['Youtuber'], self.youtube_channel_ids))
        if not youtubers_and_ids:
            # ThreadPoolExecutor refuses max_workers=0, so stop before building the pool
            logger.info("No channels loaded: nothing to scrape")
            return

        os.makedirs(self.channel_save_path, exist_ok=True)

        def build_spider_scrape_channel(youtuber, youtube_channel_id):
            logger.info(f"Start scraping Channel Info: {youtuber}")

            # Instantiate a spider (for each channel)
            youtube_spider = YoutubeSpider(self.base_url, self.api_key)

            # Get channel_features (for each channel)
            success = youtube_spider.get_channel_features(youtube_channel_id)

            if success:
                # Get video_ids (for each channel) & add to channel features
                youtube_spider.get_channel_all_videoIds()

                # Save channel features to disk
                youtube_spider.save_channel_features_to_json(self.channel_save_path)
            else:
                logger.info(f"Channel features not available: {youtuber}")

            # Return current YoutubeSpider object to save it in the cluster
            return youtube_spider

        # Run spiders in parallel
        spider_cluster = []
        with concurrent.futures.ThreadPoolExecutor(max_workers=len(youtubers_and_ids)) as executer:
            future_to_youtuber = {
                executer.submit(build_spider_scrape_channel, youtuber, youtube_channel_id): youtuber
                for youtuber, youtube_channel_id in youtubers_and_ids
            }
            for future in concurrent.futures.as_completed(future_to_youtuber):
                try:
                    spider = future.result()
                except Exception:
                    # One broken channel must not throw away the results of all the others
                    logger.exception(f"Scraping Channel Info failed: {future_to_youtuber[future]}")
                    continue
                logger.info(f"{spider} finished scraping Channel feature")
                spider_cluster.append(spider)

        # Save all spiders
        self.youtube_spider_cluster = spider_cluster

        # Save current YoutubeSpiderCluster object
        self._save()

    def scrape_youtuber_videos(self):
        """Scrape Video Information from `self.youtube_spider_cluster` & Save to Disk as json files

        Every spider fetches the features of all video ids collected for its channel and
        writes them to `self.video_save_path`. Spiders without collected video ids (their
        channel could not be scraped) are skipped. The cluster is pickled after each spider
        finishes and once more at the end.
        """

        # Define inner function to be run in parallel
        def scrape_one_video_and_its_comments(video_id, youtube_spider):
            # Get video_features (for each video)
            youtube_spider.get_video_features(video_id)

            # Get comments & replies features (for each video)
            # youtube_spider.get_video_all_comments(video_id) ### COMMENTS ARE NOT USED FOR THIS RESEARCH ###

            return video_id

        # Define outer function to be run in parallel
        def get_spider_scrape_all_videos_and_their_comments(youtube_spider):
            # Get all video ids from each channel
            all_video_ids = youtube_spider.get_channel_all_video_ids_()
            if all_video_ids is None:
                # The channel step failed for this spider, so there is nothing to iterate over
                logger.info(f"{youtube_spider} has no video ids: skipped")
                return youtube_spider

            logger.debug(f"Start scraping Video from Channel: {youtube_spider.channel_features['title']}")
            with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executer:
                futures = [
                    executer.submit(scrape_one_video_and_its_comments, video_id, youtube_spider)
                    for video_id in all_video_ids
                ]
                for future in concurrent.futures.as_completed(futures):
                    try:
                        video_id = future.result()
                    except Exception:
                        logger.exception(f"{youtube_spider} failed scraping a video")
                        continue
                    logger.debug(f"{youtube_spider} finished scraping video {video_id}")

            # Save video features to disk
            youtube_spider.save_video_features_to_json(self.video_save_path)

            # Save comment features to disk
            # youtube_spider.save_comment_features_to_json(self.comment_save_path) ### COMMENTS ARE NOT USED FOR THIS RESEARCH ###

            return youtube_spider

        if not self.youtube_spider_cluster:
            # ThreadPoolExecutor refuses max_workers=0, so stop before building the pool
            logger.info("No spiders in the cluster: nothing to scrape")
            return

        os.makedirs(self.video_save_path, exist_ok=True)

        # Run spiders in parallel. The spiders are mutated in place, so the cluster list
        # itself (and its order) stays as it is.
        with concurrent.futures.ThreadPoolExecutor(max_workers=len(self.youtube_spider_cluster)) as executer:
            future_to_spider = {
                executer.submit(get_spider_scrape_all_videos_and_their_comments, youtube_spider): youtube_spider
                for youtube_spider in self.youtube_spider_cluster
            }
            for future in concurrent.futures.as_completed(future_to_spider):
                try:
                    future.result()
                except Exception:
                    logger.exception(f"{future_to_spider[future]} failed scraping videos")

                # Checkpoint from this single thread only: worker threads writing the same
                # pickle file at the same time would corrupt it.
                self._save()

        # Save current YoutubeSpiderCluster object
        self._save()

    def _save(self):
        """Save current `YoutubeSpiderCluster` object to Disk

        NOTE: the file is a pickle of this object; only load it back from a trusted location
        and with both spider classes defined.
        """
        filename = f"{CLUSTER_FILENAME}.pkl"
        os.makedirs(self.cluster_save_path, exist_ok=True)
        cluster_save_path = os.path.join(self.cluster_save_path, filename)

        # Write to a temporary file first so that an interrupted run cannot leave a
        # half-written checkpoint behind.
        temporary_save_path = f"{cluster_save_path}.tmp"
        with open(temporary_save_path, "wb") as pkl_file:
            pickle.dump(self, pkl_file)
        os.replace(temporary_save_path, cluster_save_path)
            
###########################################################################################
##  ---------------------------  Define YoutubeSpider Class ---------------------------  ##
###########################################################################################
        
class YoutubeSpider():
    """Crawls All Data from a Given Channel

    A spider keeps everything it scraped in memory (`channel_features`, `videos`,
    `comments`) and can dump each of them to a JSON file. When the current API key runs out
    of quota, the spider switches to another key from `YOUTUBE_API_KEY_LIST`.
    """

    # Seconds to wait for the API before a request is abandoned (without a timeout a stalled
    # connection would block its worker thread forever)
    REQUEST_TIMEOUT = 30

    def __init__(self, base_url, api_key):
        """Store connection settings and create empty data containers.

        Args:
            base_url: API base URL; request paths are appended to it directly.
            api_key: API key used until it has to be replaced.
        """
        # Inherit Path Set-ups
        self.base_url = base_url
        self.api_key = api_key
        self.bad_api_keys: list[str] = []

        # Crawled Data Storage
        self.channel_features: dict = {}
        self.videos: list[dict] = []        # a list of video (dict)
        self.comments: list[tuple[str, list[dict]]] = []      # a list of tuples of video_ids (str) and their comments (list[dict])
        self.channel_uploads_playlistId: str | None = None

    def __repr__(self):
        try:
            return f"YoutubeSpider({self.channel_features['title']})"
        except KeyError:
            return "YoutubeSpider(None)"

    @staticmethod
    def _api_key_label(api_key):
        """Return the position of `api_key` in `YOUTUBE_API_KEY_LIST` as a two-digit label ("??" if not listed)"""
        try:
            return f"{YOUTUBE_API_KEY_LIST.index(api_key):02}"
        except ValueError:
            return "??"

    @staticmethod
    def _dig(mapping, *keys):
        """Follow `keys` through nested dicts; return None as soon as a level is missing or is not a dict"""
        current = mapping
        for key in keys:
            if not isinstance(current, dict):
                return None
            current = current.get(key)
        return current

    @staticmethod
    def _get_error_reason(response):
        """Return the first error reason of an API error response, or None if the body has no such field"""
        try:
            return response.json()['error']['errors'][0]['reason']
        except (ValueError, KeyError, IndexError, TypeError):
            return None

    @staticmethod
    def _dump_json(directory, filename, payload):
        """Write `payload` as JSON to `directory/filename`, creating the directory if needed"""
        if directory:
            os.makedirs(directory, exist_ok=True)
        save_path = os.path.join(directory, filename)
        with open(save_path, "w", encoding="utf-8") as json_file:
            json.dump(payload, json_file)

    def set_another_api_key(self, path, failed_key=None):
        """Replace the API key after a quota error.

        Args:
            path: request path that is used to re-test keys that were marked as bad.
            failed_key: the key that was actually used for the failed request.
                Defaults to the spider's current key.

        Returns:
            True if the spider now holds a key that is worth trying, False if no key is left.
        """
        if failed_key is None:
            failed_key = self.api_key

        # A spider is shared by many worker threads. If another thread has already replaced
        # the failed key, the current key must not be marked as bad without ever being used.
        if failed_key != self.api_key and self.api_key not in self.bad_api_keys:
            return True

        # Record bad api keys
        if failed_key not in self.bad_api_keys:
            self.bad_api_keys.append(failed_key)

        # Reset api key with an available one, if any
        remaining_keys = [key for key in YOUTUBE_API_KEY_LIST if key not in self.bad_api_keys and key != ""] # filter out bad keys
        if remaining_keys:
            self.api_key = remaining_keys[0]
            logger.debug(f"Success! {self} Switched to API key #{self._api_key_label(self.api_key)} ({len(remaining_keys)} API keys remaining).")
            return True

        # Check if any bad keys are available again (24 hours after quotaExceeded)
        for bad_key in list(self.bad_api_keys):
            if bad_key == "":
                continue

            url_with_bad_key = f"{self.base_url}{path}&key={bad_key}"
            try:
                r = requests.get(url_with_bad_key, timeout=self.REQUEST_TIMEOUT)
            except requests.RequestException as e:
                # Only the exception type is logged: its message contains the URL and thus the key
                logger.warning(f"{self} could not re-test API key #{self._api_key_label(bad_key)} ({type(e).__name__})")
                continue

            if r.status_code == requests.codes.ok:
                logger.debug(f"{self} Restored a bad key #{self._api_key_label(bad_key)}!")
                if bad_key in self.bad_api_keys:
                    self.bad_api_keys.remove(bad_key)
                self.api_key = bad_key
                return True

        logger.debug("No API keys remaining.")
        return False

    def get_html_to_json(self, path):
        """Combine URL path, GET response from api, and return it as JSON

        Args:
            path: request path including its query string; the API key is appended to it.

        Returns:
            The decoded JSON body on success, the string "COMMENTS DISABLED" if the API
            reports that comments are disabled, or None for any other failure.
        """
        # Every key gets at most one chance (plus the re-test of bad keys); the bound only
        # exists so that the retry loop can never spin forever.
        max_attempts = len(YOUTUBE_API_KEY_LIST) + 2

        for _ in range(max_attempts):
            used_key = self.api_key
            api_url = f"{self.base_url}{path}&key={used_key}"
            try:
                r = requests.get(api_url, timeout=self.REQUEST_TIMEOUT)
            except requests.RequestException as e:
                # Only the exception type is logged: its message contains the URL and thus the key
                logger.warning(f"REQUEST FAILED {self} ({type(e).__name__}): {path}")
                return

            if r.status_code == requests.codes.ok:
                try:
                    return r.json()
                except ValueError:
                    logger.warning(f"RESPONSE IS NOT JSON {self}: {path}")
                    return

            if r.status_code == 429:
                logger.warning("TOO MANY REQUESTS")

            if r.status_code == 403:
                reason = self._get_error_reason(r)

                # Run out of daily quota
                if reason == "quotaExceeded":
                    logger.warning(f" QUOTA EXCEEDED {self} (#{self._api_key_label(used_key)})")

                    if self.set_another_api_key(path, failed_key=used_key): # Reset api key
                        continue # still have api keys to use: repeat the request (its status is checked again)

                    return

                # Comments are not available for scraping
                if reason == "commentsDisabled":
                    logger.warning("COMMENTS DISABLED")
                    return "COMMENTS DISABLED" # No comments to scrape (thus data will not have 'items' key)

            # Other errors when fetching data online
            logger.warning(r.text)
            return

        logger.warning(f"GIVING UP {self}: {path}")
        return

    def get_channel_features(self, channel_id, part='snippet,contentDetails,statistics,topicDetails,brandingSettings'):
        """Get Channel-specific features & Upload playlist's ID

        Args:
            channel_id: id of the channel to look up.
            part: comma-separated resource parts requested from the API.

        Returns:
            True if `self.channel_features` and `self.channel_uploads_playlistId` were set,
            False if the channel could not be fetched or a required field was missing.
        """
        path = f'channels?part={part}&id={channel_id}'
        data = self.get_html_to_json(path)
        try:
            data_item = data['items'][0]
        except (TypeError, KeyError, IndexError) as e: # request failed, no 'items' key, or no such channel
            logger.debug(f"Cannot get channel features from channel: {channel_id}")
            logger.debug(e)
            return False

        try:
            snippet = data_item['snippet']
            thumbnails = snippet['thumbnails']
            statistics = data_item['statistics']
            uploads_playlist_id = data_item['contentDetails']['relatedPlaylists']['uploads']

            channel_features = {
                # General
                "channel_id": channel_id,
                "title": snippet['title'],
                "description": snippet['description'],
                "published_at": snippet['publishedAt'],
                "crawled_datetime": datetime.now().isoformat(),
                "country": snippet.get('country'), # optional data (None if missing)

                # Special attributes
                "thumbnail_default_url": thumbnails['default']['url'], # 88 x 88 px
                "thumbnail_medium_url": thumbnails['medium']['url'], # 240 x 240 px
                "thumbnail_high_url": thumbnails['high']['url'], # 800 x 800 px
                "banner_url": self._dig(data_item, 'brandingSettings', 'image', 'bannerExternalUrl'), # optional data
                "keywords": self._dig(data_item, 'brandingSettings', 'channel', 'keywords'), # optional data (None if missing)
                "topic_categories": self._dig(data_item, 'topicDetails', 'topicCategories'), # optional data (None if missing)

                # Statistics
                "view_count": statistics['viewCount'],
                "subscriber_count": statistics.get('subscriberCount'), # optional data (None if missing)
                "ishidden_subscriber_count": statistics['hiddenSubscriberCount'],
                "video_count": statistics['videoCount'],

                # Playlist Id of all uploaded videos
                "all_uploads_playlistId": uploads_playlist_id,
                # All video ids in uploaded videos
                "all_video_ids": [],
            }
        except (KeyError, TypeError) as e:
            # Without the required fields the later steps cannot work, so report a failure
            logger.error(e)
            logger.error(data_item)
            return False

        # Save Channel Features & channel_uploads_playlistId to object
        self.channel_features = channel_features
        self.channel_uploads_playlistId = uploads_playlist_id

        return True

    def get_channel_all_videoIds(self, part='contentDetails', max_results=50):
        """Retrieve all video ids from `self.channel_uploads_playlistId` (must be run after the `self.get_channel_features` method)

        The ids are stored in `self.channel_features['all_video_ids']` in the order in which
        the API returns them.

        Args:
            part: comma-separated resource parts requested from the API.
            max_results: page size requested from the API.

        Raises:
            RuntimeError: if no uploads playlist id is known yet.
            TypeError, KeyError: if a page could not be fetched (the response is None or has no 'items').
        """
        if not self.channel_uploads_playlistId:
            raise RuntimeError("get_channel_features must succeed before get_channel_all_videoIds is called")

        channel_id = self.channel_features.get('channel_id')
        video_ids = []
        next_page_token = ""

        while True:
            # Fetch data from next page
            path = f'playlistItems?part={part}&playlistId={self.channel_uploads_playlistId}&maxResults={max_results}&pageToken={next_page_token}'
            data = self.get_html_to_json(path)
            try:
                data_items = data['items']
            except (TypeError, KeyError) as e: # request failed or response without 'items' key
                logger.debug(f"Cannot get all video_ids from channel: {channel_id}")
                logger.debug(e)
                raise

            # Save data in this page (an empty page is valid: the channel has no uploads)
            for data_item in data_items:
                video_id = self._dig(data_item, 'contentDetails', 'videoId')
                if video_id is not None:
                    video_ids.append(video_id)

            # Stop fetching if no more next page
            next_page_token = data.get('nextPageToken')
            if not next_page_token:
                break

        self.channel_features['all_video_ids'] = video_ids

    def save_channel_features_to_json(self, channel_save_path):
        """Save channel_features to external JSON file (`<channel_id>.json` in `channel_save_path`)"""
        filename = f"{self.channel_features['channel_id']}.json"
        self._dump_json(channel_save_path, filename, self.channel_features)
        logger.info(f"{self}: channel features saved")

    def get_channel_all_video_ids_(self):
        """Getter method for `self.channel_features['all_video_ids']` (None if no channel features are stored)"""
        try:
            return self.channel_features['all_video_ids']
        except KeyError as e:
            logger.error(e)
            return None

    def get_video_features(self, video_id, part='snippet,contentDetails,status,statistics'):
        """Get video information and append it to `self.videos`

        Nothing is appended if the video cannot be fetched or a required field is missing.

        Args:
            video_id: id of the video to look up.
            part: comma-separated resource parts requested from the API.
        """
        path = f'videos?part={part}&id={video_id}'
        data = self.get_html_to_json(path)
        try:
            data_item = data['items'][0]
        except (TypeError, KeyError, IndexError) as e: # request failed, no 'items' key, or video not available
            logger.error(f"Cannot get video ({video_id}) features from channel: {self.channel_features.get('channel_id')}")
            logger.error(e)
            return

        # Retrieve Video Features
        try:
            snippet = data_item['snippet']
            thumbnails = snippet['thumbnails']
            content_details = data_item['contentDetails']
            status = data_item['status']
            statistics = data_item['statistics']

            video_features = {
                # General
                'video_id': video_id,
                "title": snippet['title'],
                "description": snippet['description'],
                "published_at": snippet['publishedAt'],
                "crawled_datetime": datetime.now().isoformat(),
                "channel_id": snippet['channelId'],
                "channel_title": snippet['channelTitle'],

                # Special attributes
                "thumbnail_default_url": thumbnails['default']['url'],
                "thumbnail_medium_url": thumbnails['medium']['url'],
                "thumbnail_high_url": thumbnails['high']['url'],
                "tags": snippet.get('tags'), # optional data (None if missing)
                "category_id": snippet['categoryId'],
                "default_language": snippet.get('defaultLanguage'), # optional data (None if missing)
                "default_audio_language": snippet.get('defaultAudioLanguage'), # optional data (None if missing)
                "topic_categories": self._dig(data_item, 'topicDetails', 'topicCategories'), # optional data (None if missing)

                # Content Details
                "duration": content_details['duration'], # ISO 8601 duration (PT#M#S, PT#H#M#S, P#DT#H#M#S)
                "definition": content_details['definition'],
                "caption": content_details['caption'],

                # Status
                "upload_status": status['uploadStatus'],
                "embeddable": status['embeddable'],
                "privacy_status": status['privacyStatus'],

                # Statistics
                'like_count': statistics.get('likeCount'), # optional data (None if missing)
                'view_count': statistics.get('viewCount'), # optional data (None if missing)
                'comment_count': statistics.get('commentCount'), # optional data (None if missing)
            }
        except (KeyError, TypeError) as e:
            # Skip the video instead of storing an empty record
            logger.error(e)
            logger.error(data_item)
            return

        # Save Video Features to object
        self.videos.append(video_features)

    def get_video_all_comments(self, video_id, part='snippet,replies', max_results=100):
        """Get all comments & their replies from a given video

        Appends the tuple `(video_id, comments)` to `self.comments`. The list holds whatever
        could be fetched; it is empty if comments are disabled or the first page fails.

        Args:
            video_id: id of the video whose comment threads are fetched.
            part: comma-separated resource parts requested from the API.
            max_results: page size requested from the API.
        """
        comments: list[dict] = []

        def _get_author_channel_id(comment):
            """Helper function to get authorChannelId, if any. Otherwise, return `None`"""
            return self._dig(comment, 'snippet', 'authorChannelId', 'value')

        def get_comments_and_replies(data_item):
            """Turn one comment thread into a list: the top-level comment followed by its returned replies"""
            top_comment = data_item['snippet']['topLevelComment']

            # Retrieve Comments
            thread_comments = [{
                "comment_id": top_comment['id'],
                "video_id": video_id,
                "parent_id": None,
                "reply_count": data_item['snippet']['totalReplyCount'],
                "text_original": top_comment['snippet']['textOriginal'],
                "like_count": top_comment['snippet']['likeCount'],
                "published_at": top_comment['snippet']['publishedAt'],
                "updated_at": top_comment['snippet']['updatedAt'],
                "crawled_datetime": datetime.now().isoformat(),
                "author_channel_id": _get_author_channel_id(top_comment),
                "author_display_name": top_comment['snippet']['authorDisplayName']}]

            # Retrieve Replies (if any); the response may lack 'replies' even when the
            # reply count is positive, so the key is treated as optional
            for reply in self._dig(data_item, 'replies', 'comments') or []:
                thread_comments.append({
                    "comment_id": reply['id'],
                    "video_id": video_id,
                    "parent_id": reply['snippet']['parentId'],
                    "reply_count": None,
                    "text_original": reply['snippet']['textOriginal'],
                    "like_count": reply['snippet']['likeCount'],
                    "published_at": reply['snippet']['publishedAt'],
                    "updated_at": reply['snippet']['updatedAt'],
                    "crawled_datetime": datetime.now().isoformat(),
                    "author_channel_id": _get_author_channel_id(reply),
                    "author_display_name": reply['snippet']['authorDisplayName']})

            return thread_comments

        # Main function logic
        next_page_token = ""
        while True:
            # Fetch commentThreads data from next page
            path = f'commentThreads?part={part}&videoId={video_id}&maxResults={max_results}&pageToken={next_page_token}'
            data = self.get_html_to_json(path)

            if data == "COMMENTS DISABLED":
                logger.error(f"Comments are disabled for video {video_id} (channel: {self.channel_features.get('title')})")
                break # asking again would return the same answer forever

            try:
                data_items = data['items']
            except (TypeError, KeyError) as e: # request failed or response without 'items' key
                logger.error(f"Cannot get comments' features from video {video_id} (channel: {self.channel_features.get('title')})")
                logger.error(e)
                break # without a valid page there is no token for the next one

            # Save commentThreads data in this page
            for data_item in data_items:
                try:
                    comments.extend(get_comments_and_replies(data_item))
                except (KeyError, TypeError) as e:
                    logger.error(f"Cannot read a comment thread of video {video_id}")
                    logger.error(e)

            # Stop fetching if no more next page
            next_page_token = data.get('nextPageToken', '')
            if not next_page_token:
                break

        # Save Comments to object
        self.comments.append((video_id, comments))

    def save_video_features_to_json(self, video_save_path):
        """Save video_features to external json file (`<channel_id>_videos_new.json` in `video_save_path`)"""
        filename = f"{self.channel_features['channel_id']}_videos_new.json"
        self._dump_json(video_save_path, filename, self.videos)
        logger.info(f"{self}: video features saved")

    def save_comment_features_to_json(self, comment_save_path):
        """Save comment_features to external json file (`<channel_id>_comments_new.json` in `comment_save_path`)"""
        filename = f"{self.channel_features['channel_id']}_comments_new.json"
        self._dump_json(comment_save_path, filename, self.comments)
        logger.info(f"{self}: comment features saved")

# Scrape YouTube

In [None]:
# skiprows = 45 # 30,15,0 # rows already scraped
# nrows =  None # 15,15,15 # rows to scrape
# youtube_spider_cluster = YoutubeSpiderCluster(YOUTUBE_API_KEY_1)
# youtube_spider_cluster.load_channel_ids("data/TopTaiwanYoutubers.csv", nrows=nrows, skiprows=skiprows)
# youtube_spider_cluster.scrape_youtuber_channels()
# youtube_spider_cluster.scrape_youtuber_videos()

In [None]:
# skiprows = #45,30,15,0 # rows already scraped
# nrows = #15,15,15,15 # rows to scrape
# youtube_spider_cluster = YoutubeSpiderCluster(YOUTUBE_API_KEY_1)
# youtube_spider_cluster.load_channel_ids("data/TopTaiwanYoutubers_1.csv", nrows=nrows, skiprows=skiprows)
# youtube_spider_cluster.scrape_youtuber_channels()
# youtube_spider_cluster.scrape_youtuber_videos()

In [None]:
# skiprows = 45 # 30 # 15 # 0 # rows already scraped
# nrows = 15 # 15 # 15 # 15 # rows to scrape
# youtube_spider_cluster = YoutubeSpiderCluster(YOUTUBE_API_KEY_1)
# youtube_spider_cluster.load_channel_ids("data/TopTaiwanYoutubers_2.csv", nrows=nrows, skiprows=skiprows)
# youtube_spider_cluster.scrape_youtuber_channels()
# youtube_spider_cluster.scrape_youtuber_videos()

In [3]:
# Scrape every channel listed in the "missing YouTubers" file in a single pass.
skiprows, nrows = 0, None  # rows already scraped, rows to scrape
youtube_spider_cluster = YoutubeSpiderCluster(YOUTUBE_API_KEY_1)
youtube_spider_cluster.load_channel_ids(
    "data/MissingYoutubers.csv",
    nrows=nrows,
    skiprows=skiprows,
)
# Channel features (and each channel's video ids) first, then the videos themselves.
youtube_spider_cluster.scrape_youtuber_channels()
youtube_spider_cluster.scrape_youtuber_videos()

2023-05-14 09:41:44,573:__main__:139928976127744:ThreadPoolExecutor-0_0:Start scraping Channel Info: 【雙聲類】小光
2023-05-14 09:41:44,575:__main__:139928893650688:ThreadPoolExecutor-0_1:Start scraping Channel Info: 一隻阿圓 I am CIRCLE
2023-05-14 09:41:44,576:__main__:139928885257984:ThreadPoolExecutor-0_2:Start scraping Channel Info: 我是老爸,我不要當爸!
2023-05-14 09:41:44,580:__main__:139928876865280:ThreadPoolExecutor-0_3:Start scraping Channel Info: 阿晋
2023-05-14 09:41:44,635:__main__:139928868472576:ThreadPoolExecutor-0_4:Start scraping Channel Info: 館長惡名昭彰
2023-05-14 09:41:44,703:__main__:139928893650688:ThreadPoolExecutor-0_1: QUOTA EXCEEDED YoutubeSpider(None) (#00)
2023-05-14 09:41:44,704:__main__:139928976127744:ThreadPoolExecutor-0_0: QUOTA EXCEEDED YoutubeSpider(None) (#00)
2023-05-14 09:41:44,708:__main__:139928876865280:ThreadPoolExecutor-0_3: QUOTA EXCEEDED YoutubeSpider(None) (#00)
2023-05-14 09:41:44,717:__main__:139928868472576:ThreadPoolExecutor-0_4: QUOTA EXCEEDED YoutubeSpider(None

Note that files like ***TopTaiwanYoutubers.csv***, which are fed into the `.load_channel_ids` method, were collected manually.<br>
Their contents are organized in the format that the above YouTube spiders expect.<br>

Below is an example.

In [6]:
# Preview the manually collected input file; the two columns are named here.
pd.read_csv(
    "data/TopTaiwanYoutubers.csv",
    names=["Youtuber", "Link"],
)

Unnamed: 0,Youtuber,Link
0,葉式特工 Yes Ranger,https://www.youtube.com/channel/UCXpxKdZAiyUEa...
1,這群人TGOP,https://www.youtube.com/channel/UC6FcYHEm7SO1j...
2,Pan Piano,https://www.youtube.com/channel/UCI7ktPB6toquc...
3,阿神,https://www.youtube.com/channel/UCnJEWsS5agXCk...
4,阿滴英文,https://www.youtube.com/channel/UCeo3JwE3HezUW...
5,蔡阿嘎,https://www.youtube.com/channel/UCPwxSX0DYDMQx...
6,Joeman,https://www.youtube.com/channel/UCPRWWKG0VkBA0...
7,Ru's Piano Ru味春捲,https://www.youtube.com/channel/UCAYrMNl92jw6c...
8,木曜4超玩,https://www.youtube.com/channel/UCLW_SzI9txZvt...
9,黃氏兄弟,https://www.youtube.com/channel/UCV_S2S-Zs8Leu...
