In [13]:
"""
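Download the comments of every video in a given YouTube channel. This generates a text file with one comment per line, covering all videos uploaded to the channel (top-level comments only, unless reply fetching is enabled in the main section).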
"""

from apiclient.errors import HttpError
from apiclient.discovery import build

# Service name and API version passed to build() when the client is created.
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# Placeholder value: replace it with your own key; it is passed to build() as developerKey.
DEVELOPER_KEY = "YOUR YOUTUBE DEVELOPER KEY"


In [25]:
# Get all top-level comments of a given video_id (replies are not included)
def get_comments(youtube, video_id, comments):
    """Fetch every top-level comment thread of a video, following pagination.

    The text of each top-level comment is collapsed onto a single line (any
    run of whitespace, newlines included, becomes one space) and appended to
    ``comments`` in place.

    Args:
        youtube: API client exposing ``commentThreads().list(...).execute()``.
        video_id: ID of the video whose comment threads are requested.
        comments: List that receives the normalized comment texts; it is
            mutated in place.

    Returns:
        The raw comment-thread items of all result pages, in the order in
        which they were returned.

    Raises:
        Whatever ``execute()`` raises; no exception is handled here.
    """
    threads = []
    page_token = None

    while True:
        request_args = {
            "part": "snippet",
            "videoId": video_id,
            "textFormat": "plainText",
            # Ask for the largest page the API allows so that fewer requests
            # (and therefore less quota) are needed per video.
            "maxResults": 100,
        }
        # The first request carries no page token at all.
        if page_token is not None:
            request_args["pageToken"] = page_token

        results = youtube.commentThreads().list(**request_args).execute()

        for item in results.get("items", []):
            threads.append(item)
            text = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            # str.split() with no argument already splits on newlines, so a
            # single split/join is enough to put the comment on one line.
            comments.append(" ".join(text.split()))

        page_token = results.get("nextPageToken")
        if not page_token:
            break

    return threads

# Get all replies of a comment; returns all comments, including the previously downloaded ones
def get_comment_replies(youtube, parent_comment_id, comments):
    """Append every reply to a top-level comment to ``comments``.

    All result pages are followed, so long reply threads are not cut off
    after the first page. Reply texts are collapsed onto a single line in
    the same way as in ``get_comments``, which keeps a one-comment-per-line
    output file intact.

    Args:
        youtube: API client exposing ``comments().list(...).execute()``.
        parent_comment_id: ID of the comment whose replies are requested.
        comments: List that receives the reply texts; it is mutated in place.

    Returns:
        The same ``comments`` list, now also containing the replies.

    Raises:
        Whatever ``execute()`` raises; no exception is handled here.
    """
    page_token = None

    while True:
        request_args = {
            "part": "snippet",
            "parentId": parent_comment_id,
            "textFormat": "plainText",
            # Largest page size the API allows; saves requests and quota.
            "maxResults": 100,
        }
        # The first request carries no page token at all.
        if page_token is not None:
            request_args["pageToken"] = page_token

        results = youtube.comments().list(**request_args).execute()

        for item in results.get("items", []):
            text = item["snippet"]["textDisplay"]
            comments.append(" ".join(text.split()))

        page_token = results.get("nextPageToken")
        if not page_token:
            break

    return comments

# Gets all videos in a channel
def get_channel_videos(channel_id, youtube=None):
    """Return the playlist items of every video uploaded to a channel.

    The channel's "uploads" playlist is looked up first and then read page
    by page until no further page token is returned.

    Args:
        channel_id: ID of the channel whose uploads are listed.
        youtube: API client exposing ``channels()`` and ``playlistItems()``.
            When omitted, the module-level ``youtube`` object is used, which
            keeps existing ``get_channel_videos(channel_id)`` calls working.

    Returns:
        A list with the raw playlist items of all result pages.

    Raises:
        NameError: If no client is passed and no module-level ``youtube``
            exists.
        ValueError: If the API response contains no channel for
            ``channel_id``.
    """
    if youtube is None:
        # Backward compatibility: older callers rely on a global client.
        youtube = globals().get("youtube")
        if youtube is None:
            raise NameError(
                "name 'youtube' is not defined; pass a client via the "
                "'youtube' argument"
            )

    res = youtube.channels().list(id=channel_id,
                                  part='contentDetails').execute()
    channels = res.get('items')
    if not channels:
        # Fail with a clear message instead of an opaque KeyError/IndexError.
        raise ValueError("No channel found for channel id %r" % (channel_id,))
    playlist_id = channels[0]['contentDetails']['relatedPlaylists']['uploads']

    videos = []
    next_page_token = None

    while True:
        res = youtube.playlistItems().list(playlistId=playlist_id,
                                           part='snippet',
                                           maxResults=50,
                                           pageToken=next_page_token).execute()
        videos.extend(res.get('items', []))
        next_page_token = res.get('nextPageToken')

        if not next_page_token:
            break

    return videos


# Read data from the given txt file and return all video_ids as a list
def get_accessed_videos(ACCESSED_VIDEOS_PATH):
    """Load the IDs of the videos that were already processed.

    The file is expected to hold one video ID per line. Surrounding
    whitespace is stripped and blank lines are ignored.

    Args:
        ACCESSED_VIDEOS_PATH: Path of the UTF-8 text file to read.

    Returns:
        A list of video IDs in file order. The list is empty when the file
        does not exist yet, which is the case on the very first run because
        the file is only written after a scraping run.
    """
    try:
        with open(ACCESSED_VIDEOS_PATH, "r", encoding="utf-8") as file:
            stripped_lines = [line.strip() for line in file]
    except FileNotFoundError:
        return []
    return [video_id for video_id in stripped_lines if video_id]
    



In [29]:
if __name__ == "__main__":
    # Script entry point: download the comments of every video of one channel
    # into OUTPUTFILE, skipping videos recorded in ACCESSED_VIDEOS_PATH.

    # Copy the ID of the channel you want to scrape and assign it to "channel_id".
    channel_id = 'UCO1u_DqxUJQJ7xSUqbXMDHQ'

    OUTPUTFILE = "data/output_file.txt"
    ACCESSED_VIDEOS_PATH = "data/accessed_videos.txt"

    # NOTE: "youtube" has to stay a module-level name; get_channel_videos may
    # read it as a global.
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

    ACCESSED_VIDEOS = get_accessed_videos(ACCESSED_VIDEOS_PATH)
    # Set for O(1) membership tests; the list keeps the order for saving.
    already_accessed = set(ACCESSED_VIDEOS)

    videos = get_channel_videos(channel_id)
    # To filter by upload date, add a condition on video['snippet']['publishedAt'],
    # e.g. .startswith(("2019-11", "2019-12", "2020-01", "2020-02")).
    videos_ids = [video['snippet']['resourceId']['videoId'] for video in videos]

    length = 0
    try:
        # Append mode: videos scraped in earlier runs are skipped below, so
        # their comments must not be wiped from the output file.
        with open(OUTPUTFILE, "a", encoding="utf-8") as output_file:
            for i, video_id in enumerate(videos_ids, start=1):
                if video_id in already_accessed:
                    print("Video %d/%d:\tTotal comments: %d \tVIDEO COMMENTS ALREADY SCRAPED: %s" %(i, len(videos_ids),length,video_id))
                    continue

                try:
                    comments = []
                    video_comments = get_comments(youtube, video_id, comments)
                    #If you want to get replies in a comment uncomment the following 2 lines: (note it might cause your API limit)
                    #for comment in video_comments:
                        #get_comment_replies(youtube, comment["id"], comments)
                except HttpError as e:
                    if "disabled comments" not in str(e.content):
                        print("An HTTP error %d occurred:\n%s" % (e.resp.status, e.content))
                        print("\n\n\n------------\n\n\n")
                        print("Not finished... Your API KEY has exceeded the limit")
                        print("Id" + video_id)
                        # Stop without marking this video as accessed so that
                        # the next run retries it.
                        break
                    print("Comments disabled.... Could not download comments from %s" % video_id)
                else:
                    # Skip the write for videos without comments to avoid
                    # blank lines in the one-comment-per-line output file.
                    if comments:
                        output_file.write('\n'.join(comments))
                        output_file.write('\n')
                    length += len(comments)
                    print("Video %d/%d:\tVID:%s\tcurrent Video's Comments: %d\tTotal comments: %d" %(i, len(videos_ids), video_id ,len(video_comments),length) )

                # Record each video only once so the accessed file does not
                # accumulate duplicates across runs.
                ACCESSED_VIDEOS.append(video_id)
                already_accessed.add(video_id)
    finally:
        # Save all video ids already downloaded to a file, even when the run
        # is interrupted by an unexpected error.
        with open(ACCESSED_VIDEOS_PATH, 'w', encoding="utf-8") as file:
            file.write('\n'.join(ACCESSED_VIDEOS))

Video 1/52:	VID:A1qyu5Dkjig	current Video's Comments: 1113	Total comments: 1113
Video 2/52:	VID:yMwGD6Z1Om4	current Video's Comments: 140	Total comments: 1253
Video 3/52:	VID:H-Y1no_n4hk	current Video's Comments: 95	Total comments: 1348
Video 4/52:	VID:TZDSDmXStR4	current Video's Comments: 127	Total comments: 1475
Video 5/52:	VID:WvpHXm1lsmc	current Video's Comments: 424	Total comments: 1899
Video 6/52:	VID:qXRGKZ42Jtg	current Video's Comments: 229	Total comments: 2128
Video 7/52:	VID:yKuWc0ykKik	current Video's Comments: 485	Total comments: 2613
Video 8/52:	VID:OYtaCG1hYww	current Video's Comments: 110	Total comments: 2723
Video 9/52:	VID:DBZUwzsfBYA	current Video's Comments: 106	Total comments: 2829
Video 10/52:	VID:9okLawuIXng	current Video's Comments: 83	Total comments: 2912
Video 11/52:	VID:sj40pyI26YU	current Video's Comments: 149	Total comments: 3061
Video 12/52:	VID:CeV0vSwDjIw	current Video's Comments: 72	Total comments: 3133
Video 13/52:	VID:CJmtMCNxGe8	current Video's Commen