In [2]:
from googleapiclient.discovery import build
import pandas as pd
from IPython.display import JSON
import os

In [18]:
def get_channels(youtube, channels_ids, path):
    """Fetch summary statistics for a set of channels and save them as CSV.

    A single ``channels().list`` request is issued for all the given IDs; one
    row is built per returned channel and the table is written to
    ``path + "all-channels.csv"``.

    Args:
        youtube: API client exposing ``channels().list(...).execute()``.
        channels_ids: Iterable of channel ID strings (joined with commas).
        path: String prefix concatenated verbatim with ``"all-channels.csv"``,
            so it should end with a path separator (e.g. ``"data/"``).

    Returns:
        tuple: ``(channels, csv_result)`` where ``channels`` is the DataFrame
        with columns ``channelName``, ``subscribers``, ``views``,
        ``totalViews`` and ``playlistId``, and ``csv_result`` is the return
        value of ``DataFrame.to_csv`` (``None`` when writing to a file path).
    """
    columns = ["channelName", "subscribers", "views", "totalViews", "playlistId"]

    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=",".join(channels_ids),
    )
    response = request.execute()

    all_data = []
    # Tolerate a response without an "items" key (presumably what the API
    # sends when no channel matched) instead of failing with KeyError.
    for item in response.get("items", []):
        statistics = item.get("statistics", {})
        all_data.append({
            "channelName": item["snippet"]["title"],
            # Counters are read with .get so a channel that does not expose
            # one of them (e.g. a hidden subscriber count) yields None rather
            # than aborting the whole export.
            "subscribers": statistics.get("subscriberCount"),
            "views": statistics.get("viewCount"),
            # NOTE: despite its name this column holds the channel's
            # *video* count; the key is kept so existing readers of the CSV
            # keep working.
            "totalViews": statistics.get("videoCount"),
            "playlistId": item["contentDetails"]["relatedPlaylists"]["uploads"],
        })

    # Passing the columns explicitly keeps the schema (and the CSV header)
    # stable even when no channel was returned.
    channels = pd.DataFrame(all_data, columns=columns)
    csv_result = channels.to_csv(path + "all-channels.csv", index=None)
    return channels, csv_result

In [61]:
def get_video_ids(youtube, playlist_id):
    """Collect the video ID of every item in a playlist.

    Pages through ``playlistItems().list`` 50 items at a time, following
    ``nextPageToken`` until the response no longer provides one.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_id: ID of the playlist to walk.

    Returns:
        list: Video IDs in the order the pages returned them.
    """
    collected = []
    query = {
        "part": "snippet,contentDetails",
        "playlistId": playlist_id,
        "maxResults": 50,
    }

    while True:
        page = youtube.playlistItems().list(**query).execute()
        collected.extend(entry['contentDetails']['videoId'] for entry in page['items'])

        token = page.get('nextPageToken')
        if token is None:
            break
        # Only follow-up requests carry a pageToken; the first one has none.
        query["pageToken"] = token

    return collected

In [55]:
def get_video_details(youtube, videos_id):
    """Fetch selected metadata for a list of videos.

    The IDs are sent to ``videos().list`` in batches of 50 per request
    (presumably the API's per-request ID limit). For each returned video a
    fixed set of ``snippet``, ``statistics`` and ``contentDetails`` fields is
    kept; a field the API did not return is stored as ``None``.

    Args:
        youtube: API client exposing ``videos().list(...).execute()``.
        videos_id: Sequence of video ID strings.

    Returns:
        pandas.DataFrame: One row per returned video, with a ``video_id``
        column followed by the kept fields. Empty when nothing was returned.
    """
    # Fields to extract, grouped by the response section they live in.
    stats_to_keep = {
        "snippet": ["channelTitle", "title", "description", "tags", "publishedAt"],
        "statistics": ["viewCount", "likeCount", "favoriteCount", "commentCount"],
        "contentDetails": ["duration", "definition", "caption"],
    }
    all_video_info = []

    for start in range(0, len(videos_id), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=",".join(videos_id[start:start + 50]),
        )
        response = request.execute()

        for video in response.get("items", []):
            video_info = {"video_id": video["id"]}
            for section, fields in stats_to_keep.items():
                # Missing sections/fields become None via .get rather than a
                # bare ``except`` that would also hide unrelated errors.
                section_data = video.get(section) or {}
                for field in fields:
                    video_info[field] = section_data.get(field)
            all_video_info.append(video_info)

    return pd.DataFrame(all_video_info)

In [5]:
def get_comments(youtube, videos_id):
    """Fetch top-level comment texts for each video.

    One ``commentThreads().list`` request is made per video; only that single
    response page is read (no pagination), so at most one page of threads is
    collected per video. Videos whose request or parsing fails are reported
    on stdout and left out of the result.

    Args:
        youtube: API client exposing ``commentThreads().list(...).execute()``.
        videos_id: Iterable of video ID strings.

    Returns:
        pandas.DataFrame: One row per successfully processed video with the
        columns ``video_id`` and ``comments`` (a list of comment strings).
        Empty (no columns) when no video could be processed.
    """
    all_comments = []

    for video_id in videos_id:
        try:
            request = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id,
            )
            response = request.execute()
            comments_in_video = [
                thread["snippet"]["topLevelComment"]["snippet"]["textOriginal"]
                for thread in response["items"]
            ]
        except Exception:
            # Best effort: skip this video but keep going. ``Exception``
            # (rather than a bare ``except``) lets KeyboardInterrupt and
            # SystemExit stop a long-running download.
            print(f"Could not get comments for video {video_id}")
            continue

        all_comments.append({"video_id": video_id, "comments": comments_in_video})

    return pd.DataFrame(all_comments)
    

In [None]:
def save(youtube, path, playlist_id):
    """Download video details and comments for a playlist and write them to CSV.

    Does nothing (besides printing a notice) when ``path`` already exists, so
    previously downloaded data is not fetched again.

    Args:
        youtube: API client passed through to the fetch helpers.
        path: Destination CSV file path.
        playlist_id: ID of the playlist whose videos are exported.

    Returns:
        None. The combined table is written to ``path`` as UTF-8 CSV.
    """
    if os.path.exists(path):
        print(f"The file {path} already exists")
        return

    videos_id = get_video_ids(youtube, playlist_id)
    all_video_info = get_video_details(youtube, videos_id)
    comments = get_comments(youtube, videos_id)

    if "video_id" in all_video_info.columns and "video_id" in comments.columns:
        # Join on the video ID instead of concatenating by row position:
        # get_comments leaves out videos it could not fetch, so positional
        # alignment would attach comments to the wrong video.
        comments = comments.drop_duplicates(subset="video_id")
        all_data = all_video_info.merge(comments, on="video_id", how="left")
    else:
        # One of the frames is empty (no columns); keep the schema stable.
        all_data = all_video_info.assign(comments=None)

    # Write the combined table (details + comments), not just the details.
    all_data.to_csv(path, index=None, encoding="UTF-8")

In [2]:
def main(api_key=None):
    """Download channel, video and comment data for a fixed list of channels.

    Builds the API client, writes ``data/all-channels.csv`` and then one CSV
    per channel (named after the channel, spaces replaced by dashes) inside
    the ``data`` directory.

    Args:
        api_key: API developer key. When omitted, the ``YOUTUBE_API_KEY``
            environment variable is used, so the key never has to be written
            into the notebook.

    Raises:
        ValueError: If no API key is given and the environment variable is
            unset or empty.
    """
    if api_key is None:
        api_key = os.environ.get("YOUTUBE_API_KEY")
    if not api_key:
        raise ValueError(
            "No API key provided: pass api_key or set the YOUTUBE_API_KEY "
            "environment variable"
        )

    api_service_name = "youtube"
    api_version = "v3"

    # Get credentials and create an API client
    youtube = build(api_service_name, api_version, developerKey=api_key)
    channels_ids = ["UCnkp4xDOwqqJD7sSM3xdUiQ",   # Adam Neely
                    "UCLuHOqDilyLQT4NPXQuVN4Q",   # Ben Levin
                    "UC4PIiYewI1YGyiZvgNlJNrA",   # Charles Cornell
                    "UCz2iUx-Imr6HgDC3zAFpjOw",   # David Bennett Piano
                    "UCh-PyMficPzVAihCJkFJVAA",   # David Bruce Composer
                    "UCtmY49Zn4l0RMJnTWfV7Wsg",   # Jacob Collier
                    "UCa3DVlGH2_QhvwuWlPa6MDQ",   # Jaime Altozano
                    "UC8R8FRt1KcPiR-rtAflXmeg",   # Nahre Sol
                    "UCl_dlV_7ofr4qeP1drJQ-qg"]   # Tantacrul

    os.makedirs("data", exist_ok=True)
    # The second element of the returned tuple is only the to_csv result and
    # is not needed here.
    channels, _ = get_channels(youtube, channels_ids, "data/")

    for index, row in channels.iterrows():
        print("Getting data from: ", row["channelName"])
        save(youtube, "data/" + row["channelName"].replace(" ", "-") + ".csv", row["playlistId"])
        