In [39]:
import pandas as pd

In [40]:
from googleapiclient.discovery import build

In [41]:
from IPython.display import JSON

In [42]:
api_key  = 'AIzaSyBJ3vWcb9XvN4u5wh5yNJIbdrfEbgfS7Ss'

In [43]:
blackpink_id = "UCOmHUn--16B90oW2L6FRR3A"

In [57]:
bts_id = "UCLkAepWjdylmXSltofFvsYQ"

In [44]:
api_service_name = "youtube"
api_version = "v3"

# Get credentials and create an API client
youtube = build(
    api_service_name, api_version, developerKey=api_key)

request = youtube.channels().list(
    part="snippet,contentDetails,statistics",
    id=blackpink_id
)
response = request.execute()

JSON(response)

<IPython.core.display.JSON object>

In [55]:
def get_channel_stats(youtube, channel_ids):
    """
    Get channel statistics: title, subscriber count, view count, video count, upload playlist
    Params:
    
    youtube: the build object from googleapiclient.discovery
    channels_ids: list of channel IDs
    
    Returns:
    Dataframe containing the channel statistics for all channels in the provided list: title, subscriber count, view count, video count, upload playlist
    
    """
    all_data = []
    request = youtube.channels().list(
                part='snippet,contentDetails,statistics',
                id=','.join(channel_ids))
    response = request.execute() 
    
    for i in range(len(response['items'])):
        data = dict(channelName = response['items'][i]['snippet']['title'],
                    subscribers = response['items'][i]['statistics']['subscriberCount'],
                    views = response['items'][i]['statistics']['viewCount'],
                    totalVideos = response['items'][i]['statistics']['videoCount'],
                    playlistId = response['items'][i]['contentDetails']['relatedPlaylists']['uploads'])
        all_data.append(data)
    
    return pd.DataFrame(all_data)

def get_video_ids(youtube, playlist_id):
    """
    Get list of video IDs of all videos in the given playlist
    Params:
    
    youtube: the build object from googleapiclient.discovery
    playlist_id: playlist ID of the channel
    
    Returns:
    List of video IDs of all videos in the playlist
    
    """
    
    request = youtube.playlistItems().list(
                part='contentDetails',
                playlistId = playlist_id,
                maxResults = 50)
    response = request.execute()
    
    video_ids = []
    
    for i in range(len(response['items'])):
        video_ids.append(response['items'][i]['contentDetails']['videoId'])
        
    next_page_token = response.get('nextPageToken')
    more_pages = True
    
    while more_pages:
        if next_page_token is None:
            more_pages = False
        else:
            request = youtube.playlistItems().list(
                        part='contentDetails',
                        playlistId = playlist_id,
                        maxResults = 50,
                        pageToken = next_page_token)
            response = request.execute()
    
            for i in range(len(response['items'])):
                video_ids.append(response['items'][i]['contentDetails']['videoId'])
            
            next_page_token = response.get('nextPageToken')
        
    return video_ids

def get_video_details(youtube, video_ids):
    """
    Get video statistics of all videos with given IDs
    Params:
    
    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs
    
    Returns:
    Dataframe with statistics of videos, i.e.:
        'channelTitle', 'title', 'description', 'tags', 'publishedAt'
        'viewCount', 'likeCount', 'favoriteCount', 'commentCount'
        'duration', 'definition', 'caption'
    """
        
    all_video_info = []
    
    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute() 

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption']
                            }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)
            
    return pd.DataFrame(all_video_info)

def get_comments_in_videos(youtube, video_ids):
    """
    Get top level comments as text from all videos with given IDs (only the first 10 comments due to quote limit of Youtube API)
    Params:
    
    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs
    
    Returns:
    Dataframe with video IDs and associated top level comment in text.
    
    """
    all_comments = []
    
    for video_id in video_ids:
        try:   
            request = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id
            )
            response = request.execute()
        
            comments_in_video = [comment['snippet']['topLevelComment']['snippet']['textOriginal'] for comment in response['items'][0:10]]
            comments_in_video_info = {'video_id': video_id, 'comments': comments_in_video}

            all_comments.append(comments_in_video_info)
            
        except: 
            # When error occurs - most likely because comments are disabled on a video
            print('Could not get comments for video ' + video_id)
        
    return pd.DataFrame(all_comments)   

In [58]:
channel_ids = [blackpink_id, bts_id]

In [59]:
channel_data = get_channel_stats(youtube, channel_ids)


In [60]:
channel_data

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,BLACKPINK,91900000,33567618140,585,UUOmHUn--16B90oW2L6FRR3A
1,BANGTANTV,76700000,21574999725,2365,UULkAepWjdylmXSltofFvsYQ


In [61]:
video_df = pd.DataFrame()
comments_df = pd.DataFrame()

for c in channel_data['channelName'].unique():
    print("Getting video information from channel: " + c)
    playlist_id = channel_data.loc[channel_data['channelName']== c, 'playlistId'].iloc[0]
    video_ids = get_video_ids(youtube, playlist_id)
    
    # get video data
    video_data = get_video_details(youtube, video_ids)
    # get comment data
    # comments_data = get_comments_in_videos(youtube, video_ids)

    # append video data together and comment data toghether
    # video_df = video_df.append(video_data, ignore_index=True)
    # comments_df = comments_df.append(comments_data, ignore_index=True)

Getting video information from channel: BLACKPINK
Could not get comments for video OqEq4tOSpZI
Could not get comments for video 8hOo7Ms5Syk
Could not get comments for video rKltTMnp4G4


AttributeError: 'DataFrame' object has no attribute 'append'

In [63]:
video_data

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,6lw4Cbk1IzA,BLACKPINK,BLACKPINK - ‘B.P.M.’ Last Roll,#BLACKPINK #블랙핑크 #BPM #BORN_PINK_MEMORIES #Las...,"[YG Entertainment, YG, 와이지, K-pop, BLACKPINK, ...",2023-10-20T09:00:04Z,1970401,238076,,14716,PT14M34S,hd,true
1,Yl_rmlS4u9s,BLACKPINK,BLACKPINK - ‘B.P.M.’ Roll #33,#BLACKPINK #블랙핑크 #BPM #BORN_PINK_MEMORIES #Rol...,"[YG Entertainment, YG, 와이지, K-pop, BLACKPINK, ...",2023-10-13T09:00:21Z,1331648,135882,,4464,PT13M6S,hd,true
2,q9lQG1shW6o,BLACKPINK,JENNIE - 'You & Me' Behind The Scenes,#JENNIE #제니 #BLACKPINK #블랙핑크 #SpecialSingle #Y...,"[YG Entertainment, YG, 와이지, K-pop, BLACKPINK, ...",2023-10-13T04:00:36Z,1737862,197662,,5359,PT2M44S,hd,true
3,OCxrWKk-Jbw,BLACKPINK,JENNIE - ‘You & Me (Jazz ver.)’ LIVE CLIP HIGH...,#JENNIE #제니 #BLACKPINK #블랙핑크 #SpecialSingle #Y...,,2023-10-11T04:30:46Z,5487250,588069,,5723,PT15S,hd,false
4,ZgLlabyz7oY,BLACKPINK,JENNIE - ‘You & Me (Jazz ver.)’ LIVE CLIP,"작사 : TEDDY\n작곡 : TEDDY, 24, Vince\n편곡 : Nohc\n...","[YG Entertainment, YG, 와이지, K-pop, BLACKPINK, ...",2023-10-11T04:00:28Z,6447572,675143,,24538,PT2M31S,hd,false
...,...,...,...,...,...,...,...,...,...,...,...,...,...
580,cxbZuoF3RuQ,BLACKPINK,BLACKPINK - '휘파람 (WHISTLE)' M/V BEHIND THE SCENES,Available @ https://BLACKPINK.lnk.to/SQUAREONE...,,2016-08-08T11:33:16Z,4284079,262621,,4873,PT3M16S,hd,false
581,uy2Yc5tt5ns,BLACKPINK,BLACKPINK - '붐바야 (BOOMBAYAH)' M/V BEHIND THE S...,Available @ https://BLACKPINK.lnk.to/SQUAREONE...,,2016-08-08T11:33:13Z,7064903,456943,,8912,PT3M56S,hd,false
582,dISNgvVpWlo,BLACKPINK,BLACKPINK - '휘파람 (WHISTLE)' M/V,BLACKPINK - 휘파람 (WHISTLE)\n\nHey boy\nMake’ em...,"[BLACKPINK, 휘파람, WHISTLE, ‎DEBUTSINGLE, SQUARE...",2016-08-08T11:17:08Z,868331143,9996042,,616978,PT3M51S,hd,true
583,bwmSjveL3Lc,BLACKPINK,BLACKPINK - '붐바야 (BOOMBAYAH)' M/V,BLACKPINK - 붐바야 (BOOMBAYAH)\n\nBLACKPINK in yo...,"[BLACKPINK, 붐바야, ‎BOOMBAYAH, ‎SQUAREONE]",2016-08-08T11:17:04Z,1645899401,16224667,,1124016,PT4M4S,hd,true


In [65]:
comments_data

Unnamed: 0,video_id,comments
0,6lw4Cbk1IzA,"[مافهمت بيجددون العقد ومافب ترجمه, Давайте доб..."
1,Yl_rmlS4u9s,[I'm in Greece hello Blackpink love Blackpink...
2,q9lQG1shW6o,[JENNIE - ‘You & Me’ DANCE PERFORMANCE VIDEO\n...
3,OCxrWKk-Jbw,"[❤❤❤, Hello black, I see you from Mexico🇲🇽 hap..."
4,ZgLlabyz7oY,[JENNIE - ‘You & Me’ DANCE PERFORMANCE VIDEO\n...
...,...,...
577,cxbZuoF3RuQ,[HOW THIS IS VERY FAST YOU ARE REALLY BORN QUE...
578,uy2Yc5tt5ns,[WHO WILL SUPPORT OUR BORN QUEENS ALWAYS RAISE...
579,dISNgvVpWlo,[The fact that they almost didn't release this...
580,bwmSjveL3Lc,[se dan cuenta que blacpink bino tambien en u...


In [67]:
comments_data['comments'][0]

['مافهمت بيجددون العقد ومافب ترجمه',
 'Давайте добьем им 92 миллиона?❤ и 50 к лайков?!💕',
 'بلاكبيك واو واوواواوا',
 '더 이상 존재하지 않을 것 같아서 울고 있어요 블랙핑크 나를 케이팝의 길로 데려왔다. 울고 있다. 별을 잃고 싶지 않다..',
 "I'm  so happy for all you girls. Love you very  much❤❤❤",
 '🥹😭😭',
 'Las voy a extrañar mucho mis niñas las amo gracias por los años tan bonitos q me regalaron de la emoción de verlas a través de la pantalla no se vayan no sé si,si renovaron pero solo quería decirles q las amo tanto las tendré siempre presentes y si se van seguiré siendo BLINK,BLINK hasta la muerte las amo jisoo Jennie lisa  rose mi bias siempre rose las amo chicas 💗💗💞💞estoy muy orgullosa de ustedes no dejen q nada las apague BLACKPINK in your Área ❤✨💗💖💝',
 'بلينك للابد',
 'Jisoo My queen!🥰',
 'people who still remember when he would try to look at BLACKPINK weird and lick his lips 4:00']