In [None]:
import json
import os
import time

import pandas as pd
from googleapiclient.discovery import build

In [None]:
# Read the key from the YOUTUBE_API_KEY environment variable so the real
# credential never has to be saved inside the notebook. When the variable is
# not set, the original 'API_KEY' placeholder is kept as the fallback value.
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'API_KEY')

In [None]:
# Build the client used for every YouTube Data API (v3) request below
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey=API_KEY,
)

In [None]:
# Read the CSV listing the videos whose data should be downloaded
df = pd.read_csv(filepath_or_buffer='./data/vdoLinks.csv')

In [None]:
# Index the frame by YouTube ID (the 'youtubeId' column itself is kept),
# then clear the index name
df.set_index(keys=df['youtubeId'], inplace=True)
df.index.names = [None]

In [None]:
# Collect the video IDs (the frame's index) into a plain Python list
video_ids = df.index.tolist()

In [None]:
# Accumulator for the downloaded videos: one dict per video is appended here
videos = list()

In [None]:
# Largest page size accepted by commentThreads.list (documented range: 1-100)
_MAX_COMMENTS_PER_REQUEST = 100


def download_video_data(vid: str, client=None):
    """
    Download the metadata and the first page of top-level comments of one
    video from the YouTube Data API.

    The function never raises for API failures: any exception is converted to
    a string and stored under the ``error`` key so that a long batch download
    is not interrupted by a single bad video.

    Parameters
    ----------
    vid : str
        The video ID to download data from YouTube API
    client : optional
        Object exposing ``videos()`` and ``commentThreads()`` like the
        resource returned by ``googleapiclient.discovery.build``. When
        omitted, the notebook-level ``youtube`` object is used.

    Returns
    -------
    dict
        Keys (in this order): ``id``, ``description``, ``view_count``,
        ``like_count``, ``dislike_count``, ``comment_count``, ``duration``,
        ``favorite_count``, ``comments`` (list of comment texts) and
        ``error`` (``None`` on success, otherwise the error message).
        Fields that could not be downloaded stay ``None``; statistics missing
        from an otherwise valid response default to ``0``.
    """
    if client is None:
        client = youtube

    # Key order is kept stable because the records are dumped to JSON as-is
    result = {
        "id": vid,
        "description": None,
        "view_count": None,
        "like_count": None,
        "dislike_count": None,
        "comment_count": None,
        "duration": None,
        "favorite_count": None,
        "comments": [],
        "error": None,
    }

    # Download video data
    try:
        response = client.videos().list(part='snippet,contentDetails,statistics', id=vid).execute()
        items = response.get('items') or []
        if not items:
            # An unknown, deleted or private ID yields an empty item list;
            # report that explicitly instead of an opaque IndexError message
            result["error"] = f'Video not found: {vid}'
            return result

        video_data = items[0]
        # Fields are assigned one by one so that values read before a failure are kept
        result["description"] = video_data['snippet'].get('description', 0)
        result["view_count"] = video_data['statistics'].get('viewCount', 0)
        result["like_count"] = video_data['statistics'].get('likeCount', 0)
        result["dislike_count"] = video_data['statistics'].get('dislikeCount', 0)
        result["comment_count"] = video_data['statistics'].get('commentCount', 0)
        result["duration"] = video_data['contentDetails'].get('duration', 0)
        result["favorite_count"] = video_data['statistics'].get('favoriteCount', 0)
    except Exception as ex:
        # Best effort: record the failure and skip the comments request
        result["error"] = str(ex)
        return result

    # Download comments only if there was no error above
    try:
        threads = client.commentThreads().list(
            part='snippet,replies',
            videoId=vid,
            maxResults=_MAX_COMMENTS_PER_REQUEST,
        ).execute()
        for item in threads.get('items', []):
            result["comments"].append(item['snippet']['topLevelComment']['snippet']['textDisplay'])
    except Exception as ex:
        # The video metadata collected above is kept; only the error is recorded
        result["error"] = str(ex)

    return result

#download_video_data('QuV__efzdVo')

In [None]:
# Remove from the list the videos that have already been downloaded.
# The list is rebuilt from df.index on every run so that re-executing this
# cell gives the same result; slicing video_ids in place would make the second
# run fail with ValueError because the marker ID is no longer in the list.
LAST_DOWNLOADED_ID = 'mzbe0LCDMbg'  # First 5600 videos have already been downloaded
all_video_ids = df.index.to_list()
last_video = all_video_ids.index(LAST_DOWNLOADED_ID)  # ValueError if the ID is not in df
video_ids = all_video_ids[last_video + 1:]

In [None]:
SAVE_EVERY = 100      # write a checkpoint after this many downloads
PAUSE_SECONDS = 10    # pause after each checkpoint (presumably to ease API rate limits)
RESULT_PATH = './data/result.json'


def _save_videos():
    """Write every record collected in `videos` so far to RESULT_PATH as JSON."""
    with open(RESULT_PATH, 'w') as json_file:
        json.dump(videos, json_file, indent=4)


count = 0  # stays 0 when there is nothing to download
for count, video_id in enumerate(video_ids, start=1):

    print(f'Downloading [{count}] ID: {video_id}...')

    videos.append(download_video_data(video_id))

    # Every SAVE_EVERY videos save the data
    if count % SAVE_EVERY == 0:
        _save_videos()
        time.sleep(PAUSE_SECONDS)

# Save the last, incomplete batch as well; without this the final
# (len(video_ids) % SAVE_EVERY) records would never be written to disk
if count % SAVE_EVERY != 0:
    _save_videos()


In [34]:
# Read the previously downloaded video records and preview the first rows
videos_df = pd.read_json(path_or_buf='./data/v5600.json')
videos_df.head(n=5)

Unnamed: 0,id,description,view_count,like_count,dislike_count,comment_count,duration,favorite_count,comments,error
0,K26_sDKnvMU,Trailer for Toy Story (1995) captured from the...,116545.0,150.0,0.0,0.0,PT1M31S,0.0,[],<HttpError 403 when requesting https://youtube...
1,3LPANjHlPxo,Jumanji Trailer 1995\nDirector: Joe Johnston\n...,196593.0,286.0,0.0,0.0,PT2M48S,0.0,[],<HttpError 403 when requesting https://youtube...
2,rEnOoWs3FuA,"The more things change, the more they stay the...",221365.0,220.0,0.0,13.0,PT1M52S,0.0,"[Buena película de comedia romántica, <a href=...",
3,j9xml1CxgXI,Whitney Houston and Angela Bassett star in thi...,433261.0,0.0,0.0,0.0,PT2M40S,0.0,[],<HttpError 403 when requesting https://youtube...
4,ltwvKLnj1B4,,,,,,,,[],list index out of range
