In [1]:
import json
import os
import time

import pandas as pd
from googleapiclient.discovery import build

In [2]:
# Read the API key from the YOUTUBE_API_KEY environment variable so the real
# key never has to be written into the notebook. The original placeholder is
# kept as the fallback so the cell still runs when the variable is not set.
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'API_KEY')

In [3]:
# Build the API client object ('youtube' service, version 'v3'),
# authenticated with the developer key defined above.
youtube = build(serviceName='youtube', version='v3', developerKey=API_KEY)

In [35]:
# Load videos to download
# Reads the list of video links from CSV; the indexing cell that follows
# relies on a `youtubeId` column being present in this file.
df = pd.read_csv('./data/vdoLinks.csv')

In [36]:
# Index the frame by video ID. The Series itself (not the column label) is
# passed, so `youtubeId` also stays available as a regular column; the index
# name is then cleared.
df = df.set_index(df['youtubeId']).rename_axis(None)

In [37]:
# Plain Python list of every video ID, in the same order as the frame's index
video_ids = df.index.tolist()

In [8]:
# Accumulator for the per-video result dicts produced by the download loop
videos = list()

In [9]:
def download_video_data(vid: str, client=None, max_comments: int = 100):
    """
    Download video data from YouTube API

    Fetches the snippet, content details and statistics of one video, then the
    first page of its top-level comment threads.

    Parameters
    ----------
    vid : str
        The video ID to download data from YouTube API
    client : object, optional
        API resource exposing ``videos()`` and ``commentThreads()``. When
        omitted, the notebook-level ``youtube`` object is used.
    max_comments : int, default 100
        Page size requested from ``commentThreads().list``. The value is
        clamped to 1..100, the range the API documents for ``maxResults``.
        Only the first page is requested; there is no pagination.

    Returns
    -------
    dict or None
        ``None`` when the metadata request fails (the error text is printed).
        Otherwise a dict with the keys ``id``, ``description``, ``view_count``,
        ``like_count``, ``dislike_count``, ``comment_count``, ``duration``,
        ``favorite_count``, ``comments`` and ``error``. ``error`` holds the
        text of a failure of the comments request, or ``None``.
    """
    api = youtube if client is None else client
    page_size = max(1, min(int(max_comments), 100))

    # Download video data. Any failure here (HTTP error, unknown ID giving an
    # empty 'items' list, missing section) means there is nothing to record.
    try:
        response = api.videos().list(part='snippet,contentDetails,statistics', id=vid).execute()
        video_data = response['items'][0]
        snippet = video_data['snippet']
        statistics = video_data['statistics']
        content_details = video_data['contentDetails']
        record = {
            "id": vid,
            "description": snippet.get('description', 0),
            "view_count": statistics.get('viewCount', 0),
            "like_count": statistics.get('likeCount', 0),
            "dislike_count": statistics.get('dislikeCount', 0),
            "comment_count": statistics.get('commentCount', 0),
            "duration": content_details.get('duration', 0),
            "favorite_count": statistics.get('favoriteCount', 0),
            "comments": [],
            "error": None
        }
    except Exception as ex:
        print(str(ex))
        return None

    # Download comments only if there is no error. A failure here is recorded
    # in the result instead of discarding the metadata already fetched;
    # comments collected before the failure are kept.
    try:
        threads = api.commentThreads().list(part='snippet,replies', videoId=vid, maxResults=page_size).execute()
        for thread in threads['items']:
            record["comments"].append(thread['snippet']['topLevelComment']['snippet']['textDisplay'])
    except Exception as ex:
        record["error"] = str(ex)

    return record

#download_video_data('QuV__efzdVo')

In [38]:
# Remove from the list the videos that have already been downloaded.
# The slice is taken from the full ID list in `df.index` rather than from
# `video_ids` itself, so re-running this cell gives the same result instead of
# raising ValueError (after the first run the marker ID is no longer in
# `video_ids`). Assumes `df` is still indexed by video ID.
all_video_ids = df.index.to_list()
last_video = all_video_ids.index("Ymwe4DB_HsU") # Videos up to and including this one have been downloaded
print(f'Last video: {last_video}')
video_ids = all_video_ids[last_video+1:]

Last video: 22997


In [39]:
# Number of video IDs currently held in `video_ids`
len(video_ids)

2625

In [32]:
# Download every ID in `video_ids`, appending each result to `videos` and
# checkpointing the whole list to disk every 100 stored videos.
# NOTE(review): each checkpoint rewrites result.json from the in-memory
# `videos` list only. When resuming in a fresh kernel, confirm `videos`
# already holds the earlier results, otherwise the previous file content is
# replaced.
count = 0  # stays 0 if there is nothing to download
for count, video_id in enumerate(video_ids, start=1):  # `video_id`, not `id`: avoid shadowing the builtin

    print(f'Downloading [{count}] ID: {video_id}...')

    item = download_video_data(video_id)
    if item is None:
        # The metadata request failed; there is nothing to store for this video
        print("Item is None")
        continue

    videos.append(item)

    # Every 100 videos save the data
    if len(videos) % 100 == 0:
        print(f'Saving {len(videos)} videos...')
        # Save as json
        with open('./data/result.json', 'w') as json_file:
            json.dump(videos, json_file, indent=4)

        # Pause after each checkpoint (presumably to ease off the API; confirm)
        time.sleep(10)


In [None]:
# Final write-out: the download loop only checkpoints when the number of
# stored videos is a multiple of 100, so write the full list once more to
# capture any remainder.
with open('./data/result.json', mode='w') as json_file:
    json_file.write(json.dumps(videos, indent=4))

In [27]:
# Read the saved results back into a DataFrame, then show its dimensions
# and the first five rows.
videos_df = pd.read_json('./data/result.json')
print(videos_df.shape)
videos_df.head(5)

(4106, 10)


Unnamed: 0,id,description,view_count,like_count,dislike_count,comment_count,duration,favorite_count,comments,error
0,KTmrUXNxwWI,,64930,142,0,2,PT1M9S,0,[i dont cry much but after watching this movie...,
1,8u99lWnxd2E,Friends and family of Edie Sedgwick discuss wh...,296236,3046,0,608,PT28M22S,0,"[Factory girl, Gia, sad, good movies, Just fas...",
2,OkhvmiN4lUY,"For 15-year-old Trife, life is a day-to-day st...",120858,0,0,2,PT2M6S,0,"[whas the song called?, Quality film]",
3,C6err100S78,Like Mike 2 trailer,1680546,4862,0,0,PT3M,0,[],<HttpError 403 when requesting https://youtube...
4,nVZbipz5zLo,Considerato un modello per il genere film-inch...,144052,409,0,12,PT4M20S,0,[Grande uomo salvatore giuliano ha lottato per...,


In [33]:
# Last five rows of the downloaded results
videos_df.tail(5)

Unnamed: 0,id,description,view_count,like_count,dislike_count,comment_count,duration,favorite_count,comments,error
4101,6fDAap9JDwA,Subscribe to TRAILERS: http://www.youtube.com/...,115455,289,0,28,PT1M57S,0,[],<HttpError 403 when requesting https://youtube...
4102,v5twP_19CEA,"funny sci-fi/horror movie from 1964, directed ...",429161,1717,0,752,PT1H16M33S,0,[],<HttpError 403 when requesting https://youtube...
4103,AtnCWxhYyEE,Subscribe to TRAILERS: http://bit.ly/sxaw6h\nS...,140434,395,0,158,PT46S,0,[],<HttpError 403 when requesting https://youtube...
4104,DuynkTM0zCo,A priggish dean's assistant at the local unive...,3899,0,0,0,PT2M19S,0,[],<HttpError 403 when requesting https://youtube...
4105,Ymwe4DB_HsU,Streaming Now on Disney+ – Sign Up at https://...,14446666,8044,0,435,PT33S,0,[],<HttpError 403 when requesting https://youtube...
