In [3]:
#!pip install google-api-python-client
import googleapiclient.discovery
import traceback
from tools import load_api_cfg

# Load the API access attributes (service name, version, developer key) from cfg.json
# and build the API service handle that the fetch functions in this notebook use.
apicfg=load_api_cfg('cfg.json')
# NOTE(review): apicfg is subscripted below without a None check; if load_api_cfg can
# return None on a missing/invalid cfg.json the next line raises TypeError - confirm its contract.
youtube = googleapiclient.discovery.build(apicfg['service_name'], apicfg['version'], developerKey=apicfg['key'])




In [4]:
def get_channel_info(channel_id):
  '''
    Fetches the channel identified by the provided channel ID
    Input: Channel ID
    Returns: Dictionary representing channel information with the keys
             id, name, type, description, playlistId, views, hidden_subscribers,
             subscriber_count, status and video_count.
             subscriber_count is None when the channel hides its subscriber count.
             None is returned when no channel comes back for the ID or when the
             request/processing fails (the failure is printed, not raised).
  '''
  try:
    # Prepare and execute the API request
    request = youtube.channels().list(
      part="snippet,contentDetails,statistics,status",
      id=channel_id)
    response = request.execute()

    # Guard against a response that carries no channel item, so the caller gets
    # a readable message instead of an IndexError/KeyError traceback
    items = response.get("items") or []
    if not items:
      print(f'No channel found for channel ID {channel_id}')
      return None
    item = items[0]

    # Extract from the response, the channel attributes that we are interested in
    snippet = item["snippet"]
    statistics = item["statistics"]
    hidden = statistics["hiddenSubscriberCount"]
    data = {
      "id": item["id"],
      "name": snippet["title"],
      "type": item["kind"],
      "description": snippet["description"],
      "playlistId": item["contentDetails"]["relatedPlaylists"]["uploads"],
      "views": statistics["viewCount"],
      "hidden_subscribers": hidden,
      # The subscriber count is only read when the channel does not hide it
      "subscriber_count": None if hidden else statistics.get("subscriberCount"),
      "status": item["status"]["privacyStatus"],
      "video_count": statistics["videoCount"],
    }
    return data
  except Exception:
    # 'except Exception' (not a bare except) so Ctrl-C / SystemExit still propagate
    print('Failed to fetch or process channel info from Youtube Data API v3')
    # print_exc() writes the traceback itself and returns None - do not print() its result
    traceback.print_exc()
    return None


In [5]:
def get_playlist_info(channel_id, max_results=10):
  '''
    Fetches the playlists for given channel
    Input: Channel ID
           max_results - value forwarded as the API's maxResults (defaults to 10)
    Returns: Dictionary of dictionaries keyed by playlist ID, each value holding
             the channelId, name and id of one playlist of the given channel.
             Only the first page of results is read (no pagination).
             None is returned when the request/processing fails (the failure is
             printed, not raised).
  '''
  try:
    # Prepare the API request
    request = youtube.playlists().list(part="snippet", channelId=channel_id, maxResults=max_results)

    # Execute the API request
    response = request.execute()

    # Extract from the response, the playlist attributes that we are interested in
    play_lists = dict()
    for obj in response.get("items", []):
      play_lists[obj["id"]] = {
        "channelId": obj["snippet"]["channelId"],
        "name": obj["snippet"]["title"],
        "id": obj["id"],
      }
    return play_lists
  except Exception:
    # 'except Exception' (not a bare except) so Ctrl-C / SystemExit still propagate
    print('Failed to fetch or process playlist info from Youtube Data API v3')
    # print_exc() writes the traceback itself and returns None - do not print() its result
    traceback.print_exc()
    return None

In [6]:
def get_comments_info(channel_id, max_results=20):
  '''
    Fetches comments for given channel
    Input: Channel ID
           max_results - value forwarded as the API's maxResults (defaults to 20)
    Returns: Dictionary of dictionaries keyed by comment thread ID, each value
             holding the id, video_id, text, author and published_date of the
             thread's top-level comment.
             Only the first page of results is read (no pagination).
             None is returned when the request/processing fails (the failure is
             printed, not raised).
  '''
  try:
    # Prepare the API request
    request = youtube.commentThreads().list(
      part="snippet", allThreadsRelatedToChannelId=channel_id, maxResults=max_results)

    # Execute the API request
    response = request.execute()

    # Extract from the response, the comment thread attributes that we are interested in
    comment_threads = dict()
    for thread in response.get("items", []):
      top_comment = thread["snippet"]["topLevelComment"]["snippet"]
      comment_threads[thread["id"]] = {
        "id": thread["id"],
        "video_id": thread["snippet"]["videoId"],
        "text": top_comment["textDisplay"],
        "author": top_comment["authorDisplayName"],
        "published_date": top_comment["publishedAt"],
      }
    return comment_threads
  except Exception:
    # 'except Exception' (not a bare except) so Ctrl-C / SystemExit still propagate
    print('Failed to fetch or process comment threads info from Youtube Data API v3')
    # print_exc() writes the traceback itself and returns None - do not print() its result
    traceback.print_exc()
    return None


In [7]:
'''
  Fetches video information for a given set of video IDs
  Input: Collection of video IDs, and the channel dictionary returned by get_channel_info
  Returns: Dictionary of dictionaries, each representing video
           info for one of the requested videos
'''
import re

# Helper to convert API duration string to duration in seconds
def to_seconds(dur_str):
  '''
    Converts a duration string of the form P#DT#H#M#S into seconds.
    Every component is optional, so PT#H#M#S, PT#M#S, PT#S, PT#H, PT#H#S,
    P#D etc. are all accepted.
    Input: Duration string
    Returns: Duration in whole seconds; 0 when the string is not a complete
             duration of the above form
  '''
  # One pattern with optional components replaces the four fixed-shape patterns,
  # which returned 0 for values such as 'PT1H' or 'PT15M' that omit smaller units.
  # \d+ (not \d*) so an empty component can never reach int().
  pattern = (r'P(?:(?P<days>\d+)D)?'
             r'(?:T(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?(?:(?P<seconds>\d+)S)?)?')
  m = re.fullmatch(pattern, dur_str)
  if m is None:
    return 0

  # Components missing from the string count as zero
  parts = {unit: int(value) for unit, value in m.groupdict(default='0').items()}
  return (parts['days']*86400) + (parts['hours']*3600) + (parts['minutes']*60) + parts['seconds']

def get_video_info(video_ids, channel):
  '''
    Fetches video information for the given video IDs
    Input: video_ids - collection of video IDs
           channel   - channel dictionary; its 'id' and 'playlistId' entries are
                       copied onto every video record
    Returns: Dictionary of dictionaries keyed by video ID, each representing one video.
             view_count, like_count, favorite_count and comment_count are None when
             the response does not carry them; dislike_count is always 0.
             An empty dictionary is returned when video_ids is empty (no request is made).
             None is returned when the request/processing fails (the failure is
             printed, not raised).
  '''
  try:
    vids = ",".join(map(str, video_ids))
    if not vids:
      # Nothing to look up - skip the API call
      return dict()

    # Prepare the API request.
    # maxResults is not sent: the videos.list reference lists it as unsupported
    # in conjunction with the id filter.
    request = youtube.videos().list(part="snippet,contentDetails,statistics", id=vids)

    # Execute the API request
    response = request.execute()

    # Extract from the response, the video attributes that we are interested in
    video_data = dict()
    for video in response.get("items", []):
      snippet = video["snippet"]
      statistics = video["statistics"]
      content_details = video["contentDetails"]

      data = dict()
      data["id"] = video["id"]
      data["playlist_id"] = channel['playlistId']
      data["channel_id"] = channel['id']

      data["name"] = snippet["title"]
      data["description"] = snippet["description"]
      data["published_date"] = snippet["publishedAt"]

      # Statistics are read with .get() so that one video lacking a counter yields None
      # for it instead of a KeyError that discards the whole batch.
      # NOTE(review): presumably counters are omitted for e.g. hidden likes or
      # disabled comments - confirm against the API reference.
      data["view_count"] = statistics.get("viewCount")
      data["like_count"] = statistics.get("likeCount")
      # The statistics.dislikeCount property was made private as of December 13, 2021.
      # It is included in an API response only if the request was authenticated by the
      # video owner, so it cannot be obtained here - setting to 0
      data["dislike_count"] = 0

      data["favorite_count"] = statistics.get("favoriteCount")
      data["comment_count"] = statistics.get("commentCount")
      data["duration"] = to_seconds(content_details["duration"])

      # Keep only the URL of every thumbnail variant
      data["thumbnails"] = {key: thumb["url"] for key, thumb in snippet["thumbnails"].items()}

      data["caption_status"] = content_details["caption"]
      video_data[data["id"]] = data
    return video_data
  except Exception:
    # 'except Exception' (not a bare except) so Ctrl-C / SystemExit still propagate
    print('Failed to fetch or process video info from Youtube Data API v3')
    # print_exc() writes the traceback itself and returns None - do not print() its result
    traceback.print_exc()
    return None


In [8]:
def get_all_info(channel_id):
  '''
    Fetches all the YouTube stats in oneshot - channel, playlist, comment threads and video
    Input: Channel ID
    Returns: Dictionary with the keys
             channel   - result of get_channel_info
             playlists - result of get_playlist_info
             comments  - result of get_comments_info
             videos    - result of get_video_info for the videos referenced by the
                         comment threads; {} when no video is referenced, None when
                         the channel information is unavailable
  '''
  stats=dict()
  # Fetch channel information
  stats["channel"] = get_channel_info(channel_id)

  # Fetch playlist information for the above channel
  stats["playlists"] = get_playlist_info(channel_id)

  # Fetch comment threads for the above channel
  stats["comments"] = get_comments_info(channel_id)

  # Collect the video IDs from the channel's comment threads.
  # The threads live under "comments" (the key written above); a missing
  # comments result is treated as "no threads".
  comment_threads = stats["comments"] or {}
  video_ids = {thread["video_id"] for thread in comment_threads.values()}

  # Fetch video info for supplied video ids
  if stats["channel"] is None:
    # get_video_info copies fields from the channel dictionary, so there is nothing to build on
    stats["videos"] = None
  elif not video_ids:
    stats["videos"] = dict()
  else:
    stats["videos"] = get_video_info(video_ids, stats["channel"])
  return stats

In [9]:
'''
  Test Program that triggers all the 4 APIs
'''
# Display name -> channel ID of the channels available for a test run
channel_ids={
    'Tamil Pokkisham': 'UCS84kz7Fs8bzRs6xcPY9lQQ',
    'CNN': 'UCupvZG-5ko_eiXAupbDfxWw',
    "TEDx Talks":'UCsT0YIqwnpJCM-mx7-gSA4Q',
    "WION":"UC_gUM8rL-Lrg6O3adPW9K1g",
    "The Indian Mystics":"UCcnBJHMugWpuy6Y7-SlI1ew",
    "Future Technology":"UCoIPNbr4UXhhZXDBZaUNHwA",
    "SonyMusicSouthVEVO":"UCTNtRdBAiZtHP9w7JinzfUg",
    "NBA":"UCWJ2lWNubArHWmf3FIHbfcQ",
    "moneycontrol":"UChftTVI0QJmyXkajQYt2tiQ",
    "GUVI":"UCduIoIMfD8tT3KoU0-zBRgQ",
    "Neural networks":"UCYO_jab_esuFRV4b17AJtAw"
}

# Pick one of the channel IDs from the test channels above
channel_id= channel_ids['TEDx Talks']
# Print the ID actually in use (the previous text named a variable that does not exist)
print(f'''channel_id={channel_id}''')

# Fetch & print channel info
channel= get_channel_info(channel_id)
print(f'''channel={channel}''')

#Fetch & print playlist info
playlists = get_playlist_info(channel_id)
print(f'''playlists={playlists}''')

# Fetch & print comments thread info
comments=get_comments_info(channel_id)
print(f'''comments={comments}''')

# Fetch & print video info for the videos referenced by the channel's comment threads.
# comments is None when the fetch above failed; treat that as "no threads".
video_ids = {thread["video_id"] for thread in (comments or {}).values()}
print(f'''videos={get_video_info(video_ids,channel)}''')

channel_id=test_channels['TEDx Talks']
channel={'id': 'UCsT0YIqwnpJCM-mx7-gSA4Q', 'name': 'TEDx Talks', 'type': 'youtube#channel', 'description': "TEDx is an international community that organizes TED-style events anywhere and everywhere -- celebrating locally-driven ideas and elevating them to a global stage. TEDx events are produced independently of TED conferences, each event curates speakers on their own, but based on TED's format and rules.\n\nFor more information on using TED for commercial purposes (e.g. employee learning, in a film or online course), please submit a Media Request here: https://media-requests.ted.com.", 'playlistId': 'UUsT0YIqwnpJCM-mx7-gSA4Q', 'views': '7877255081', 'hidden_subscribers': False, 'subscriber_count': '40800000', 'status': 'public', 'video_count': '216639'}
playlists={'PLsRNoUx8w3rNyeKk_xNnclpAqrWQztioB': {'channelId': 'UCsT0YIqwnpJCM-mx7-gSA4Q', 'name': 'Plus! TEDx talks en Français', 'id': 'PLsRNoUx8w3rNyeKk_xNnclpAqrWQztioB'}, 'PLsRNoUx8w3rOoNAR