In [1]:
from urllib.parse import urljoin, urlparse, urlencode, ParseResult


BASE_URL = 'https://www.youtube.com'
# path of the watch page relative to BASE_URL (a path segment, despite the name)
WATCH_SUB_DOMAIN = 'watch'


def video_url_from_id(video_id):
    """Construct the YouTube video location given the extracted video id.

    Args:
        video_id: Video id, e.g. as found in a playlist item.

    Returns:
        str: ``BASE_URL`` joined with ``WATCH_SUB_DOMAIN`` plus a URL-encoded
        ``v=<video_id>`` query string, e.g.
        ``https://www.youtube.com/watch?v=gMqZR3pqMjg``.
    """
    watch_page = urlparse(urljoin(BASE_URL, WATCH_SUB_DOMAIN))
    # _replace keeps every URL component a str; hand-building a ParseResult
    # with params=None only worked because urlunparse treats None as empty.
    return watch_page._replace(query=urlencode({'v': video_id})).geturl()


In [2]:
def _extract_playlist_ids_from_response(resp):
    collect = []
    for i in resp['items']:
        collect.append(i['id'])
    
    return collect

In [3]:
import os
import json
import requests as req
from urllib.parse import urljoin

import googleapiclient.discovery

# service name and version handed to googleapiclient.discovery.build
SERVICE = 'youtube'
API_VERSION = 'v3'

# both values are read from the environment when this cell runs; a missing
# variable raises KeyError
CHANNEL_ID = os.environ['CHANNEL_ID']
API_KEY = os.environ['API_KEY']


def get_authenticated_service():
    """Build the API client for SERVICE / API_VERSION, keyed with API_KEY."""
    client = googleapiclient.discovery.build(
        SERVICE,
        API_VERSION,
        developerKey=API_KEY,
    )
    return client


def retrieve_playlist_ids(youtube):
    """Collect the ids of every playlist listed for CHANNEL_ID.

    Only public playlists (excludes unlisted and private) belonging to
    my account are listed.

    Args:
        youtube: API client exposing ``playlists().list(...).execute()``.

    Returns:
        list: Playlist ids gathered from all result pages, in response order.
    """
    playlist_ids = []
    next_page = None  # the first request is sent without a page token

    while True:
        request = youtube.playlists().list(
            part='snippet',
            channelId=CHANNEL_ID,
            pageToken=next_page,
            maxResults=50
        )
        resp = request.execute()
        playlist_ids.extend(_extract_playlist_ids_from_response(resp))

        # stop once the response carries no (or an empty) `nextPageToken`;
        # re-requesting with an empty token would fetch the first page again
        next_page = resp.get('nextPageToken')
        if not next_page:
            return playlist_ids
    

def dump_channel_playlist_to_file(to_file='playlists.json'):
    """Fetch the channel's playlist ids and write them to a JSON file.

    Args:
        to_file: Destination path; defaults to 'playlists.json'.
    """
    youtube = get_authenticated_service()
    # the original call targeted `retrieve_playlists`, which is not defined
    # in this notebook; `retrieve_playlist_ids` is the playlist lister
    playlist_ids = retrieve_playlist_ids(youtube)
    with open(to_file, 'w') as fp:
        json.dump(playlist_ids, fp, sort_keys=True, indent=2)
        fp.write('\n')  # same trailing newline print() used to emit

        
def load_channel_playlist(from_file):
    """Load a JSON playlist dump from disk.

    Args:
        from_file: Path of the JSON file to read.

    Returns:
        The decoded JSON content of ``from_file``.
    """
    # open the path the caller passed; it used to be ignored in favour of a
    # hard-coded "playlists.json"
    with open(from_file, "r") as fp:
        return json.load(fp)


def dump_from_playlist(youtube, playlist_id):
    """Return the raw API response for one page of a playlist's items.

    The extracted id (from the playlist dump) is equivalent to the
    playlist url id in the form:
    youtube.com/watch?v={video}&list={playlist_id}
    """
    query = {
        'part': 'snippet',
        'playlistId': playlist_id,
        'maxResults': 50,
    }
    return youtube.playlistItems().list(**query).execute()


# extract selected data from an API request
def _extract_playlist_items_from_response(response):
    collect = []
    for i in response['items']:
        snippet = i['snippet']
        title = snippet['title']
        # description = snippet['description']
        time = snippet['publishedAt']
        video_id = snippet['resourceId']['videoId']
        url = video_url_from_id(video_id)

        d = {
            'title': title,
            'updated_at': time,
            'url': url
        }
        collect.append(d)
    
    return collect


In [4]:
# one API client, reused by the cells below
youtube_session = get_authenticated_service()

In [5]:
# ids of the playlists listed for CHANNEL_ID
retrieve_playlist_ids(youtube_session)

In [6]:
def playlist_name_from_id(youtube, playlist_id):
    """Look up the localized title of a playlist.

    Args:
        youtube: API client exposing ``playlists().list(...).execute()``.
        playlist_id: Id of the playlist to look up.

    Returns:
        The ``snippet.localized.title`` value of the first returned item.

    Raises:
        UnboundLocalError: If the response holds no items for ``playlist_id``.
    """
    request = youtube.playlists().list(
        part='snippet',
        id=playlist_id
    )
    items = request.execute()['items']

    if not items:
        # exception type kept so existing handlers still match; the message
        # now names the id that came back empty
        raise UnboundLocalError(
            'no playlist found for id {!r}'.format(playlist_id)
        )

    return items[0]['snippet']['localized']['title']

In [7]:
# resolve one playlist id to its title
playlist_name_from_id(youtube_session, 'PL0WFr828oRc1vqQD2rqH3WULTxZqq4pZn')

'documentaries'

In [8]:
# fetch a single page (up to 50 items) of one playlist by hand and reduce it
# to title / updated_at / url records
# NOTE: this rebinds the global name `req`, which an earlier cell imported as
# the alias for `requests`
req = youtube_session.playlistItems().list(
        part='snippet',
        playlistId='PL0WFr828oRc1vqQD2rqH3WULTxZqq4pZn',
        # if the number of items in the playlist > maxResults, there will be a
        # `nextPageToken` in the snippet, pass the `nextPageToken` as the `pageToken` param
        pageToken=None,
        maxResults=50
    )

response = req.execute()
_extract_playlist_items_from_response(response)

[{'title': 'The surprising pattern behind color names around the world',
  'updated_at': '2018-08-18T04:30:16Z',
  'url': 'https://www.youtube.com/watch?v=gMqZR3pqMjg'},
 {'title': 'The math problem that stumped thousands of mansplainers',
  'updated_at': '2018-08-18T04:36:28Z',
  'url': 'https://www.youtube.com/watch?v=ggDQXlinbME'},
 {'title': 'The City That Has Its Own Operating System',
  'updated_at': '2019-03-18T04:09:59Z',
  'url': 'https://www.youtube.com/watch?v=ROKydMx9EGs'},
 {'title': 'How Music Can Be Better For You Than Sex and Chocolate: Deanna Choi at TEDxQueensU',
  'updated_at': '2019-03-18T04:10:20Z',
  'url': 'https://www.youtube.com/watch?v=XZFKpkDUMB4'},
 {'title': 'Want to sound like a leader? Start by saying your name right | Laura Sicola | TEDxPenn',
  'updated_at': '2019-03-18T04:10:46Z',
  'url': 'https://www.youtube.com/watch?v=02EJ1IdC6tE'},
 {'title': 'HOW IT WORKS: Transmissions',
  'updated_at': '2019-03-18T04:11:19Z',
  'url': 'https://www.youtube.com/w

In [70]:
def extract_all_from_playlist(youtube, playlist_id):
    """Extract all the items from a playlist, following every result page.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_id: Id of the playlist to read.

    Returns:
        tuple: ``(playlist_name, playlist_items)`` where ``playlist_items``
        is the list of dicts produced by
        ``_extract_playlist_items_from_response`` across all pages.
    """
    playlist_name = playlist_name_from_id(youtube, playlist_id)
    playlist_items = []
    next_page = None  # the first request is sent without a page token

    while True:
        request = youtube.playlistItems().list(
            part='snippet',
            playlistId=playlist_id,
            # if the number of items in the playlist > maxResults, the
            # response has a `nextPageToken`; it is sent back as `pageToken`
            pageToken=next_page,
            maxResults=50
        )
        resp = request.execute()
        playlist_items.extend(_extract_playlist_items_from_response(resp))

        # stop once the response carries no (or an empty) `nextPageToken`
        next_page = resp.get('nextPageToken')
        if not next_page:
            break

    return (playlist_name, playlist_items)


In [71]:
# (playlist name, items from every page) for the playlist queried above
extract_all_from_playlist(youtube_session, 'PL0WFr828oRc1vqQD2rqH3WULTxZqq4pZn')


('documentaries',
 [{'title': 'The surprising pattern behind color names around the world',
   'updated_at': '2018-08-18T04:30:16Z',
   'url': 'https://www.youtube.com/watch?v=gMqZR3pqMjg'},
  {'title': 'The math problem that stumped thousands of mansplainers',
   'updated_at': '2018-08-18T04:36:28Z',
   'url': 'https://www.youtube.com/watch?v=ggDQXlinbME'},
  {'title': 'The City That Has Its Own Operating System',
   'updated_at': '2019-03-18T04:09:59Z',
   'url': 'https://www.youtube.com/watch?v=ROKydMx9EGs'},
  {'title': 'How Music Can Be Better For You Than Sex and Chocolate: Deanna Choi at TEDxQueensU',
   'updated_at': '2019-03-18T04:10:20Z',
   'url': 'https://www.youtube.com/watch?v=XZFKpkDUMB4'},
  {'title': 'Want to sound like a leader? Start by saying your name right | Laura Sicola | TEDxPenn',
   'updated_at': '2019-03-18T04:10:46Z',
   'url': 'https://www.youtube.com/watch?v=02EJ1IdC6tE'},
  {'title': 'HOW IT WORKS: Transmissions',
   'updated_at': '2019-03-18T04:11:19Z',


In [None]:
# raw response for one page (maxResults=50) of the same playlist
dump_from_playlist(youtube_session, 'PL0WFr828oRc1vqQD2rqH3WULTxZqq4pZn')