In [67]:
from urllib.parse import urljoin, urlparse, urlencode, ParseResult


BASE_URL = 'https://www.youtube.com'
# NOTE: despite its name this is the URL *path* segment ("/watch"), not a
# sub-domain. The name is kept because other cells may reference it.
WATCH_SUB_DOMAIN = 'watch'


# construct the YouTube video location given the extracted video id
def video_url_from_id(video_id):
    """Return the watch-page URL for ``video_id``.

    The URL is BASE_URL joined with WATCH_SUB_DOMAIN, with the id attached
    as the url-encoded ``v`` query parameter, e.g.
    ``https://www.youtube.com/watch?v=<video_id>``.

    Args:
        video_id: the video id to embed in the query string.

    Returns:
        The complete URL as a string.
    """
    watch_page = urlparse(urljoin(BASE_URL, WATCH_SUB_DOMAIN))

    # Swap in the query on the already-parsed result instead of rebuilding a
    # ParseResult by hand; this keeps every component a string (the previous
    # code passed params=None, whereas absent components are '').
    return watch_page._replace(query=urlencode({'v': video_id})).geturl()


In [94]:
import os
import json
import requests as req
from urllib.parse import urljoin

import googleapiclient.discovery

# Service name and version handed to googleapiclient.discovery.build.
SERVICE = 'youtube'
API_VERSION = 'v3'

# Both values are read from the environment when this cell runs; a missing
# variable raises KeyError here.
CHANNEL_ID = os.environ['CHANNEL_ID']  # channel whose playlists are listed
API_KEY = os.environ['API_KEY']  # passed as developerKey when building the client


def get_authenticated_service():
    """Build and return an API client for SERVICE / API_VERSION.

    The client is created with ``developerKey=API_KEY``; no other
    credentials are supplied here.
    """
    build_client = googleapiclient.discovery.build
    return build_client(SERVICE, API_VERSION, developerKey=API_KEY)


# only public playlists (unlisted and private ones are excluded) belonging
# to my account are listed
def retrieve_playlists(youtube):
    """Fetch the playlists of the channel identified by CHANNEL_ID.

    A single ``playlists().list`` request is issued with ``maxResults=50``;
    no follow-up pages are requested.

    Args:
        youtube: API client exposing ``playlists().list(...).execute()``.

    Returns:
        Whatever ``execute()`` returns for that request.
    """
    list_kwargs = {
        'part': 'snippet',
        'channelId': CHANNEL_ID,
        'maxResults': 50,
    }
    playlists_request = youtube.playlists().list(**list_kwargs)
    return playlists_request.execute()


def dump_channel_playlist_to_file():
    """Fetch the channel's playlists and save them to ``playlists.json``.

    The response is written to the current working directory as JSON with
    sorted keys and a two-space indent, followed by a trailing newline.
    """
    playlists = retrieve_playlists(get_authenticated_service())
    with open('playlists.json', 'w') as out_file:
        serialized = json.dumps(playlists, sort_keys=True, indent=2)
        # Same bytes as print(serialized, file=out_file): text, then newline.
        out_file.write(serialized)
        out_file.write('\n')

        
def load_channel_playlist(from_file='playlists.json'):
    """Load a JSON playlist dump from disk.

    Args:
        from_file: path of the JSON file to read. Defaults to
            ``'playlists.json'``, the file written by
            ``dump_channel_playlist_to_file``.

    Returns:
        The decoded JSON content.

    Raises:
        FileNotFoundError: if ``from_file`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Previously the argument was ignored and "playlists.json" was always
    # opened; honour the path the caller passed in.
    with open(from_file, 'r') as fp:
        return json.load(fp)


# the extracted id (from the playlist dump) is equivalent to the playlist
# id in a url of the form: youtube.com/watch?v={video}&list={playlist_id}
def dump_from_playlist(youtube, playlist_id):
    """Fetch one page of items for ``playlist_id``.

    A single ``playlistItems().list`` request is issued with
    ``maxResults=50``; later pages are not requested.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_id: id of the playlist to query.

    Returns:
        Whatever ``execute()`` returns for that request.
    """
    items_request = youtube.playlistItems().list(
        part='snippet', playlistId=playlist_id, maxResults=50
    )
    response = items_request.execute()
    return response


# extract selected data from an API request
def _extract_from_response(response):
    """Reduce a playlist-items response to a list of plain video records.

    Args:
        response: mapping with an ``'items'`` sequence; each item is read at
            ``['snippet']['title']``, ``['snippet']['description']`` and
            ``['snippet']['resourceId']['videoId']``.

    Returns:
        A list with one dict per item, holding the keys ``'title'``,
        ``'description'``, ``'id'`` and ``'url'`` (the watch URL built by
        ``video_url_from_id``).

    Raises:
        KeyError: if any of the keys read above is missing.
    """
    videos = []
    for item in response['items']:
        snippet = item['snippet']
        record = {
            'title': snippet['title'],
            'description': snippet['description'],
        }
        record['id'] = snippet['resourceId']['videoId']
        record['url'] = video_url_from_id(record['id'])
        videos.append(record)

    return videos


In [51]:
# Build the API client once; the cells below pass it to the playlist helpers.
youtube_session = get_authenticated_service()

In [152]:
def extract_all_from_playlist(youtube, playlist_id):
    """Collect the video records of every page of a playlist.

    Pages are requested 50 items at a time. If the playlist holds more items
    than ``maxResults``, the response carries a ``nextPageToken``; that token
    is passed back as ``pageToken`` until a response arrives without one.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_id: id of the playlist to read.

    Returns:
        A list of the dicts produced by ``_extract_from_response``, in page
        order.
    """
    videos = []
    page_token = None  # None asks for the first page

    while True:
        response = youtube.playlistItems().list(
            part='snippet',
            playlistId=playlist_id,
            pageToken=page_token,
            maxResults=50
        ).execute()
        # extend() appends in place; `videos = videos + page` would re-copy
        # everything collected so far on every page.
        videos.extend(_extract_from_response(response))

        # A missing (or empty) token means this was the last page.
        page_token = response.get('nextPageToken')
        if not page_token:
            break

    return videos


In [153]:
# Collect the records of every page of one playlist and print the raw list.
r = extract_all_from_playlist(youtube_session, 'PL0WFr828oRc1vqQD2rqH3WULTxZqq4pZn')

print(r)



In [150]:
# Count the records in the same playlist. Note: this calls
# extract_all_from_playlist again (re-issuing the API requests) rather than
# reusing `r` from the cell above.
len(extract_all_from_playlist(youtube_session, 'PL0WFr828oRc1vqQD2rqH3WULTxZqq4pZn'))

88