In [1]:
import googleapiclient.discovery
import google_auth_oauthlib
import pickle
import json

Replace the default `print()` function with one that only prints if the global variable `VERBOSE` is set to `True`.  
The `'_print' in globals()` check is used to allow re-execution of the cell without messing up the function pointers.

In [2]:
# Global switch: when False, only calls that pass `force_print=True` produce output.
VERBOSE = True

# Re-executing this cell must not wrap the wrapper a second time,
# so restore the saved original `print` before saving it again.
if '_print' in globals():
    print = _print
_print = print

def print(*args, force_print=False, **kwargs):
    "Forward to the original `print()`, but only if `VERBOSE` is set or `force_print` is passed."
    if not (VERBOSE or force_print):
        return
    _print(*args, **kwargs)

In [3]:
def save_playlist_data(data):
    "Serialize `data` as JSON into `playlist_data.json`, overwriting any previous content of that file."
    # TODO have more than one file to write to, such that the last few versions are all kept, not just the last one.
    # NOTE: The context manager guarantees that the file is flushed and closed, even if `json.dump` raises.
    with open('playlist_data.json', 'w') as file:
        json.dump(data, file)
    
def load_playlist_data():
    "Load and return the data stored in `playlist_data.json`. Returns an empty dict if the file doesn't exist yet."
    try:
        # NOTE: `with` closes the file handle deterministically instead of leaving that to the garbage collector.
        with open('playlist_data.json') as file:
            return json.load(file)
    except FileNotFoundError:
        print('playlist_data.json doesn\'t exist yet, using empty dict.')
        return {}

## Authenticate

Choose whether you need to use the API Key to authenticate, or OAuth 2.0.  
If you only deal with public playlists, then the API Key is enough.  
If you want to backup your own private playlists, then you need to use OAuth 2.0.  
OAuth 2.0 strictly supersedes the API Key in terms of permissions. Generating and using it is a bit more tedious, though, so it is recommended to use the API Key if you don't need the extended permissions of OAuth 2.0.

### API Key

In [4]:
# Read the API key from disk; presumably `api_key.json` contains the key as a single JSON string.
# NOTE: `with` makes sure the file handle is closed again right after reading.
with open('api_key.json') as file:
    key:str = json.load(file)

In [5]:
# Build the YouTube Data API v3 client, authenticated with the API key only.
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=key)

### OAuth 2.0 Client IDs

In [None]:
# Read the OAuth 2.0 client ID and secret from `client_secret.json`.
# NOTE: `with` makes sure the file handle is closed again right after reading.
with open('client_secret.json') as file:
    secret:dict    = json.load(file)
client_id:str      = secret['installed']['client_id']
client_secret:str  = secret['installed']['client_secret']

You need to re-authenticate every time you want to run this program. Even if you store the credentials, there is a timeout on Google's side that prevents you from re-using old credentials.

In [None]:
# Run the interactive OAuth 2.0 flow, requesting read-only access to the user's YouTube account.
# NOTE: `scopes` is documented as a sequence of scope strings, so pass a list.
# A bare string is itself a sequence of characters, which misleads any code that iterates over the scopes.
credentials = google_auth_oauthlib.get_user_credentials(
    client_id=client_id, client_secret=client_secret,
    scopes=['https://www.googleapis.com/auth/youtube.readonly'])

Save the credentials to disk in order to not have to re-authenticate when you're just restarting the kernel.

In [None]:
def save_credentials(credentials):
    "Pickle `credentials` into `credentials.pkl`, overwriting any previous content of that file."
    # NOTE: The context manager guarantees that the file is flushed and closed, even if pickling fails.
    with open('credentials.pkl', 'wb') as file: pickle.dump(credentials, file)
def load_credentials():
    "Unpickle and return the object stored in `credentials.pkl`."
    # SECURITY: `pickle.load` can execute arbitrary code. Only load a `credentials.pkl` that you wrote yourself.
    with open('credentials.pkl', 'rb') as file: return pickle.load(file)

In [None]:
# Persist the freshly obtained credentials to `credentials.pkl`.
save_credentials(credentials)

In [None]:
# Restore the credentials from `credentials.pkl` (e.g. after a kernel restart) instead of re-authenticating.
credentials = load_credentials()

In [None]:
# Build the YouTube Data API v3 client with the OAuth 2.0 credentials.
# NOTE: This rebinds `youtube`, replacing the API Key client if that one was built before.
youtube = googleapiclient.discovery.build("youtube", "v3", credentials=credentials)

## Query all playlists belonging to a channel

In [6]:
def all_playlists_from_channel(channel_id:str=None) -> list:
    """
    Fetch the metadata of every playlist owned by the youtube channel `channel_id`.

    Without a `channel_id`, the playlists of the authenticated user's channel are requested
    instead (`mine=True`); this needs a client that was authenticated via OAuth 2.0.

    Returns a list with one `(playlist_id, title, channel_title, raw_playlist)` tuple per playlist.
    """
    # Select either the given channel, or fall back to the authenticated user's own channel.
    selector = {'channelId': channel_id} if channel_id else {'mine': True}
    request = youtube.playlists().list(part='snippet', maxResults=50, **selector)

    # Follow the pagination until `list_next` reports that there is no further page.
    raw_playlists = []
    while request is not None:
        response = request.execute(num_retries=2)
        raw_playlists += response['items']
        request = youtube.playlists().list_next(request, response)

    result = []
    for raw in raw_playlists:
        snippet = raw['snippet']
        result.append((raw['id'], snippet['title'], snippet['channelTitle'], raw))
    return result

Example:

In [7]:
# Fetch all playlists of an example channel, then show how many were found and the length of one result tuple.
playlists = all_playlists_from_channel('UC38IQsAvIsxxjztdMZQtwHA')
len(playlists), len(playlists[0])

(5, 4)

In [8]:
# The last element of each tuple is the raw playlist dict as received from the API.
playlists[0][-1].keys()

dict_keys(['kind', 'etag', 'id', 'snippet'])

## Get all Playlist metadata for all channels and playlists set in tracked_ids.json

In [11]:
def playlist_info(playlist_id:str) -> tuple:
    "Retrieve metadata for the playlist with given `playlist_id`.\n"\
    "Returns a `(playlist_id, title, channel_title, raw_playlist)` tuple, "\
    "which has the same layout as the entries returned by `all_playlists_from_channel()`.\n"\
    "Raises an `AssertionError` if the API doesn't report exactly one playlist for the ID."
    request = youtube.playlists().list(part='snippet', id=playlist_id)
    # NOTE: Retry failed requests twice, consistent with the other API queries in this notebook.
    response = request.execute(num_retries=2)
    assert response['pageInfo']['totalResults'] == 1, f'There should exist exactly one playlist per ID. {response}'
    pl = response['items'][0]
    return pl['id'], pl['snippet']['title'], pl['snippet']['channelTitle'], pl

In [10]:
def load_playlists_from_file():
    "Loads channel and playlist ids from the `tracked_ids.json` file, "\
    "and retrieves metadata for all associated playlists.\n"\
    "The file has to contain the keys `playlists` and `channels`, each holding a list of ids.\n"\
    "Returns the individually tracked playlists first, followed by all playlists of the tracked channels."
    # NOTE: `with` makes sure the file handle is closed again before the API queries start.
    with open('tracked_ids.json') as file:
        tracked = json.load(file)
    playlists = [playlist_info(p_id) for p_id in tracked['playlists']]
    for c_id in tracked['channels']:
        playlists.extend(all_playlists_from_channel(c_id))
    return playlists

Example:

In [13]:
# Number of playlists resolved from the ids in `tracked_ids.json`.
len(load_playlists_from_file())

10

## Query all information about a playlist's items

In [14]:
def all_playlist_items(playlist_id:str) -> list:
    "Receive a list of metadata for all videos in the playlist with given `playlist_id`.\n"\
    "All result pages are requested one after another, and their items are returned as one flat list."
    def pages():
        # Yield one API response per result page, until `list_next` reports that there is no further page.
        request = youtube.playlistItems().list(part='snippet', playlistId=playlist_id, maxResults=50)
        while request is not None:
            response = request.execute(num_retries=2)
            yield response
            request = youtube.playlistItems().list_next(request, response)

    return [item for page in pages() for item in page['items']]

Example:

In [15]:
# Fetch the metadata of all videos in an example playlist.
items = all_playlist_items('PL2MI040U_GXq1L5JUxNOulWCyXn-7QyZK')

In [16]:
# Top-level keys of a single playlist item.
items[0].keys()

dict_keys(['kind', 'etag', 'id', 'snippet'])

In [17]:
# The ID of the video itself is stored at snippet -> resourceId -> videoId; build a watch URL from it.
print('https://www.youtube.com/watch?v=' + items[0]['snippet']['resourceId']['videoId'])

https://www.youtube.com/watch?v=dQw4w9WgXcQ


## Main

In [18]:
def main(playlists:list, remove_missing_videos:bool=False, remove_missing_playlists:bool=False):
    """
    Compare the current state of all `playlists` against the backup, and construct the updated data.

    Loads the previous backup via `load_playlist_data()`, queries the items of every playlist in
    `playlists` via `all_playlist_items()`, and reports new, private, duplicate, changed, and missing
    videos, as well as new, duplicate, and missing playlists via `print()`.
    This function itself doesn't save anything; pass the returned new data to `save_playlist_data()` for that.

    playlists: `(playlist_id, title, channel_title, raw_playlist)` tuples, as returned by
        `all_playlists_from_channel()` and `playlist_info()`.
        NOTE: A `'videos'` entry is added to every `raw_playlist` dict.
    remove_missing_videos: If True, videos that are part of the backup, but weren't received anymore,
        are removed from the new data. Otherwise they are carried over from the backup.
    remove_missing_playlists: If True, playlists that are part of the backup, but not of `playlists`,
        are left out of the new data. Otherwise they are carried over from the backup.

    Returns `(old_pl_data, new_pl_data)`: the unmodified backup, and the newly constructed data.
    """
    old_pl_data       :dict = load_playlist_data()
    new_pl_data       :dict = dict()
    plist_contained   :set  = set()
    any_change_to_data:bool = False
    
    for plist_id, title, channel_name, plist in playlists:
        print(f'-------------------------------------------------------')
        print(f'-------------------------{title} @ {channel_name}')
        
        # NOTE: Check if this playlist was already present in previous runs.
        # Then add the data that's stored in the playlist dict about the videos
        # to the new version of the playlist dict.
        playlist_known:bool = (plist_id in old_pl_data)
        if playlist_known:
            # TODO: Check for changes in playlist metadata?
            # Don't really care about that, but might be useful...
            # NOTE: Work on a (shallow) copy of the video dict. Without the copy, every update below
            # would also modify `old_pl_data`, and the returned backup would be identical to the new data.
            # A shallow copy is enough, since entries are only replaced or removed, never modified.
            plist['videos'] = dict(old_pl_data[plist_id]['videos'])
        else:
            print(f'New Playlist with ID "{plist_id}" | Titled "{title}"')
            plist['videos'] = {}
            # NOTE: A playlist that wasn't tracked before is a change to the data, even if it contains no videos.
            any_change_to_data = True
        new_pl_data[plist_id] = plist

        # NOTE: Check for duplicate playlists. This should actually never happen, but you never know...
        if plist_id in plist_contained:
            print(f'Duplicate Playlist ID "{plist_id}"')
        plist_contained.add(plist_id)

        videos     :dict = plist['videos']
        contained  :set  = set()
        plist_has_changed = False
        plist_has_new     = False
        
        # NOTE: deleted videos won't even show up in this list,
        # so we have to check against saved info from previous runs later using `contained` and `videos`.
        for item in all_playlist_items(plist_id):
            video_id = item['snippet']['resourceId']['videoId']

            # NOTE: Check if this video is set to Private.
            is_private = ((not item['snippet']['thumbnails'])
                           or (item['snippet']['title']       == 'Private video')
                           or (item['snippet']['description'] == 'This video is private.'))
            if is_private:
                print(f'Private Video with ID "{video_id}" at position {item["snippet"]["position"]}')

            # NOTE: Check if this video has already appeared before in this playlist.
            # This is unaffected by data from previous runs.
            is_duplicate = (video_id in contained)
            if is_duplicate:
                # NOTE: `videos[video_id]` was already overwritten by the previous occurrence in this run,
                # so its position is the one of that previous occurrence.
                print(f'Duplicate Video ID "{video_id}" at position {item["snippet"]["position"]}. '\
                      f'Previous occurrence is at position {videos[video_id]["snippet"]["position"]}')
            contained.add(video_id)

            # NOTE: Check if this video is known from previous runs or not.
            # Also check if the video's title or description has changed compared to the previous run.
            # This will in almost every case also detect when a video has been set to private since the last run.
            # Only exception is if the original video was titled "Private video",
            # and had the description "This video is private."... That should never happen in practice though,
            # and at that point it doesn't really matter anyway, since the user will still know which video it was.
            is_new = not (video_id in videos)
            if is_new:
                plist_has_new = True
                print(f'New Video with ID "{video_id}" detected.')
            else:
                old = videos[video_id]
                
                old_title, new_title = old['snippet']['title'], item['snippet']['title']
                if (old_title != new_title):
                    plist_has_changed = True
                    print(f'Video with ID "{video_id}"\'s Title has changed.\n'\
                          f'\t"{old_title}" -> "{new_title}"')
                    
                old_description, new_description = old['snippet']['description'], item['snippet']['description']
                if (old_description != new_description):
                    plist_has_changed = True
                    print(f'Video with ID "{video_id}"\'s Description has changed.\n'\
                          f'\tLength: {len(old_description)} -> {len(new_description)}')
            
            videos[video_id] = item # This overwrites whatever video data was there before with the new data.
        
        # NOTE: Check and Inform user about changes to Tracked Data.
        any_change_to_data = (any_change_to_data or (plist_has_new or plist_has_changed))
        if plist_has_new:     print('New Videos have been added to Tracked Data.')
        if plist_has_changed: print('Changes to Titles and / or Descriptions have been applied.')

        # NOTE: Check if any videos that have appeared in previous runs are no longer present.
        missing = set(videos.keys()).difference(contained)
        if len(missing) > 0:
            print('Missing Videos detected!')
            if remove_missing_videos:
                any_change_to_data = True
                print('Following Videos have been removed from Tracked Data:')
            else:
                print('Following Videos are missing from received data, but will be carried over from backup:')
            for missing_id in missing:
                print(f'\tID: {missing_id} (pos: {videos[missing_id]["snippet"]["position"]})\t'\
                      f'| Old Title: {videos[missing_id]["snippet"]["title"]}')
                if remove_missing_videos: videos.pop(missing_id)

    # NOTE: Check if any playlists that have appeared in previous runs are no longer present.
    plist_missing = set(old_pl_data.keys()).difference(plist_contained)
    if len(plist_missing) > 0:
        print(f'Missing Playlist detected!')
        if remove_missing_playlists:
            any_change_to_data = True
            print('Following Playlists have been removed from Tracked Data:')
        else:
            print('Following Playlists are missing from received data, but will be carried over from backup:')
        for missing_id in plist_missing:
            print(f'\tID: {missing_id} | Old Title: {old_pl_data[missing_id]["snippet"]["title"]}')
            if not remove_missing_playlists: new_pl_data[missing_id] = old_pl_data[missing_id]

    print('\nTo remove missing videos or playlist from Tracked Data, pass the corresponding flags to main().')
    
    # NOTE: The summary is printed regardless of `VERBOSE`.
    print('='*75, force_print=True)
    if any_change_to_data: print('!---CAUTION---! Tracked Data has Changed !---CAUTION---!', force_print=True)
    else:                  print('No Change to Tracked Data.', force_print=True)
    
    return old_pl_data, new_pl_data

In [19]:
# Set to False to silence all output, except for messages that are printed with `force_print=True`.
VERBOSE = True

In [20]:
# Resolve all playlists that are tracked via `tracked_ids.json`, and show how many there are.
playlists = load_playlists_from_file()
len(playlists)

10

`main()` loads the previous backup, queries all videos for all playlists that are passed as the argument, and compares that information with the backup. It then returns the backup data and the newly constructed data for further analysis, or to overwrite the old backup.

In [23]:
# Compare the received data against the backup.
# With these flags, missing videos are removed from the new data, while missing playlists are carried over.
old_pl_data, new_pl_data = main(playlists, remove_missing_videos=True, remove_missing_playlists=False)

-------------------------------------------------------
-------------------------Ted Nelson's Computers for Cynics @ bennokr
-------------------------------------------------------
-------------------------AlphaGo, AlphaZero, AlphaGo Zero @ Two Minute Papers
-------------------------------------------------------
-------------------------3D Printing / 3D Fabrication @ Two Minute Papers
-------------------------------------------------------
-------------------------Light Transport, Ray Tracing and Global Illumination (Two Minute Papers) @ Two Minute Papers
-------------------------------------------------------
-------------------------Fluid, Cloth and Hair Simulations (Two Minute Papers) @ Two Minute Papers
Duplicate Video ID "CSQPD3oyvD8" at position 15. Previous occurrence is at position 14
Duplicate Video ID "CSQPD3oyvD8" at position 16. Previous occurrence is at position 15
-------------------------------------------------------
-------------------------AI and Deep Learning - Two 

In [22]:
# Overwrite the backup in `playlist_data.json` with the newly constructed data.
save_playlist_data(new_pl_data)

## Explore Stored Data

In [24]:
# Structure of Data:
# (Abridged: only a subset of the stored keys is shown, e.g. "kind" and "etag" are left out.)
# The trailing `;` keeps the notebook from displaying this dict as the output of the cell.
{
    "random_playlist_id_1" : {
        "id" : "random_playlist_id_1",
        "snippet" : {
            "channelId" : "random_id_of_channel_that_owns_this_playlist",
            "title" : "The Title of this playlist",
            "description" : "The Description of this playlist",
            "channelTitle" : "Title of the channel that owns this playlist"
        },
        "videos" : {
            "random_video_id_1" : {
                "snippet" : {
                    "channelId" : "random_id_of_channel_that_owns_this_playlist",
                    "title" : "The Title of this video",
                    "description" : "The Description of this video",
                    "channelTitle" : "Title of the channel that owns this playlist",
                    "position" : 123,
                    "resourceId" : {"videoId" : "random_id_of_this_video"}
                }
            },
            "random_video_id_2" : {},
            "random_video_id_3" : {},
        }
    },
    "random_playlist_id_2" : {},
    "random_playlist_id_3" : {},
};

In [25]:
# Load the stored backup from `playlist_data.json` for exploration.
data = load_playlist_data()

In [26]:
# Grab one playlist to look at.
# NOTE: `popitem()` removes that playlist from `data`, so `data` is incomplete afterwards.
pid, plist = data.popitem()

In [27]:
# Keys of a stored playlist: the playlist metadata, plus the `videos` entry that `main()` adds.
plist.keys()

dict_keys(['kind', 'etag', 'id', 'snippet', 'videos'])

In [28]:
# Grab one video of that playlist. NOTE: `popitem()` removes it from `plist['videos']`.
vid, video = plist['videos'].popitem()

In [29]:
# Keys that are stored in the snippet of a video.
video['snippet'].keys()

dict_keys(['publishedAt', 'channelId', 'title', 'description', 'thumbnails', 'channelTitle', 'playlistId', 'position', 'resourceId'])