# Playlist download with Spotipy

# Overview
    1) Basic settings
    2) Process data
    3) Batch execution

# 1) Basic settings and class

In [20]:
import os
import json
import pandas as pd
import datetime
import spotipy
import sys
from urllib3.exceptions import ReadTimeoutError
from spotipy.oauth2 import SpotifyClientCredentials
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

# Provide Credentials to request Spotify API

In [21]:
os.environ['SPOTIPY_CLIENT_SECRET'] = '61536735f1bd40fcbefcf6d9d7e020e0'
os.environ['SPOTIPY_CLIENT_ID'] = '204a173aa42349368ddc04651ecc1b1f'

# Simple class for reusability

In [26]:
class PlaylistDownloader(object):
    sp = ''
    df_playlist = None
    df_playlistid = ''
    
    def __init__(self, playlistid: str):
        self.sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())
        self.playlistid = playlistid
    
    def get_playlist(self, playlistid: str = '') -> pd.DataFrame:
        if playlistid == '':
            playlistid = self.playlistid
        playlist_2019 = sp.playlist(playlistid)
        self.df_playlist = pd.json_normalize(playlist_2019['tracks']['items'])
        return self.df_playlist
    
    def get_audio_features(self) -> pd.DataFrame:
        audio_features = None
        print("Downloading audio features for " +str(self.playlistid)+" - This may take a while:")
        while audio_features is None:
            try:
                audio_features = self.df_playlist['track.id'].apply(lambda x: sp.audio_features(x) if x != None and not isinstance(x,float) else None)
            except ReadTimeoutError:
                print('timed out')
                continue
            except KeyboardInterrupt:
                exit()
            except KeyError:
                break
            except:
                e = sys.exc_info()[0]
                print("audiofeatures: ",e)
        self.df_playlist["audio_features"] = audio_features
        print('Flattening Audio features data')
        features_list = []
        for song_feature in self.df_playlist["audio_features"]:
            features_list.append(song_feature[0])
            
        df_with_audio_features = pd.DataFrame(features_list)
        print('Looking for artistnames in subdictionary')
        artist_names = []
        for artist in self.df_playlist["track.album.artists"]:
            artist_names.append(artist[0]['name'])
        df_with_audio_features['artists'] = artist_names
        self.df_playlist = pd.concat([self.df_playlist, df_with_audio_features],join="inner",axis=1)
        return self.df_playlist
    
    def clean(self) -> pd.DataFrame:
        drop_list = ['added_at', 'is_local', 'primary_color', 'added_by.external_urls.spotify', 'added_by.href', 'added_by.id', 'added_by.type', 'track.album.album_type','track.album.artists','track.album.available_markets', 'track.album.external_urls.spotify', 'added_by.uri','track.album.href','track.album.id','track.album.images','track.album.name','track.album.release_date','track.album.release_date_precision', 'track.album.total_tracks','track.album.type','track.album.uri','track.artists','track.available_markets','track.disc_number','track.duration_ms','track.episode','track.explicit','track.external_ids.isrc','track.external_urls.spotify','track.href','track.is_local','track.preview_url','track.track','track.type','track.url','track.uri','video_thumbnail','audio_features','type','url','track.id','track_href','analysis_url','uri','track.track_number','video_thumbnail.url']
        rename_dict = {'track.name':'name','track.popularity':'popularity'}
        for key in rename_dict:
            if key in self.df_playlist:
                self.df_playlist.rename(columns={key:rename_dict[key]},inplace=True)
        for drop in drop_list:
            if drop in self.df_playlist:
                self.df_playlist.drop(columns=drop,inplace=True);
        return self.df_playlist

# 2) Process DATA
# Enter playlist ID

In [5]:
playlistid = '37i9dQZF1DXcz8eC5kMSWZ'
playlist_d = PlaylistDownloader(playlist_id)
playlist_d.playlistid


NameError: name 'playlist_id' is not defined

# Download the playlist

In [None]:
playlist_d.get_playlist()
display(playlist_d.df_playlist)

# Download music features

In [None]:
playlist_d.get_audio_features()
playlist_d.df_playlist

# Cleaning data

In [None]:
playlist_d.clean()
playlist_d.df_playlist

# Save to file

In [6]:
playlist_d.df_playlist.to_csv(playlist_id+'.csv', mode='a', header=True)

NameError: name 'playlist_d' is not defined

# 3) Batch execution

In [28]:
playlist_id_dictonary = {'Top50-Charts-Global-2019':'37i9dQZF1DXcz8eC5kMSWZ',
'Top100-Charts-Global-2018':'37i9dQZF1DWSeqc9t1iHgX',
'Top100-Charts-USA-2017':'37i9dQZF1DX7Axsg3uaDZb'}


In [29]:
for playlist_name in playlist_id_dictonary:
    playlist_d = PlaylistDownloader(playlist_id_dictonary[playlist_name])
    playlist_d.get_playlist()
    playlist_d.get_audio_features()
    playlist_d.clean()
    playlist_d.df_playlist.to_csv(playlist_name+'.csv', mode='a', header=True)

print("Done")

Downloading audio features for 37i9dQZF1DXcz8eC5kMSWZ - This may take a while:
Flattening Audio features data
Looking for artistnames in subdictionary
Downloading audio features for 37i9dQZF1DWSeqc9t1iHgX - This may take a while:
Flattening Audio features data
Looking for artistnames in subdictionary
Downloading audio features for 37i9dQZF1DX7Axsg3uaDZb - This may take a while:
Flattening Audio features data
Looking for artistnames in subdictionary
Done
