In [11]:
# Dependencies and Setup
import base64
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import base64
from requests.auth import HTTPBasicAuth
import scipy.stats as st
from pprint import pprint

# Import Spotify API key
from api_keys import spotify_client_id, spotify_client_secret

In [12]:
# Create playlist dataframe from csv file that contains a list of playlist IDs that we want to search tracks for
play_df = pd.read_csv("datasets/playlist_top_5_input.csv")
play_df.head()

# Prepare output csv file path to save the playlists and tracks data retrieved from the Spotify API
spotify_playlists_csv = "datasets/playlist_top_5_output.csv"

In [13]:
"""
Code to manage Spotify's Client Credentials Flow as described here:
https://developer.spotify.com/documentation/general/guides/authorization-guide/#client-credentials-flow
"""
# Build the Spotify Client ID and Secret combination
spotify_client_id_secret = spotify_client_id + ":" + spotify_client_secret
# Generate the base64 encoded string that contains the client ID and client secret key
base64_encoding = base64.b64encode(spotify_client_id_secret.encode()).decode()

# Build the Spotify API URL to get the Bearer token
spotify_token_url = 'https://accounts.spotify.com/api/token'

# Build the payload, i.e. the request body parameters that need to be passed to the token API URL
payload = 'grant_type=client_credentials'
# Build the header for Content-Type and Basic Auth containing the base64 encoded string so we can generate the Spotify API token
token_api_headers = { 'Content-Type': 'application/x-www-form-urlencoded', 'Authorization': 'Basic %s' % base64_encoding }

# Call the token API URL and save as a JSON response object
spotify_token_response = requests.post(spotify_token_url, headers=token_api_headers, data=payload).json()
# Retrieve the Bearer token from the response object
spotify_access_token = spotify_token_response['access_token']

In [14]:
"""
Code to retrieve a Spotify Playlist's Tracks and related data:
https://developer.spotify.com/documentation/web-api/reference/playlists/get-playlists-tracks/
"""
# Sample Playlist ID
playlist_ids = play_df["List ID"]

# Initialize lists to save playlist data
track_added_at = []
track_id = []
track_names = []
track_popularity = []
track_durations = []
artists = []
albums = []
album_types = []
release_dates = []
num_available_markets = []

#loops for creating dataframe columns
for playlist_id in playlist_ids:
    
    # Handle any exceptions for Spotify's Get Playlist's Tracks API
    try:
        # Build Spotify's Get Playlist's Tracks URL
        spotify_playlists_url = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'
        # Build the header for Bearer token containing the base64 encoded string so we can retrieve the playlist's tracks 
        playlist_api_headers = { 'Authorization': 'Bearer %s' % spotify_access_token }
        # Call the Get Playlist's Tracks API URL and save as a JSON response object
        spotify_playlists_response = requests.get(spotify_playlists_url, headers=playlist_api_headers, data={}).json()
        
        # Get total tracks contained in playlist
        total_tracks = spotify_playlists_response['total']
        # Number of tracks that can be retrieved (Spotify limit = 100)
        track_limit = len(spotify_playlists_response['items'])

        # Playlist returns a track object which we will parse to retrieve all the track, album and artist data
        for item in range(track_limit):
            
            # Check if a track has a video
            if spotify_playlists_response['items'][item]['track'] is not None:
                # Get the date and time the track was added to the playlist 
                track_added_at.append(spotify_playlists_response['items'][item]['added_at'])
                #Get track ID
                track_id.append(spotify_playlists_response['items'][item]['track']['album']['id'])
                # Get the name of the track
                track_names.append(spotify_playlists_response['items'][item]['track']['name'])
                # Get the popularity score of the track
                track_popularity.append(spotify_playlists_response['items'][item]['track']['popularity'])
                # Get the track duration (song length) in milliseconds
                track_durations.append(spotify_playlists_response['items'][item]['track']['duration_ms'])
                # Get the number of markets that the track is available in
                num_available_markets.append(len(spotify_playlists_response['items'][item]['track']['available_markets']))
                # Get the album name
                albums.append(spotify_playlists_response['items'][item]['track']['album']['name'])
                # Get the album type: single, album or compilation
                album_types.append(spotify_playlists_response['items'][item]['track']['album']['album_type'])
                # Get the album release date
                release_dates.append(spotify_playlists_response['items'][item]['track']['album']['release_date'])

                # Each track could be associated with one or more artists
                # Parse through each artist object to retrieve their names
                
                artist_name = ''
                artists_count = len(spotify_playlists_response['items'][item]['track']['artists'])
                for artist in range(artists_count):
                    artist_name = artist_name + '|' + spotify_playlists_response['items'][item]['track']['artists'][artist]['name']

                artists.append(artist_name)
        print(f"Finished processing Playlist {playlist_id} with total tracks: {total_tracks} | Retrieval limited to {track_limit}")

    # Catch any exceptions thrown by the Spotify API
    except Exception as e:
        print(e)
        pass
print(f"----------------/nFinished processing all playlists/n-----------------")

Finished processing Playlist 37i9dQZF1DXcBWIGoYBM5M with total tracks: 50 | Retrieval limited to 50
Finished processing Playlist 37i9dQZF1DWUa8ZRTfalHk with total tracks: 75 | Retrieval limited to 75
Finished processing Playlist 37i9dQZF1DX4JAvHpjipBk with total tracks: 96 | Retrieval limited to 96
Finished processing Playlist 37i9dQZF1DXbYM3nMM0oPk with total tracks: 75 | Retrieval limited to 75
Finished processing Playlist 37i9dQZF1DX0b1hHYQtJjp with total tracks: 75 | Retrieval limited to 75
Finished processing Playlist 37i9dQZF1DX0XUsuxWHRQd with total tracks: 54 | Retrieval limited to 54
Finished processing Playlist 37i9dQZF1DWY4xHQp97fN6 with total tracks: 100 | Retrieval limited to 100
Finished processing Playlist 37i9dQZF1DX6GwdWRQMQpq with total tracks: 50 | Retrieval limited to 50
Finished processing Playlist 37i9dQZF1DX2RxBh64BHjQ with total tracks: 100 | Retrieval limited to 100
Finished processing Playlist 37i9dQZF1DX7Mq3mO5SSDc with total tracks: 48 | Retrieval limited to

In [16]:
# Save the retrieved playlists' track data into a Data Frame
playlist_data_df = pd.DataFrame( {'Playlist ID': playlist_id, 'Track Name': track_names, 'Track ID': track_id, 'Track Added At': track_added_at, 'Track Popularity': track_popularity, 'Track Duration': track_durations, 'Available Markets': num_available_markets, 'Album': albums, 'Album Type': album_types, 'Artist': artists, 'Release Date': release_dates } )

# Export the playlists' tracks data to a csv
playlist_data_df.to_csv(spotify_playlists_csv)

# Display the playlists' tracks data frame
playlist_data_df.head()

Unnamed: 0,Playlist ID,Track Name,Track ID,Track Added At,Track Popularity,Track Duration,Available Markets,Album,Album Type,Artist,Release Date
0,37i9dQZF1DXasneILDRM7B,You should be sad,1gBDGPFz9v93dxE3fUU9eO,2020-01-29T19:04:50Z,89,205473,79,You should be sad,single,|Halsey,2020-01-10
1,37i9dQZF1DXasneILDRM7B,The Box,52u4anZbHd6UInnmHRFzba,2020-01-29T19:04:50Z,100,196652,78,Please Excuse Me For Being Antisocial,album,|Roddy Ricch,2019-12-06
2,37i9dQZF1DXasneILDRM7B,bad guy,0S0KGZnfBGSIssfF54WSJh,2020-01-29T19:04:50Z,95,194087,79,"WHEN WE ALL FALL ASLEEP, WHERE DO WE GO?",album,|Billie Eilish,2019-03-29
3,37i9dQZF1DXasneILDRM7B,Blinding Lights,2ZfHkwHuoAZrlz7RMj0PDz,2020-01-29T19:04:50Z,98,201573,79,Blinding Lights,single,|The Weeknd,2019-11-29
4,37i9dQZF1DXasneILDRM7B,Falling,1Czfd5tEby3DbdYNdqzrCa,2020-01-29T19:04:50Z,98,159381,79,Falling,single,|Trevor Daniel,2018-10-05
