In [4]:
import pandas as pd
from dotenv import load_dotenv
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [5]:
# Load environment variables from a .env file before the Spotify client is built.
# NOTE(review): verbose=True presumably makes python-dotenv report when no .env file is found — confirm.
load_dotenv(verbose=True)

# Shared Spotify Web API client; the functions defined in later cells use this global `sp`.
# NOTE(review): SpotifyClientCredentials() is called without arguments, so the client
# ID/secret are presumably read from the environment populated above — confirm.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials())

In [38]:
def get_all_track_ids_from_albums_referenced_in_playlist(playlist_id: str) -> list:
    """Return the IDs of every track on every album referenced by a playlist.

    The playlist is walked page by page to collect the album of each entry,
    then every album's track listing is walked page by page as well, so
    albums with more tracks than fit in one response page are not truncated.

    Args:
        playlist_id: Spotify ID of the playlist to inspect.

    Returns:
        A list of track IDs, grouped by album in order of each album's first
        appearance in the playlist. Progress counts are printed along the way.
    """
    def iter_paged_items(page):
        """Yield the items of a paged response, following its `next` links."""
        while page:
            yield from page['items']
            page = sp.next(page) if page['next'] else None

    # A dict is used as an insertion-ordered set so that de-duplication keeps
    # a deterministic album order (a plain set would shuffle it between runs).
    unique_album_ids = {}
    for item in iter_paged_items(sp.playlist_items(playlist_id=playlist_id)):
        # Playlist entries may carry no track (removed/unavailable), and
        # episodes or local files may carry no album or no album ID.
        track = (item or {}).get('track') or {}
        album_id = (track.get('album') or {}).get('id')
        if album_id:
            unique_album_ids[album_id] = None
    album_ids = list(unique_album_ids)

    print(f'{len(album_ids)} albums referenced in playlist')

    # TODO: use the endpoint for multiple albums here?
    track_ids = []
    for album_id in album_ids:
        track_ids.extend(
            item['id'] for item in iter_paged_items(sp.album_tracks(album_id=album_id))
        )

    print(f'{len(track_ids)} total tracks in those albums')

    return track_ids

In [46]:
def get_tracks_info_and_audio_features(track_ids: list) -> pd.DataFrame:
    """Build a table of basic metadata and audio features for the given tracks.

    Track objects, audio features and the primary artists' genres are each
    fetched in batches through the global `sp` client and joined per track.

    Args:
        track_ids: Spotify track IDs to look up.

    Returns:
        A DataFrame with one row per track that could be resolved, holding the
        columns id, name, artist, album, release_date, genre followed by the
        audio-feature columns. `genre` is the first genre of the track's
        first-listed artist ('' when none is known); audio-feature cells are
        empty (None/NaN) for tracks that have no audio features available.
    """
    def chunks(items, size):
        """Yield consecutive slices of `items` holding at most `size` elements."""
        return (items[i: i + size] for i in range(0, len(items), size))

    # IDs that cannot be resolved come back as None placeholders; drop them.
    tracks = []
    for track_ids_chunk in chunks(track_ids, 50):
        tracks.extend(track for track in sp.tracks(track_ids_chunk)['tracks'] if track)

    # Tracks without audio features are likewise reported as None.
    track_id_to_audio_features = dict()
    for track_ids_chunk in chunks(track_ids, 100):
        for track_audio_features in sp.audio_features(track_ids_chunk) or []:
            if track_audio_features:
                track_id_to_audio_features[track_audio_features['id']] = track_audio_features

    # The artist objects embedded in track responses do not include genres,
    # so look the genres up through the artists endpoint (batched, de-duplicated).
    primary_artist_ids = list(dict.fromkeys(
        track['artists'][0]['id']
        for track in tracks
        if track['artists'] and track['artists'][0].get('id')
    ))
    artist_id_to_genres = dict()
    for artist_ids_chunk in chunks(primary_artist_ids, 50):
        for artist in sp.artists(artist_ids_chunk)['artists']:
            if artist:
                artist_id_to_genres[artist['id']] = artist.get('genres') or []

    track_columns = ['id', 'name', 'artist', 'album', 'release_date', 'genre']
    track_audio_features_columns = ['acousticness', 'danceability', 'duration_ms', 'energy',
                        'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'speechiness',
                        'tempo', 'time_signature', 'valence']

    # TODO: support data for more than just the first-appearing artist and genre?
    rows = []
    for track in tracks:
        artist_obj = track['artists'][0] if track['artists'] else {}
        album_obj = track['album']

        # Prefer genres already present on the embedded artist, if any;
        # otherwise fall back to the artists-endpoint lookup above.
        genres = artist_obj.get('genres') or artist_id_to_genres.get(artist_obj.get('id'), [])
        genre = genres[0] if genres else ''

        audio_features = track_id_to_audio_features.get(track['id'], {})

        rows.append([track['id'], track['name'], artist_obj.get('name', ''),
                     album_obj['name'], album_obj['release_date'], genre]
                    + [audio_features.get(audio_feature) for audio_feature in track_audio_features_columns])

    return pd.DataFrame(rows, columns=track_columns+track_audio_features_columns)

In [11]:
# Spotify playlist IDs used as inputs; judging by the names, one small playlist
# for quick test runs and one larger playlist.
SMALL_TESTER_PLAYLIST = '37i9dQZF1DWXT8uSSn6PRy'
BIG_BOI_PLAYLIST = '37i9dQZF1DWTmvXBN4DgpA'

# Expand the small playlist into the track IDs of every album it references.
track_ids = get_all_track_ids_from_albums_referenced_in_playlist(SMALL_TESTER_PLAYLIST)

75 albums referenced in playlist
352 total tracks in those albums


In [47]:
# Build the per-track table for the IDs collected above.
dataset = get_tracks_info_and_audio_features(track_ids)

# Show a bounded preview as the cell's rich output instead of printing the whole frame.
dataset.head()

{'album': {'album_type': 'single', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/26VFTg2z8YR0cCuwLzESi2'}, 'href': 'https://api.spotify.com/v1/artists/26VFTg2z8YR0cCuwLzESi2', 'id': '26VFTg2z8YR0cCuwLzESi2', 'name': 'Halsey', 'type': 'artist', 'uri': 'spotify:artist:26VFTg2z8YR0cCuwLzESi2'}], 'available_markets': ['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT', 'AU', 'AZ', 'BA', 'BB', 'BD', 'BE', 'BF', 'BG', 'BH', 'BI', 'BJ', 'BN', 'BO', 'BR', 'BS', 'BT', 'BW', 'BZ', 'CA', 'CD', 'CG', 'CH', 'CI', 'CL', 'CM', 'CO', 'CR', 'CV', 'CW', 'CY', 'CZ', 'DE', 'DJ', 'DK', 'DM', 'DO', 'DZ', 'EC', 'EE', 'EG', 'ES', 'FI', 'FJ', 'FM', 'FR', 'GA', 'GB', 'GD', 'GE', 'GH', 'GM', 'GN', 'GQ', 'GR', 'GT', 'GW', 'GY', 'HK', 'HN', 'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IN', 'IQ', 'IS', 'IT', 'JM', 'JO', 'JP', 'KE', 'KG', 'KH', 'KI', 'KM', 'KN', 'KR', 'KW', 'KZ', 'LA', 'LB', 'LC', 'LI', 'LK', 'LR', 'LS', 'LT', 'LU', 'LV', 'LY', 'MA', 'MC', 'MD', 'ME', 'MG', 'MH', 'MK', 'ML', 'MN', '

In [48]:
# Fetch the full artist object for a single artist ID; its output includes a
# 'genres' list, unlike the artist entries embedded in track responses.
sp.artist('26VFTg2z8YR0cCuwLzESi2')

{'external_urls': {'spotify': 'https://open.spotify.com/artist/26VFTg2z8YR0cCuwLzESi2'},
 'followers': {'href': None, 'total': 18469992},
 'genres': ['dance pop', 'electropop', 'etherpop', 'indie poptimism', 'pop'],
 'href': 'https://api.spotify.com/v1/artists/26VFTg2z8YR0cCuwLzESi2',
 'id': '26VFTg2z8YR0cCuwLzESi2',
 'images': [{'height': 640,
   'url': 'https://i.scdn.co/image/ab6761610000e5ebd707e1c5177614c4ec95a06c',
   'width': 640},
  {'height': 320,
   'url': 'https://i.scdn.co/image/ab67616100005174d707e1c5177614c4ec95a06c',
   'width': 320},
  {'height': 160,
   'url': 'https://i.scdn.co/image/ab6761610000f178d707e1c5177614c4ec95a06c',
   'width': 160}],
 'name': 'Halsey',
 'popularity': 83,
 'type': 'artist',
 'uri': 'spotify:artist:26VFTg2z8YR0cCuwLzESi2'}