##### Import relevant libraries/modules and get access token

In [26]:
import pandas as pd
import numpy as np
from get_api_data import *

In [27]:
# Request an access token; the result is passed to every API helper called below.
# NOTE(review): AUTH_URL, CLIENT_ID and CLIENT_SECRET are not defined in the cells
# shown here - presumably they come from the star import of get_api_data; confirm.
access_token_header = get_access_token(AUTH_URL, CLIENT_ID, CLIENT_SECRET)

In [28]:
# Id of the playlist whose tracks are fetched below.
# A second playlist id is kept here, disabled; uncomment it (and comment out the
# active line) to run the notebook against that playlist instead.
# playlist_id = '1ulFK6boXuJRlRy5kH0pEX'
playlist_id = '03KGgbv2VfJ8vJAqt6TfPw'


##### Get playlist data (i.e. all tracks from playlist)

In [29]:
# First call to get playlist information. Later cells read three entries of this
# response: 'next' (to page through the playlist), 'items' (the tracks themselves)
# and 'total' (to sanity-check the number of rows collected).
playlist_response = playlist_tracks_endpoint(playlist_id, access_token_header)

In [30]:
# A single response does not necessarily cover the whole playlist (the first
# call is not enough once it holds more than 100 songs). Keep following each
# response's 'next' entry until it comes back as None.
all_responses = [playlist_response]
next_page = all_responses[-1]['next']

while True:
    if next_page is None:
        # Last page reached - nothing further to request
        break
    offset_response = general_endpoint(next_page, access_token_header)
    all_responses += [offset_response]
    next_page = all_responses[-1]['next']

##### Create dataframe with only relevant info from API call(s)

In [31]:
# Collect every track from every page into ONE mapping keyed by track id, then
# build the dataframe a single time. Compared with concatenating one dataframe
# per page onto an initially empty frame, this:
#   * avoids re-copying all previously collected rows for each page, and
#   * treats a track that appears more than once in the playlist the same way
#     whether the repeats fall on the same page or on different pages: it is
#     kept once (position of its first occurrence, values of its last), so the
#     index is unique and the later index-based merge cannot multiply rows.
# Consequence: the row count can be lower than the playlist's reported 'total'
# when the playlist contains repeated or skipped items.
cleaned_tracks = {}

for response in all_responses:
    for item in response['items']:
        track = item['track']

        # NOTE(review): the Spotify playlist-items reference reportedly allows
        # `track` to be null (track no longer available) and `id` to be null
        # (local files) - confirm. Such items cannot be looked up by id in the
        # audio-features query, so skip them instead of raising a TypeError or
        # creating a row keyed by None.
        if track is None or track['id'] is None:
            continue

        cleaned_tracks[track['id']] = {
            'artists': [artist['name'] for artist in track['artists']],
            'name': track['name'],
        }

# Rows are indexed by track id; columns are 'artists' (list of names) and 'name'
all_tracks_df = pd.DataFrame.from_dict(cleaned_tracks, orient='index')

all_tracks_df.tail()

Unnamed: 0,artists,name
2VUo8O3ymKRYNgj97ZG2kM,[The Dave Brubeck Quartet],Unsquare Dance


##### Ensuring dataframe is correct

In [32]:
# Sanity check: the dataframe should hold one row per track reported by the API
# ('total' comes from the first playlist response)
len(all_tracks_df) == playlist_response['total']

True

##### Create string of ids to query Audio Features

In [33]:
# The track ids are the index of all_tracks_df (rows were keyed by track id when
# that dataframe was built)
track_ids = all_tracks_df.index

def prepare_ids_for_query(ids, max_len=100):
    """Split track ids into comma-delimited strings of at most ``max_len`` ids.

    Parameters
    ----------
    ids : iterable of str
        Track ids, in the order they should be queried.
    max_len : int, default 100
        Maximum number of ids per comma-delimited string. Must be at least 1.

    Returns
    -------
    list of str
        One comma-delimited string per group, in input order. Every group holds
        exactly ``max_len`` ids except possibly the last, which holds the
        remainder. Empty input yields ``[""]`` (a single empty group); this
        matches the historical behaviour and is kept for backward compatibility.

    Raises
    ------
    ValueError
        If ``max_len`` is smaller than 1. Previously such a value made the
        function loop forever, because no ids were ever consumed.
    """
    if max_len < 1:
        raise ValueError(f"max_len must be at least 1, got {max_len!r}")

    track_ids = list(ids)
    if not track_ids:
        # Preserve the historical result for an empty playlist
        return [""]

    # Step through the ids in strides of max_len; slicing past the end is safe,
    # so the final group simply contains whatever is left over.
    return [
        ",".join(track_ids[start:start + max_len])
        for start in range(0, len(track_ids), max_len)
    ]

# Group the ids into comma-delimited strings (at most 100 ids each, the default
# max_len); each string is later sent as one audio-features request
ids_list = prepare_ids_for_query(track_ids)

##### Checking ids have been formatted correctly

In [34]:
# Count the ids in every comma-delimited group, then confirm that the final
# group accounts for exactly the tracks not covered by the groups before it
group_sizes = [len(ids.split(",")) for ids in ids_list]
group_sizes[-1] == playlist_response['total'] - sum(group_sizes[:-1])

True

##### Get audio features data for all tracks in playlist

In [35]:
# One audio-features request per comma-delimited group of ids; responses are
# kept in the same order as the groups
all_audio_features_list = [
    multiple_audio_features_endpoint(id_group, access_token_header)
    for id_group in ids_list
]

##### Create dataframe of audio features

In [36]:
# Flatten the per-request responses (one per group of up to 100 ids) into a
# single list of per-track feature records.
# NOTE(review): the audio-features endpoint reportedly returns a null entry for
# an id it has no features for - confirm. Null entries are skipped here; reading
# ['id'] from them would otherwise raise a TypeError.
audios_features = [
    features
    for tracks_features in all_audio_features_list
    for features in tracks_features['audio_features']
    if features is not None
]

# Build the dataframe once, indexed by track id, instead of concatenating one
# frame per request onto an initially empty frame. `drop` returns a new frame
# (no in-place mutation). errors='ignore' keeps the cell working when there are
# no records at all, in which case the columns to drop do not exist.
all_audio_features_df = pd.DataFrame(
    audios_features,
    index=[features['id'] for features in audios_features],
).drop(
    # Identifier / link columns that are not audio features
    columns=['type', 'id', 'uri', 'track_href', 'analysis_url'],
    errors='ignore',
)

all_audio_features_df.tail()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
2VUo8O3ymKRYNgj97ZG2kM,0.65,0.496,7,-13.869,1,0.115,0.0723,0.88,0.119,0.615,116.868,119867,4


##### Merge dataframes together

In [37]:
# Both dataframes are indexed by track id, so join them on their indexes.
# how='inner' spells out pd.merge's default: only ids present in both frames
# are kept.
tracks_with_features = pd.merge(
    all_tracks_df,
    all_audio_features_df,
    how='inner',
    left_index=True,
    right_index=True,
)

tracks_with_features

Unnamed: 0,artists,name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
2VUo8O3ymKRYNgj97ZG2kM,[The Dave Brubeck Quartet],Unsquare Dance,0.65,0.496,7,-13.869,1,0.115,0.0723,0.88,0.119,0.615,116.868,119867,4
