# Spotify Data

This data will assist in imputing missing values for the 'BPM' column in the 'Music & Mental Health Survey' dataset. More specifically, the difference in BPM between genres will be investigated. If there is a significant difference between genres, the genre-level BPM values will be used to impute the missing values.

In [1]:
import os
from dotenv import load_dotenv

# Load the local .env file, then read the Spotify API credentials from the environment
load_dotenv()
CLIENT_ID = os.environ.get('CLIENT_ID')
CLIENT_SECRET = os.environ.get('CLIENT_SECRET')

In [2]:
import requests
import json
import base64

import pandas as pd

# Methods for Querying Spotify API

In [3]:
def get_access_token():
    """Request an access token from Spotify using the Client Credentials grant.

    Reads the module-level CLIENT_ID and CLIENT_SECRET.

    Returns:
        (str) -- Value of the 'access_token' field in the JSON response
    """

    # Basic auth value: base64 of "<client id>:<client secret>"
    raw_credentials = ':'.join((CLIENT_ID, CLIENT_SECRET)).encode('utf-8')
    encoded_credentials = base64.b64encode(raw_credentials).decode('utf-8')

    # Token endpoint, called with the headers and form body for Client Credentials
    response = requests.post(
        'https://accounts.spotify.com/api/token',
        headers={
            'Authorization': "Basic " + encoded_credentials,
            'Content-Type': 'application/x-www-form-urlencoded',
        },
        data={'grant_type': 'client_credentials'},
    )

    # Parse the JSON body and pull out the token
    return response.json()['access_token']

In [4]:
def header_auth(access_token=None):
    """Generate Spotify authorization header.

    Params:
        access_token (str) -- Spotify access token. When omitted (None), a new
            token is requested with get_access_token() at call time.

    Returns:
        Authorization header in the format 'Bearer {access_token}'
    """

    # Resolve the default lazily: a default of get_access_token() in the
    # signature would run once at definition time (a network call while the
    # function is being defined) and reuse that one token for every later call.
    if access_token is None:
        access_token = get_access_token()

    return f"Bearer {access_token}"

In [5]:
def search_for_playlist(name, authorization_header):
    """Search Spotify for playlists matching name, limited to one result.

    Params:
        name (str) -- Search query sent as the 'q' parameter
        authorization_header (str) -- Header containing access token to access Spotify api

    Returns:
        Parsed JSON body of the search response
    """

    # Restrict the search to playlists and ask for a single result
    query = dict(q=name, type='playlist', limit=1)

    response = requests.get(
        url='https://api.spotify.com/v1/search?',
        params=query,
        headers={'Authorization': authorization_header},
    )
    return response.json()

In [6]:
def get_tracks(playlist_url, authorization_header):
    """Fetch up to 50 track entries from the playlist_url.

    Params:
        playlist_url (str): Link to access the tracks of a Spotify playlist
        authorization_header (str): Header containing access token to access Spotify api

    Returns:
        Parsed JSON body of the response; the request asks for only the
        'uri' of each item's track
    """

    # Only the track uri of each item is requested, capped at 50 items
    query = {'fields': 'items(track(uri))', 'limit': 50}

    response = requests.get(
        url=playlist_url,
        params=query,
        headers={'Authorization': authorization_header},
    )
    return response.json()

In [7]:
def get_track_urls(track_dict):
    """Returns a list of track IDs which can be used to query Spotify for audio features.

    Each ID is the track's uri with the 'spotify:track:' prefix removed.
    Entries whose 'track' value is missing or None are skipped.

    Params
        track_dict (dict) -- Dictionary of track items, with the entries under 'items'

    Returns
        track_urls (list) -- List of track IDs, in the order they appear in track_dict
    """

    track_urls = []

    for item in track_dict['items']:
        track = item.get('track')

        # A playlist entry can come back without a track object; indexing
        # None would abort the whole extraction, so skip the entry instead.
        if not track:
            continue

        track_urls.append(track['uri'].removeprefix('spotify:track:'))

    return track_urls

In [8]:
def get_track_features(track_ID, authorization_header):
    """Request the audio features of a single track from Spotify.

    Params
        track_ID (str) -- track ID used by Spotify
        authorization_header (str) -- Header containing access token to access Spotify api

    Returns
        Parsed JSON body of the audio-features response for track_ID
    """

    response = requests.get(
        url=f"https://api.spotify.com/v1/audio-features/{track_ID}",
        headers={'Authorization': authorization_header},
    )
    return response.json()

In [19]:
def gen_genre_df(genre, header):
    """Build a pandas DataFrame of audio features for tracks in a genre's top playlist.

    The genre is used as a playlist search query; the first playlist returned
    supplies the tracks (at most 50 are requested).

    Params
        genre (str) -- Genre to create df for
        header (str) -- Authorization header for making Spotify api calls

    Returns
        Dataframe with one row of audio features per track"""

    # First playlist returned by the search for this genre
    search_result = search_for_playlist(genre, header)
    top_playlist = search_result['playlists']['items'][0]

    # Endpoint that lists the playlist's tracks
    tracks_endpoint = top_playlist['tracks']['href']

    # Track entries -> bare track IDs
    track_ids = get_track_urls(get_tracks(tracks_endpoint, header))

    # One audio-features dict per track, requested one track at a time
    feature_rows = [get_track_features(track_id, header) for track_id in track_ids]

    # Convert list of dicts to pandas DataFrame
    return pd.DataFrame.from_dict(feature_rows)

# Spotify Queries

In [22]:
# Generate authorization header for API calls.
# A new access token is requested here and wrapped as 'Bearer {access_token}'.

header = header_auth(get_access_token())

In [21]:
# Build the audio-features DataFrame for the 'country' genre and preview its first rows
df = gen_genre_df('country', header)
df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.429,0.453,0,-7.746,1,0.0459,0.554,2e-06,0.102,0.155,77.639,audio_features,4KULAymBBJcPRpk1yO4dOG,spotify:track:4KULAymBBJcPRpk1yO4dOG,https://api.spotify.com/v1/tracks/4KULAymBBJcP...,https://api.spotify.com/v1/audio-analysis/4KUL...,227196,4
1,0.492,0.673,6,-5.431,1,0.0347,0.413,0.0,0.137,0.488,203.812,audio_features,7K3BhSpAxZBznislvUMVtn,spotify:track:7K3BhSpAxZBznislvUMVtn,https://api.spotify.com/v1/tracks/7K3BhSpAxZBz...,https://api.spotify.com/v1/audio-analysis/7K3B...,163855,4
2,0.712,0.603,8,-5.52,1,0.0262,0.186,0.0,0.115,0.67,97.994,audio_features,1Lo0QY9cvc8sUB2vnIOxDT,spotify:track:1Lo0QY9cvc8sUB2vnIOxDT,https://api.spotify.com/v1/tracks/1Lo0QY9cvc8s...,https://api.spotify.com/v1/audio-analysis/1Lo0...,265493,4
3,0.677,0.724,2,-8.992,1,0.0245,0.143,0.000101,0.158,0.697,90.392,audio_features,1ipcb9qXpSHWhSUvdxJhsx,spotify:track:1ipcb9qXpSHWhSUvdxJhsx,https://api.spotify.com/v1/tracks/1ipcb9qXpSHW...,https://api.spotify.com/v1/audio-analysis/1ipc...,212640,4
4,0.535,0.66,9,-7.174,1,0.0297,0.455,0.0,0.135,0.577,148.044,audio_features,3xdjjKMcMOFgo1eQrfbogM,spotify:track:3xdjjKMcMOFgo1eQrfbogM,https://api.spotify.com/v1/tracks/3xdjjKMcMOFg...,https://api.spotify.com/v1/audio-analysis/3xdj...,298373,4
