# Get Spotify audio features

Use the code in this notebook to get audio features of tracks on Spotify.
This code has functions to:

1. Get all albums for an artist
2. Get all tracks in an album
3. Iterate over a list of albums to get all tracks
4. Get audio features for a track
5. Iterate over a list of tracks to get audio features

Depending on how the tracks are obtained, not all functions may be required.

The following variables should be changed for a new project:

* <code>save_file_name</code>: The name of the CSV file, saved to Google Drive, that will hold the tracks and their audio features
* <code>artist_url</code>: The Spotify URL of the artist

In [0]:
# Output file name and Spotify artist page for Oku Hanako
oku_hanako_save_file = 'oku_hanako_audio_features.csv'
oku_hanako_url = 'https://open.spotify.com/artist/2tOwqfTtAMswbLySSaTRYR'


""" Variables to change """
artist_url = oku_hanako_url
save_file_name = oku_hanako_save_file

In [2]:
"""Set up connection to Google Drive."""

# Import the Colab Drive helper and mount Google Drive at /content/drive.
# Mounting is interactive: the cell prints an authorisation URL and waits
# for the authorisation code to be entered.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Suppress output of this cell
%%capture

"""Install and import dependencies."""

# Install and import spotipy to access Spotify API
!pip install spotipy
import spotipy 
from spotipy.oauth2 import SpotifyClientCredentials 

# Import configparser for loading configuration file to call Spotify API
import configparser

# Import pandas for converting list to dataframe
import pandas as pd 

In [0]:
"""Define path variables."""

# Root folder for this project inside the mounted Google Drive
home_path = '/content/drive/My Drive/Colab Notebooks'

# Folder where the audio features output is written
data_path = f'{home_path}/Data/Spotify audio features'

# Location of the file holding the Spotify API client settings
config_file_path = f'{home_path}/bin/spotify_client_config.txt'

In [5]:
def init_spotify_client(config_file_path: str):
  """
  Initialise Spotify client to use Spotify API.

  The configuration file is parsed with configparser and must contain a
  [developer.spotify.com] section with the keys `cid` (client ID) and
  `secret` (client secret).

  Args:
      config_file_path: The path to the Spotify config file.

  Returns:
      A Spotify client instance.

  Raises:
      FileNotFoundError: If the config file could not be read.
      KeyError: If the [developer.spotify.com] section, or one of its keys,
          is missing from the config file.
  """
  section_name = 'developer.spotify.com'

  # Load the Spotify client configuration file.
  # ConfigParser.read() silently skips files it cannot open and returns the
  # list of files it actually parsed, so an empty list means nothing was loaded.
  config = configparser.ConfigParser()
  files_read = config.read(config_file_path)
  if not files_read:
    raise FileNotFoundError(f'Could not read Spotify config file: {config_file_path}')

  print(f'Loaded config file with settings for: {config.sections()}')

  # Fail with a descriptive message instead of a bare KeyError on the section name
  if section_name not in config:
    raise KeyError(f'Section [{section_name}] not found in {config_file_path}')
  credentials = config[section_name]

  # Use configuration to set up Spotify client authorisation
  client_credentials_manager = SpotifyClientCredentials(
      client_id=credentials['cid'],
      client_secret=credentials['secret'])

  # Initialise the Spotify client
  sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

  print('Authorised Spotify client has been initialised')

  return sp


# Initialise the Spotify client from the config file stored in Google Drive
sp = init_spotify_client(config_file_path)

Loaded config file with settings for: ['developer.spotify.com']
Authorised Spotify client has been initialised


In [6]:
def get_album_info(sp, artist_url: str, album_type: str, country: str, request_limit: int):
  """
  Get album information for an artist on Spotify, and return them in a list.

  Spotify returns albums in pages of at most `request_limit` records. Every
  page is fetched, so the result is complete even when the artist has more
  albums of this type than fit into a single request.

  Args:
      sp: The Spotify client instance.
      artist_url: The Spotify URL of an artist.
      album_type: The album type to request.
      country: The country where the album is available.
      request_limit: The max number of records to return from one API request.

  Returns:
      A list of [album name, album Spotify ID] for the artist. The list is
      empty when the artist has no albums of the requested type.
  """

  # Get the first page of the artist's album information from Spotify
  page = sp.artist_albums(artist_url, album_type=album_type, country=country, limit=request_limit)

  # Initialise empty list
  output_list = []

  # Walk through all result pages
  while page:
    # Record the album name and Spotify ID of every album on this page
    output_list.extend([album['name'], album['id']] for album in page['items'])

    # A page without a `next` link is the last one
    page = sp.next(page) if page.get('next') else None

  print(f'Got {len(output_list)} album(s) of type "{album_type}"')

  return output_list


def get_artist_albums(sp, artist_url: str):
  """
  Collect the names and Spotify IDs of every release of an artist on Spotify.

  Albums, singles and compilations are requested separately and returned as
  one combined list, in that order.

  Args:
      sp: The Spotify client.
      artist_url: The Spotify URL of an artist.

  Returns:
      A list containing the album names and Spotify IDs for the artist.
  """

  # Largest request size allowed, and the market to query
  page_size = 50
  market = 'US'

  # Ask Spotify for each release type in turn
  albums, singles, compilations = [
      get_album_info(sp, artist_url, release_type, market, page_size)
      for release_type in ('album', 'single', 'compilation')
  ]

  # Chain the three result lists into one
  combined = albums + singles + compilations

  print(f'Total number of albums: {len(combined)}')

  return combined
  
  
# Get the names and IDs of all albums, singles and compilations of the artist
all_albums_list = get_artist_albums(sp, artist_url)

Got 14 album(s) of type "album"
Got 22 album(s) of type "single"
Got 0 album(s) of type "compilation"
Total number of albums: 36


In [7]:
def get_tracks_one_album(sp, album_id: str):
  """
  Get names and IDs of all tracks in the album, and return them in a list.

  The album object returned by Spotify embeds only the first page of its
  tracks, so the remaining pages are fetched as well. The number of tracks is
  taken from the tracks actually received rather than from the album's
  `total_tracks` metadata, which can be larger than the first page.

  Args:
      sp: The Spotify client.
      album_id: The Spotify ID of album

  Returns:
      A list of [album name, track name, track Spotify ID] for every track
      in the album.
  """

  # Get album information from Spotify
  album = sp.album(album_id)

  # Album name
  album_name = album['name']

  # Initialise empty list
  track_list = []

  # Start with the page of tracks embedded in the album object
  page = album['tracks']

  # Walk through all pages of tracks
  while page:
    # Record the album name, track name and track ID
    track_list.extend([album_name, track['name'], track['id']] for track in page['items'])

    # A page without a `next` link is the last one
    page = sp.next(page) if page.get('next') else None

  print(f'Extracted {len(track_list)} track ID(s) and name(s) from {album_name}')

  return track_list


def get_tracks_many_albums(sp, all_albums_list):
  """
  Gather the tracks of several albums into one flat list.

  Args:
      sp: The Spotify client
      all_albums_list: The list of [album name, album Spotify ID]

  Returns:
      A list with all tracks from all input albums, in the order of the
      input albums.
  """
  collected_tracks = []

  for album_entry in all_albums_list:
    # The Spotify ID is the second element of each album entry
    album_id = album_entry[1]
    collected_tracks.extend(get_tracks_one_album(sp, album_id))

  return collected_tracks
  

# Get all tracks from all the albums as a list of [album name, track name, track ID]
all_tracks_list = get_tracks_many_albums(sp, all_albums_list)

Extracted 16 track ID(s) and name(s) from Kasumisou
Extracted 11 track ID(s) and name(s) from Koitegami
Extracted 11 track ID(s) and name(s) from Time Note
Extracted 12 track ID(s) and name(s) from Yasashii Hana no Saku Basho
Extracted 14 track ID(s) and name(s) from Haruka Tooku ni Miete Ita Kyou
Extracted 6 track ID(s) and name(s) from Kimi ga Kureta Natsu
Extracted 13 track ID(s) and name(s) from Prism
Extracted 13 track ID(s) and name(s) from Kimi to Boku no Michi
Extracted 14 track ID(s) and name(s) from Good-Bye
Extracted 30 track ID(s) and name(s) from Hanako Oku Best ~ My Letters ~
Extracted 10 track ID(s) and name(s) from Kimi no Egao - Smile Selection -
Extracted 14 track ID(s) and name(s) from Utakata
Extracted 14 track ID(s) and name(s) from Birthday
Extracted 14 track ID(s) and name(s) from vol.BEST
Extracted 1 track ID(s) and name(s) from Christmas Night
Extracted 1 track ID(s) and name(s) from Negai
Extracted 1 track ID(s) and name(s) from Kimi no Hana (TV Size)
Extracte

In [8]:
def get_audio_features_one_track(sp, track_id: str):
  """
  Get the audio features for one track.

  Spotify answers with `None` in place of the features when it has no audio
  analysis for a track. In that case every feature value in the returned
  list is `None`, so the list always has the same length.

  Args:
      sp: The Spotify client.
      track_id: The Spotify ID of a music track.

  Returns:
      A list with the track ID followed by the audio features of the track:
      acousticness, danceability, duration_ms, energy, instrumentalness, key,
      liveness, loudness, mode, speechiness, tempo, time_signature, valence.
  """
  # Audio features to extract, in output order
  feature_keys = (
      'acousticness', 'danceability', 'duration_ms', 'energy',
      'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
      'speechiness', 'tempo', 'time_signature', 'valence'
  )

  # Get track audio features from Spotify (a list with one entry per track)
  track_features = sp.audio_features(track_id)

  # The entry is None when Spotify has no audio features for the track
  features = track_features[0] if track_features else None

  if features is None:
    return [track_id] + [None] * len(feature_keys)

  # Parse Spotify output into a list
  return [track_id] + [features[key] for key in feature_keys]


def get_audio_features_all_tracks(sp, track_list):
  """
  Get audio features of all tracks in the list and return them in a dataframe.

  The track IDs are sent to Spotify in batches instead of one request per
  track, which keeps the number of API calls (and rate-limit retries) low.
  Tracks for which Spotify has no audio features get empty feature values.

  Args:
      sp: The Spotify client.
      track_list: The list of [album name, track name, track Spotify ID].

  Returns:
      A dataframe with one row per track and the columns album, track,
      track_id, followed by the audio features.
  """
  # Audio features to extract, in output order
  feature_keys = [
      'acousticness', 'danceability', 'duration_ms', 'energy',
      'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
      'speechiness', 'tempo', 'time_signature', 'valence'
  ]

  # Maximum number of track IDs accepted by one audio features request
  batch_size = 100

  # Count the number of tracks
  num_tracks = len(track_list)

  # Initialise empty list
  features_list = []

  print(f'Start getting audio features for {num_tracks} tracks')

  # Iterate over all tracks, one batch at a time
  for batch_start in range(0, num_tracks, batch_size):
    batch = track_list[batch_start:batch_start + batch_size]

    # Get audio features for all track IDs of the batch in one request
    batch_features = list(sp.audio_features([track[2] for track in batch]) or [])

    # Pad a short response so every track of the batch still gets a row
    batch_features += [None] * (len(batch) - len(batch_features))

    # Results are matched to the requested tracks by position
    for track, features in zip(batch, batch_features):
      if features is None:
        # Spotify has no audio features for this track
        feature_values = [None] * len(feature_keys)
      else:
        feature_values = [features[key] for key in feature_keys]

      # Append album name, track name, track ID and audio features into master list
      features_list.append([track[0], track[1], track[2]] + feature_values)

  # Define output dataframe columns
  col_names = ['album', 'track', 'track_id'] + feature_keys

  # Convert the list into a dataframe
  features_df = pd.DataFrame(features_list, columns=col_names)

  print(f'Got audio features for {num_tracks} tracks')

  return features_df


# Get audio features of every track as a dataframe (one row per track)
audio_features_df = get_audio_features_all_tracks(sp, all_tracks_list)

Start getting audio features for 269 tracks
retrying ...1secs
retrying ...1secs
retrying ...4secs
Got audio features for 269 tracks


In [9]:
# Save the audio features as a CSV file in the data folder on Google Drive,
# leaving out the dataframe index
audio_features_df.to_csv(f'{data_path}/{save_file_name}', index=False)

# Preview the first 5 rows
audio_features_df.head(5)

Unnamed: 0,album,track,track_id,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,Kasumisou,Kaban no Naka no Yakimochi,3krX7tJSZtgcExvoz2Brdt,0.541,0.535,329067,0.323,2e-06,5,0.113,-7.98,1,0.0268,78.248,4,0.339
1,Kasumisou,Hontowane,2iU639pRsHohsb7JnD3O7s,0.343,0.378,272933,0.473,3e-06,5,0.18,-8.124,1,0.0258,175.899,4,0.572
2,Kasumisou,Aenakutemo,3brkLSKOt1XqeNWs26q803,0.926,0.361,314493,0.207,0.000699,11,0.128,-12.718,1,0.029,81.101,4,0.335
3,Kasumisou,Zettai,2zOZ64scyLxDLCfWMVzBcC,0.85,0.406,303000,0.297,0.000196,9,0.183,-10.697,0,0.0257,83.399,4,0.171
4,Kasumisou,Negai,76HWQNnmamYdndbKeB5aSo,0.843,0.455,332507,0.393,0.00355,1,0.231,-7.52,1,0.0275,79.01,4,0.22
