# The National Spotify Stats and Visualization

In [1]:
import os
import time

import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

## Oauth2 Authorization

Obtain client_id and client_secret after creating a new app from: <https://developer.spotify.com/dashboard/applications>

In [2]:
start_time = time.time()

# Never hardcode API secrets in a notebook: they end up in version control and
# in every shared copy. Export SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET in
# the environment before starting Jupyter. Any secret that was previously
# committed in this cell should be rotated in the Spotify developer dashboard.
# If a variable is unset the value is None; spotipy's SpotifyClientCredentials
# is expected to report the missing credential when it is constructed.
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")

In [3]:
# Authenticate with the Client Credentials flow (server-to-server authentication)
auth_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)

# Spotify API client used for all requests in this notebook
sp = spotipy.Spotify(auth_manager=auth_manager)

## Retrieve all albums of The National on Spotify

In [4]:
national_uri = "spotify:artist:2cCUtGK9sDU2EoElnk0GNB"

# First page of the artist's albums available in the US market
results = sp.artist_albums(national_uri,
                           album_type='album',
                           country='US')

# The endpoint is paginated: keep following the 'next' link and merge every
# further page into results['items'] so no album is silently dropped.
page = results
while page.get('next'):
    page = sp.next(page)
    results['items'].extend(page['items'])

In [5]:
# empty containers, one per album attribute collected from the API results
album_names, album_uris = [], []
album_release_dates, album_total_tracks = [], []

In [6]:
# Store info for each album in parallel lists (same index = same album).
# The lists are rebuilt from `results` instead of appended to, so re-running
# this cell does not duplicate every album.
album_items = results['items']
album_names = [album['name'] for album in album_items]
album_uris = [album['uri'] for album in album_items]
album_release_dates = [album['release_date'] for album in album_items]
album_total_tracks = [album['total_tracks'] for album in album_items]

In [7]:
# list every album title returned by Spotify, one per line
for album_title in album_names:
    print(album_title)

Sad Songs for Dirty Lovers (2021 Remaster)
Juicy Sonic Magic (Live in Berkeley September 24-25 2018)
I Am Easy to Find
Boxer (Live in Brussels)
Sleep Well Beast
Trouble Will Find Me
High Violet (Expanded Edition)
High Violet
High Violet
High Violet
Boxer
Boxer
Alligator
Sad Songs for Dirty Lovers
The National (2021 Remaster)
The National


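Several albums, such as High Violet and Boxer, appear multiple times in the Spotify results. These duplicates are not filtered out below, so their tracks are included once per listing.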

## Retrieve list of tracks for each album
Create a nested dictionary to store track information of each album

In [8]:
def get_album_tracks(name, uri):
    '''
    Retrieves and stores track information for one album.

    Creates (or replaces) ``national_albums[uri]``, a dictionary of parallel
    lists with one entry per track under the keys 'album', 'track_number',
    'name', 'uri' and 'duration_ms'.

    Parameters
    ----------
    name : album name, repeated in the 'album' list for every track
    uri  : Spotify album URI; used both for the API lookup and as the key
           in ``national_albums``

    Returns
    -------
    None. The result is written to the module-level ``national_albums``.
    '''

    # nested dictionary for this album, with one empty list per field
    album_data = {
        'album': [],
        'track_number': [],
        'name': [],
        'uri': [],
        'duration_ms': [],
    }
    national_albums[uri] = album_data

    # The tracks endpoint is paginated; follow the 'next' links so that
    # albums longer than one page are not silently truncated.
    page = sp.album_tracks(uri, market='US')
    while page:
        for track in page['items']:
            album_data['album'].append(name)
            album_data['track_number'].append(track['track_number'])
            album_data['name'].append(track['name'])
            album_data['uri'].append(track['uri'])
            album_data['duration_ms'].append(track['duration_ms'])
        page = sp.next(page) if page.get('next') else None

In [9]:
# album URI -> dictionary of per-track lists, filled in by get_album_tracks
national_albums = {}

# Fetch the track listing of every album. The loop variables are module-level,
# so `name` and `uri` stay bound to the last album after the loop finishes.
for (name, uri) in zip(album_names, album_uris):
    get_album_tracks(name, uri)

In [10]:
def get_track_features(album_uri):
    '''
    Retrieves and stores Spotify audio features and popularity for each track
    of one album.

    Adds one list per audio feature, plus a 'popularity' list, to
    ``national_albums[album_uri]``. Exactly one value is appended to every
    list for every track, so the lists stay aligned with the track lists
    already stored for the album; values that cannot be retrieved are
    recorded as "NA".

    Parameters
    ----------
    album_uri : key of an album already present in ``national_albums``

    Returns
    -------
    None. The result is written to the module-level ``national_albums``.
    '''

    album_data = national_albums[album_uri]

    # fall back to the URI when the album has no tracks (and so no stored name)
    album_label = album_data['album'][0] if album_data['album'] else album_uri
    print("Retrieving audio features for album: {}".format(album_label))

    feature_list = ['acousticness',
                    'danceability',
                    'energy',
                    'instrumentalness',
                    'liveness',
                    'loudness',
                    'speechiness',
                    'tempo',
                    'valence',
                    'key',
                    'time_signature']

    # initialize to store track features
    for feature in feature_list:
        album_data[feature] = []

    album_data['popularity'] = []

    # iterate through each track
    for track in album_data['uri']:
        # not all tracks have audio features; treat a failed lookup like a
        # missing (None) result
        try:
            track_features = sp.audio_features(track)[0]
        except TypeError:
            track_features = None

        # popularity is stored in track, separate from audio_features
        try:
            popularity = sp.track(track)['popularity']
        except TypeError:
            popularity = "NA"

        if track_features is None:
            print("Could not retrieve audio features for track: {}".format(track))
            track_features = dict.fromkeys(feature_list, "NA")

        # always append one value per column so every list has the same length
        album_data['popularity'].append(popularity)
        for feature in feature_list:
            album_data[feature].append(track_features[feature])

        # pause between tracks (also after a failed lookup) to go easy on the API
        time.sleep(1)

In [11]:
# fetch features album by album, pausing 5 seconds between albums
for album_key in national_albums.keys():
    get_track_features(album_key)
    time.sleep(5)

Retrieving audio features for album: Sad Songs for Dirty Lovers (2021 Remaster)
Retrieving audio features for album: Juicy Sonic Magic (Live in Berkeley September 24-25 2018)
Could not retrieve audio features for track: spotify:track:1vqZ6Lza85ENA860s8vtCs
Retrieving audio features for album: I Am Easy to Find
Retrieving audio features for album: Boxer (Live in Brussels)
Retrieving audio features for album: Sleep Well Beast
Retrieving audio features for album: Trouble Will Find Me
Retrieving audio features for album: High Violet (Expanded Edition)
Retrieving audio features for album: High Violet
Retrieving audio features for album: High Violet
Retrieving audio features for album: High Violet
Retrieving audio features for album: Boxer
Retrieving audio features for album: Boxer
Retrieving audio features for album: Alligator
Retrieving audio features for album: Sad Songs for Dirty Lovers
Retrieving audio features for album: The National (2021 Remaster)
Retrieving audio features for album:

In [36]:
# unpack the nested dictionary and convert to a dataframe
ARTIST_NAME = "The National"

national_dict = {}

# concatenate each album's per-track lists, column by column
for album_data in national_albums.values():
    for feature, values in album_data.items():
        national_dict.setdefault(feature, []).extend(values)

# column names, in the order they were first encountered
all_features = list(national_dict.keys())

national_df = pd.DataFrame.from_dict(national_dict)

# The artist is set explicitly rather than taken from a leftover loop
# variable, which would hold an album title and not the artist name.
national_df['artist'] = ARTIST_NAME

In [37]:
# display the assembled per-track table
national_df

Unnamed: 0,album,track_number,name,uri,duration_ms,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,key,time_signature,popularity,artist
0,Sad Songs for Dirty Lovers (2021 Remaster),1,Cardinal Song - 2021 Remaster,spotify:track:1kje7rdBRPVLdJjhurLSZw,378840,0.79800,0.424,0.337,0.6200,0.114,-11.554,0.0345,119.901,0.1080,2,4,38,The National
1,Sad Songs for Dirty Lovers (2021 Remaster),2,Slipping Husband - 2021 Remaster,spotify:track:09b8mr09RVCoj327bBpaPS,202626,0.02900,0.629,0.615,0.2590,0.189,-7.732,0.0318,129.977,0.4520,9,4,37,The National
2,Sad Songs for Dirty Lovers (2021 Remaster),3,90-Mile Water Wall - 2021 Remaster,spotify:track:70YTeIt85PXWV2pJyBTGdN,224213,0.78500,0.479,0.441,0.1260,0.110,-8.938,0.0270,103.002,0.1100,4,4,39,The National
3,Sad Songs for Dirty Lovers (2021 Remaster),4,It Never Happened - 2021 Remaster,spotify:track:3atyVUHESb82hi3d8VKEqg,277106,0.07320,0.403,0.618,0.5870,0.279,-7.978,0.0278,133.113,0.2200,7,4,35,The National
4,Sad Songs for Dirty Lovers (2021 Remaster),5,Murder Me Rachael - 2021 Remaster,spotify:track:0e6Ndkr4xwChsbOIjKUgUj,225720,0.00018,0.312,0.822,0.6790,0.214,-4.759,0.0494,148.926,0.2660,7,4,35,The National
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
232,The National,8,John's Star,spotify:track:6B0vA1VobjedKbjSkTY1tk,184866,0.03020,0.578,0.689,0.0394,0.111,-8.340,0.0303,130.267,0.7510,1,4,23,The National
233,The National,9,Watching You Well,spotify:track:0hdDBAtFnekO78kmOGlcrc,182440,0.30700,0.563,0.334,0.1720,0.243,-11.833,0.0253,79.102,0.2650,0,4,23,The National
234,The National,10,Theory of the Crows,spotify:track:6DrpiNrqgOgdcyNa51CqVd,276600,0.00260,0.459,0.426,0.4460,0.108,-8.983,0.0281,135.151,0.1720,2,3,24,The National
235,The National,11,29 Years,spotify:track:3IifrPET3e7aWXT3FN4Ij6,170493,0.92300,0.525,0.306,0.1910,0.368,-21.498,0.2060,135.688,0.0854,0,1,25,The National


In [38]:
# write the per-track table to a CSV file in the working directory
output_csv = "The_National_Song_Features.csv"
national_df.to_csv(output_csv)