# Analyzing Audio Features in Spotify
*Anne Bode*

In [1]:
import os

import numpy as np
import pandas as pd
import requests

## Creating Streaming/Library Dataframe

In [2]:
# load each StreamingHistory export (you may have one or several, depending on
# how long your streaming history is) into its own dataframe
# NOTE(review): the two file names below differ in capitalisation; confirm both
# match the files on disk if your filesystem is case-sensitive
df_stream0, df_stream1 = (
    pd.read_json(history_file)
    for history_file in ('StreamingHistory0.json', 'streamingHistory1.json')
)

# stack the exports into one frame, then tag every row with a 'UniqueID'
# of the form '<artistName>:<trackName>'
df_stream = pd.concat([df_stream0, df_stream1]).assign(
    UniqueID=lambda streams: streams['artistName'] + ":" + streams['trackName']
)

df_stream.head()

Unnamed: 0,endTime,artistName,trackName,msPlayed,UniqueID
0,2020-09-12 00:01,Keri Hilson,Pretty Girl Rock,243920,Keri Hilson:Pretty Girl Rock
1,2020-09-12 00:05,Silk City,Electricity (with Dua Lipa),238173,Silk City:Electricity (with Dua Lipa)
2,2020-09-12 00:09,Chance the Rapper,Same Drugs,257775,Chance the Rapper:Same Drugs
3,2020-09-12 00:13,Frank Ocean,Thinkin Bout You,200746,Frank Ocean:Thinkin Bout You
4,2020-09-12 00:16,Zedd,The Middle,184732,Zedd:The Middle


In [3]:
# load the edited Library json file and derive two helper columns:
#   UniqueID  - '<artist>:<track>', built the same way as for the streaming data
#   track_uri - third ':'-separated piece of 'uri', i.e. the part that follows
#               'spotify:track:'
df_library = (
    pd.read_json('YourLibrary1.json')
    .assign(
        UniqueID=lambda lib: lib['artist'] + ":" + lib['track'],
        track_uri=lambda lib: lib['uri'].str.split(":", expand=True)[2],
    )
)

df_library.head()

Unnamed: 0,artist,album,track,uri,UniqueID,track_uri
0,Arctic Monkeys,Tranquility Base Hotel & Casino,Four Out Of Five,spotify:track:3nhzPKCm2yqGmgEhdAg19u,Arctic Monkeys:Four Out Of Five,3nhzPKCm2yqGmgEhdAg19u
1,Adele,19,First Love,spotify:track:1DHV4JhMnCkbLuf6Psg93N,Adele:First Love,1DHV4JhMnCkbLuf6Psg93N
2,Vampire Weekend,Modern Vampires of the City,Ya Hey,spotify:track:4eE6vZ2vOrceLq4xgz3VmG,Vampire Weekend:Ya Hey,4eE6vZ2vOrceLq4xgz3VmG
3,Haley Heynderickx,I Need to Start a Garden,Show You a Body,spotify:track:1Ax5RAq7lXMTF8A5PTqdl4,Haley Heynderickx:Show You a Body,1Ax5RAq7lXMTF8A5PTqdl4
4,Paris Jones,You're Invited (To the Assassination of Patric...,Summer,spotify:track:5rfvovWBd35hYqk6rWLIrr,Paris Jones:Summer,5rfvovWBd35hYqk6rWLIrr


In [14]:
# create the final dataframe as a copy of df_stream so the raw streaming data stays untouched
df_tableau = df_stream.copy()

# build a lookup with exactly one row per UniqueID: if the same artist/track is
# saved more than once in the library (e.g. under two albums), joining on
# UniqueID would otherwise duplicate every matching stream row and inflate play
# counts. Rows without a track_uri are dropped first so they can never be the
# row that is kept for a UniqueID.
library_lookup = (
    df_library[['album', 'UniqueID', 'track_uri']]
    .dropna(subset=['track_uri'])
    .drop_duplicates(subset='UniqueID')
)

# left join with the lookup on UniqueID to bring in album and track_uri;
# validate= makes pandas raise if the right-hand keys are not unique
df_tableau = pd.merge(df_tableau, library_lookup, how='left', on=['UniqueID'],
                      validate='many_to_one')

# drop all songs that aren't in our Library, aka library data values like track uri not filled in
df_tableau = df_tableau[df_tableau['track_uri'].notna()]
df_tableau.head()

Unnamed: 0,endTime,artistName,trackName,msPlayed,UniqueID,album,track_uri
2,2020-09-12 00:09,Chance the Rapper,Same Drugs,257775,Chance the Rapper:Same Drugs,Coloring Book,6m9qPYXmhge2QhBLfFKnVF
3,2020-09-12 00:13,Frank Ocean,Thinkin Bout You,200746,Frank Ocean:Thinkin Bout You,channel ORANGE,7DfFc7a6Rwfi3YQMRbDMau
5,2020-09-12 00:19,Jorja Smith,Be Honest (feat. Burna Boy),207030,Jorja Smith:Be Honest (feat. Burna Boy),Be Honest (feat. Burna Boy),5pAbCxt9e3f81lOmjIXwzd
6,2020-09-12 00:23,The 1975,Menswear,206737,The 1975:Menswear,The 1975,1v07ywlVYd02pOCnXRBDNA
9,2020-09-12 00:39,Fergie,Big Girls Don't Cry (Personal),268120,Fergie:Big Girls Don't Cry (Personal),The Dutchess,3Q4WeJmzxuDpzMu9QjQqbM


## Creating Audio Features Dataframe

In [29]:
# Client ID / Client Secret of your project in the Spotify Developer Dashboard.
# They are read from environment variables so that credentials are never saved
# inside the notebook: set SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET before
# starting Jupyter. A missing variable raises KeyError right here.
# NOTE: any secret that has ever been saved in a shared notebook should be
# rotated in the Developer Dashboard.
CLIENT_ID = os.environ['SPOTIFY_CLIENT_ID']
CLIENT_SECRET = os.environ['SPOTIFY_CLIENT_SECRET']

In [30]:
# generate access token (grant type: client_credentials)

# authentication URL
AUTH_URL = 'https://accounts.spotify.com/api/token'

# POST the app credentials as form data; the timeout stops the cell from
# hanging forever if the server never answers
auth_response = requests.post(
    AUTH_URL,
    data={
        'grant_type': 'client_credentials',
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
    },
    timeout=30,
)

# stop here with the HTTP error (e.g. rejected credentials) instead of failing
# below with an unhelpful KeyError on 'access_token'
auth_response.raise_for_status()

# convert the response to JSON
auth_response_data = auth_response.json()

# save the access token
access_token = auth_response_data['access_token']

In [31]:
# sent with every API call: carries the access token as a Bearer credential
headers = {'Authorization': f'Bearer {access_token}'}

In [32]:
# common prefix that every Spotify API endpoint path is appended to
BASE_URL = "https://api.spotify.com/v1/"

In [23]:
# create blank dictionary to store audio features
feature_dict = {}

# fields copied from each 'audio-features/' response, in output column order
AUDIO_FEATURE_KEYS = ('danceability', 'energy', 'speechiness',
                      'instrumentalness', 'tempo')

# unique, non-null track IDs in first-seen order, so no track is requested
# twice and a missing track_uri cannot break the URL concatenation below
track_uris = list(dict.fromkeys(df_library['track_uri'].dropna()))

# loop through track IDs and pull popularity + audio features using the API,
# store all these in a dictionary keyed by track ID.
# A single Session reuses the connection and sends the auth headers on every call.
with requests.Session() as session:
    session.headers.update(headers)

    for t_uri in track_uris:
        # 'tracks/<id>' supplies the popularity value
        track_response = session.get(BASE_URL + 'tracks/' + t_uri, timeout=30)
        track_response.raise_for_status()
        track_info = track_response.json()

        # 'audio-features/<id>' supplies the remaining fields
        audio_response = session.get(BASE_URL + 'audio-features/' + t_uri, timeout=30)
        audio_response.raise_for_status()
        audio_info = audio_response.json()

        # store the entry only after both calls succeeded, so an interrupted
        # run never leaves a placeholder row of zeros that looks like real data
        feature_dict[t_uri] = {
            'popularity': track_info['popularity'],
            **{key: audio_info[key] for key in AUDIO_FEATURE_KEYS},
        }

In [25]:
# turn the per-track dictionary into a table: one row per track, with the
# dictionary keys exposed as the leading 'track_uri' column and a fresh
# 0..n-1 row index
df_features = (
    pd.DataFrame.from_dict(feature_dict, orient='index')
    .rename_axis('track_uri')
    .reset_index()
)

df_features.head()

Unnamed: 0,track_uri,popularity,danceability,energy,speechiness,instrumentalness,tempo
0,3nhzPKCm2yqGmgEhdAg19u,57,0.601,0.863,0.0685,0.0,130.189
1,1DHV4JhMnCkbLuf6Psg93N,0,0.562,0.11,0.0363,0.00514,162.548
2,4eE6vZ2vOrceLq4xgz3VmG,0,0.584,0.65,0.0427,0.000283,92.01
3,1Ax5RAq7lXMTF8A5PTqdl4,43,0.459,0.179,0.0391,0.013,127.019
4,5rfvovWBd35hYqk6rWLIrr,30,0.65,0.674,0.419,2e-06,89.996


In [27]:
# write df_tableau and df_features to csv files that we can load into Tableau
for csv_name, frame in (('MySpotifyLibraryStreams.csv', df_tableau),
                        ('AudioFeaturesTable.csv', df_features)):
    frame.to_csv(csv_name)

print('done')

done


In [35]:
import jovian
#jovian.commit()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[jovian] Committed successfully! https://jovian.ai/abode118/spotify-audio-features


'https://jovian.ai/abode118/spotify-audio-features'