In [1]:
!pip3 install spotipy

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import sys
import json
import spotipy
import spotipy.util as util
import os
import pandas as pd
import configparser

In [4]:
# Spotify app credentials live in config.ini (section [SPOTIFY]) so they are
# never hard-coded in the notebook.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; fail early with a clear message instead of an
# opaque KeyError on config['SPOTIFY'] below.
if not config.read('config.ini'):
    raise FileNotFoundError("config.ini not found or unreadable; it must define a [SPOTIFY] section")

# Scope used for reading the user's saved tracks (see getAllSongs).
scope = 'user-library-read'
username = config['SPOTIFY']['username']
client_id = config['SPOTIFY']['client_id']
client_secret = config['SPOTIFY']['client_secret']
redirect_uri = config['SPOTIFY']['redirect_uri']

token = util.prompt_for_user_token(
    username,
    scope,
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
)

# `sp` is the client used by the rest of the notebook; it stays None when no
# token could be obtained so later cells can detect that.
if token:
    print("got token")
    sp = spotipy.Spotify(auth=token)
    print("created client")
else:
    sp = None
    print("No token")

got token
created client


In [132]:
def getAllSongs(sp):
    """
    Returns the list of raw saved-track items for the current user.

    When running for the first time, pages through the Spotify API and saves
    the collected items as JSON at ./data/my_songs.json. On later runs it just
    reads that saved JSON, in which case `sp` is not used.

    Args:
        sp: client exposing current_user_saved_tracks(limit, offset). May be
            None/falsy when the cached JSON already exists.

    Returns:
        List of items as returned by the API. Empty list when there is no
        cached JSON and no client to fetch with.
    """
    my_songs_filepath = './data/my_songs.json'

    if os.path.exists(my_songs_filepath):
        print("loading existing json of my songs")
        with open(my_songs_filepath, 'r') as f:
            return json.load(f)

    if not sp:
        # Nothing cached and nothing to fetch with: return instead of crashing
        # on sp.current_user_saved_tracks below.
        print("spotify client not initialized")
        return []

    data = []
    offset = 0
    limit = 50
    print("getting data from spotify...")
    while True:
        results = sp.current_user_saved_tracks(limit, offset)
        # Also covers a None response, which would otherwise raise TypeError.
        if not results or 'items' not in results:
            print("items missing from results, printing response")
            print(results)
            break

        print("fetched... offset: %s" % offset)
        if len(results['items']):
            data.extend(results['items'])
            offset += limit
        else:
            # An empty page means everything has been fetched.
            print("got all songs... saving to json.")
            # Make sure the cache directory exists so the fetched data is not
            # lost to a FileNotFoundError at the very last step.
            os.makedirs(os.path.dirname(my_songs_filepath), exist_ok=True)
            with open(my_songs_filepath, 'w') as json_file:
                json.dump(data, json_file)
            break
    return data

In [133]:
def getAudioFeatures(sp, song_data=None):
    """
    Returns the list of raw audio-feature records for the given saved tracks.

    When running for the first time, fetches the features from the Spotify API
    in batches of 50 track ids and saves the responses as JSON at
    data/audio_features.json. On later runs it just reads that saved JSON, in
    which case neither `sp` nor `song_data` is used.

    Args:
        sp: client exposing audio_features(track_ids).
        song_data: list of saved-track items, each with a ['track']['uri']
            entry. Defaults to no songs.

    Returns:
        List of audio-feature records in the same order as `song_data`.
        Empty list when there is no cached JSON and no song data.
    """
    audio_features_filepath = 'data/audio_features.json'

    if os.path.exists(audio_features_filepath):
        print("loading existing json of audio features")
        with open(audio_features_filepath, 'r') as f:
            return json.load(f)

    if not song_data:
        print("existing json of audio features doesn't exist and song data not provided.")
        return []

    print("fecthing audio features from spotify...")

    data = []
    # chunk song data into batches of 50 to get audio features of 50 songs at a time
    batch_size = 50
    for i in range(0, len(song_data), batch_size):
        # Slicing clamps at the end of the list, so a final partial batch is
        # handled correctly (indexing i..i+49 raised IndexError there).
        batch = song_data[i:i + batch_size]
        # The track id is the last component of a 'spotify:track:<id>' uri.
        track_ids = [song['track']['uri'].split(':')[-1] for song in batch]

        results = sp.audio_features(track_ids)
        print("fetched audio features... (%s/%s)" % (i, len(song_data)))
        data.extend(results)

    print("got all audio features... saving to json.")
    # Make sure the cache directory exists before writing.
    os.makedirs(os.path.dirname(audio_features_filepath), exist_ok=True)
    with open(audio_features_filepath, 'w') as json_file:
        json.dump(data, json_file)

    return data

In [134]:
# we probably won't need this for this project

# def getAudioAnalysis(sp, song_data=[]):
#     """
#     Returns list of audio analysis on all saved tracks raw from the spotify api.    
#     When running for the first time, saves json of the responses locally.
#     Else, it just reads the saved json
#     """
#     data = []
#     audio_analysis_filepath = 'data/audio_analysis.json'
    
#     if os.path.exists(audio_analysis_filepath):
#         print("loading existing json of audio analysis")
#         with open(audio_analysis_filepath, 'r') as f:
#             data = json.load(f)
#     elif not len(song_data):
#         print("existing json of audio analysis doesn't exist and song data not provided.")
#     else:
#         print("fecthing audio analysis from spotify...")
        
#         # can only grab these one song at a time per api unfortunately
#         for i in range(0, len(song_data)):
#             track_uri = song_data[i]['track']['uri']
#             results = sp.audio_analysis(track_uri)
#             if i % 50 == 0:
#                 print("fetched audio analysis... (%s/%s)" % (i, len(song_data)))
            
#             # deleting the `codestring`, `echoprintstring`, `synchstring` and `rhythmstring` fields as they take up a lot of space and seem useless right now
#             del results['track']['codestring']
#             del results['track']['echoprintstring']
#             del results['track']['synchstring']
#             del results['track']['rhythmstring']
#             print(results)
#             break
#             data.append(results)
        
# #         print("got all audio analysis... saving to json.")
# #         with open(audio_analysis_filepath, 'w') as json_file:
# #             json.dump(data, json_file)
            
#     return data

In [135]:
song_data = getAllSongs(sp)
# Peek at a single raw item to see the response structure; guard against an
# empty result so this cell does not fail with IndexError.
if song_data:
    print(song_data[0])
else:
    print("no saved tracks loaded")

loading existing json of my songs
{'added_at': '2020-03-26T16:57:05Z', 'track': {'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2OZfuhYQm8IY95egVPC1U9'}, 'href': 'https://api.spotify.com/v1/artists/2OZfuhYQm8IY95egVPC1U9', 'id': '2OZfuhYQm8IY95egVPC1U9', 'name': 'The Shivas', 'type': 'artist', 'uri': 'spotify:artist:2OZfuhYQm8IY95egVPC1U9'}], 'available_markets': ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH', 'BO', 'BR', 'CA', 'CH', 'CL', 'CO', 'CR', 'CY', 'CZ', 'DE', 'DK', 'DO', 'DZ', 'EC', 'EE', 'EG', 'ES', 'FI', 'FR', 'GB', 'GR', 'GT', 'HK', 'HN', 'HU', 'ID', 'IE', 'IL', 'IN', 'IS', 'IT', 'JO', 'JP', 'KW', 'LB', 'LI', 'LT', 'LU', 'LV', 'MA', 'MC', 'MT', 'MX', 'MY', 'NI', 'NL', 'NO', 'NZ', 'OM', 'PA', 'PE', 'PH', 'PL', 'PS', 'PT', 'PY', 'QA', 'RO', 'SA', 'SE', 'SG', 'SK', 'SV', 'TH', 'TN', 'TR', 'TW', 'US', 'UY', 'VN', 'ZA'], 'external_urls': {'spotify': 'https://open.spotify.com/album/6RjA3lF4xJ1UdmSXTK3APZ'}, 'href': 'h

In [44]:
# Audio-feature records for the saved tracks; getAudioFeatures reads its
# cached JSON when that file exists and only calls the API otherwise.
audio_features = getAudioFeatures(sp, song_data)

loading existing json of audio features


In [102]:
# need uri, artist, album, track, popularity, release date (duration comes from the audio features as duration_ms)
def transformSongData(song_data):
    """
    Flattens raw saved-track items into one small dict per song.

    Every output dict carries the keys uri, artist, album, track, popularity
    and release_date, in that order. Only the first listed artist is kept.
    """
    def flatten(item):
        # All fields of interest sit under the item's 'track' entry.
        track = item['track']
        return {
            'uri': track['uri'],
            'artist': track['artists'][0]['name'],
            'album': track['album']['name'],
            'track': track['name'],
            'popularity': track['popularity'],
            'release_date': track['album']['release_date'],
        }

    return [flatten(item) for item in song_data]

In [108]:
# Build a frame from the raw audio-feature records, then keep only the
# columns of interest, in this order.
audio_df = pd.DataFrame.from_dict(audio_features)
audio_df = audio_df.loc[:, [
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'uri', 'duration_ms', 'time_signature',
]]

In [109]:
# One row per saved track, with the flattened fields produced by transformSongData.
song_df = pd.DataFrame.from_dict(transformSongData(song_data))

In [118]:
# Display the track metadata frame.
song_df

Unnamed: 0,uri,artist,album,track,popularity,release_date
0,spotify:track:4a0FM5gDxCenHiLS17Edvr,The Shivas,Freezing to Death,"Look So Good, Be So Good",33,2010-04-01
1,spotify:track:1eN42Q7IWRzRBq8eW2Y2TE,Simon & Garfunkel,Bridge Over Troubled Water,El Condor Pasa (If I Could),62,1970-01-26
2,spotify:track:4SzmBRbDVmi0z4Lnc6H1Za,Against All Logic,2012 - 2017,This Old House Is All I Have,56,2018-02-17
3,spotify:track:3OeUlriM0EZHdWleJtjoVr,George Harrison,Cloud Nine,Got My Mind Set On You - 2004 Mix,70,1987-11-02
4,spotify:track:6wLMO8GUyJrZuBwnf4sgsL,Dawes,Nothing Is Wrong,A Little Bit Of Everything,56,2011-06-07
...,...,...,...,...,...,...
6645,spotify:track:00hf8ngxJLTabS1fw4b7Jt,Electric Light Orchestra,20 Songs of Electric Light Orchestra,Whiskey Girls,0,2012-08-29
6646,spotify:track:4ZAFSG9ge8Zy6w0oSOXgvw,Electric Light Orchestra,20 Songs of Electric Light Orchestra,Hold On Tight,0,2012-08-29
6647,spotify:track:6pyWK3X6WrSnEUss628VQP,Electric Light Orchestra,20 Songs of Electric Light Orchestra,Strange Magic,0,2012-08-29
6648,spotify:track:0knz7XF2MvWEUdK2E8obBy,Electric Light Orchestra,20 Songs of Electric Light Orchestra,Sweet Talking Woman,0,2012-08-29


In [119]:
# Display the audio features frame.
audio_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,uri,duration_ms,time_signature
0,0.515,0.872,9,-8.466,1,0.0324,0.21200,0.712000,0.1420,0.480,118.803,spotify:track:4a0FM5gDxCenHiLS17Edvr,359800,4
1,0.330,0.214,4,-17.699,0,0.0311,0.83600,0.070100,0.1780,0.275,147.795,spotify:track:1eN42Q7IWRzRBq8eW2Y2TE,187040,4
2,0.690,0.804,10,-6.288,0,0.0675,0.31200,0.775000,0.1710,0.482,86.974,spotify:track:4SzmBRbDVmi0z4Lnc6H1Za,218960,4
3,0.767,0.854,4,-6.947,1,0.0426,0.46500,0.000002,0.4850,0.963,149.206,spotify:track:3OeUlriM0EZHdWleJtjoVr,234653,4
4,0.510,0.393,5,-9.526,1,0.0364,0.39500,0.000000,0.1380,0.269,143.739,spotify:track:6wLMO8GUyJrZuBwnf4sgsL,339493,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6645,0.409,0.829,2,-8.039,1,0.0436,0.00346,0.000000,0.8040,0.606,128.968,spotify:track:00hf8ngxJLTabS1fw4b7Jt,223422,4
6646,0.329,0.741,7,-8.370,1,0.0357,0.00036,0.000000,0.0557,0.961,152.712,spotify:track:4ZAFSG9ge8Zy6w0oSOXgvw,117573,4
6647,0.228,0.405,9,-11.929,0,0.0306,0.78700,0.000016,0.7980,0.151,177.860,spotify:track:6pyWK3X6WrSnEUss628VQP,133187,4
6648,0.524,0.835,0,-7.505,1,0.0427,0.38100,0.000000,0.3640,0.443,128.902,spotify:track:0knz7XF2MvWEUdK2E8obBy,124720,4


In [122]:
# Column-wise concat matches rows by index position, not by uri.
# NOTE(review): this assumes audio_df has exactly one row per song_df row, in
# the same order -- confirm before relying on the joined values.
# audio_df's copy of 'uri' is dropped first; otherwise joined_df (and the CSV
# written from it) ends up with two identically named 'uri' columns.
joined_df = pd.concat([song_df, audio_df.drop(columns=['uri'])], axis=1, join='inner')
# Display only: drop() returns a new frame, so joined_df itself keeps 'uri'.
joined_df.drop(['uri'], axis=1)

Unnamed: 0,artist,album,track,popularity,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,The Shivas,Freezing to Death,"Look So Good, Be So Good",33,2010-04-01,0.515,0.872,9,-8.466,1,0.0324,0.21200,0.712000,0.1420,0.480,118.803,359800,4
1,Simon & Garfunkel,Bridge Over Troubled Water,El Condor Pasa (If I Could),62,1970-01-26,0.330,0.214,4,-17.699,0,0.0311,0.83600,0.070100,0.1780,0.275,147.795,187040,4
2,Against All Logic,2012 - 2017,This Old House Is All I Have,56,2018-02-17,0.690,0.804,10,-6.288,0,0.0675,0.31200,0.775000,0.1710,0.482,86.974,218960,4
3,George Harrison,Cloud Nine,Got My Mind Set On You - 2004 Mix,70,1987-11-02,0.767,0.854,4,-6.947,1,0.0426,0.46500,0.000002,0.4850,0.963,149.206,234653,4
4,Dawes,Nothing Is Wrong,A Little Bit Of Everything,56,2011-06-07,0.510,0.393,5,-9.526,1,0.0364,0.39500,0.000000,0.1380,0.269,143.739,339493,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6645,Electric Light Orchestra,20 Songs of Electric Light Orchestra,Whiskey Girls,0,2012-08-29,0.409,0.829,2,-8.039,1,0.0436,0.00346,0.000000,0.8040,0.606,128.968,223422,4
6646,Electric Light Orchestra,20 Songs of Electric Light Orchestra,Hold On Tight,0,2012-08-29,0.329,0.741,7,-8.370,1,0.0357,0.00036,0.000000,0.0557,0.961,152.712,117573,4
6647,Electric Light Orchestra,20 Songs of Electric Light Orchestra,Strange Magic,0,2012-08-29,0.228,0.405,9,-11.929,0,0.0306,0.78700,0.000016,0.7980,0.151,177.860,133187,4
6648,Electric Light Orchestra,20 Songs of Electric Light Orchestra,Sweet Talking Woman,0,2012-08-29,0.524,0.835,0,-7.505,1,0.0427,0.38100,0.000000,0.3640,0.443,128.902,124720,4


In [123]:
# Summary statistics for the joined frame.
joined_df.describe()

Unnamed: 0,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
count,6650.0,6650.0,6650.0,6650.0,6650.0,6650.0,6650.0,6650.0,6650.0,6650.0,6650.0,6650.0,6650.0,6650.0
mean,30.190376,0.505176,0.62357,5.165564,-8.837362,0.709774,0.066125,0.296084,0.167453,0.220162,0.506533,120.947989,241972.5,3.879549
std,20.604524,0.153569,0.234191,3.5386,4.59091,0.453901,0.092409,0.312634,0.290538,0.192347,0.248319,29.419335,128110.9,0.461891
min,0.0,0.0,0.0,0.0,-60.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5560.0,0.0
25%,13.0,0.399,0.47,2.0,-10.70175,0.0,0.0327,0.022125,4e-05,0.1,0.317,98.79475,178779.8,4.0
50%,33.0,0.511,0.657,5.0,-7.883,1.0,0.0409,0.167,0.00426,0.138,0.514,119.75,222960.0,4.0
75%,46.0,0.611,0.815,9.0,-5.83875,1.0,0.060175,0.53,0.182,0.283,0.706,138.55975,274460.2,4.0
max,85.0,0.968,0.997,11.0,0.49,1.0,0.952,0.996,0.996,0.994,0.983,215.895,3816373.0,5.0


In [126]:
# Persist the joined table. index=False is not passed, so the row index is
# written as the first CSV column.
joined_df.to_csv('data/songs_with_features.csv')