In [None]:
import requests
import pprint
import os

import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

import pandas as pd
import numpy as np

pp = pprint.PrettyPrinter()
datapath = '../Data/'

In [19]:
cid = os.environ.get('SPOTIPY_CLIENT_ID')
secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
username = ""
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret) 
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Get read access to your library
scope = 'user-library-read'
token = util.prompt_for_user_token(username, scope)
if token:
    sp = spotipy.Spotify(auth=token)
else:
    print("Can't get token for", username)

# Get favourite songs or playlist songs

In [3]:
def get_features_from_favourites():
    '''
    Returns a dataframe of the current user's favourite songs
    '''
    df_result = pd.DataFrame()
    track_list = ''
    added_ts_list = []
    artist_list = []
    title_list = []

    more_songs = True #As long as there is tracks not fetched from API, continue looping
    offset_index = 0

    while more_songs:
        songs = sp.current_user_saved_tracks(offset=offset_index)

        for song in songs['items']:
            #join track ids to a single string as an input parameter for audio_features function
            track_list += song['track']['id'] +','

            #get the time when the song was added
            added_ts_list.append(song['added_at'])

            #get the title of the song
            title_list.append(song['track']['name'])

            #get all the artists in the song
            artists = song['track']['artists']
            artists_name = ''
            for artist in artists:
                artists_name += artist['name']  + ','
            artist_list.append(artists_name[:-1])

            #get the track features and append into a dataframe
            track_features = sp.audio_features(track_list[:-1])
            df_temp = pd.DataFrame(track_features)
            df_result = df_result.append(df_temp)
            track_list = ''

        if songs['next'] == None:
            # no more songs in playlist
            more_songs = False
        else:
            # get the next n songs
            offset_index += songs['limit']
            print('Progress: ' + str(offset_index) + ' of '+ str(songs['total']))

    #add the timestamp added, title and artists of a song
    df_result['added_at'], df_result['song_title'], df_result['artists'] = added_ts_list, title_list, artist_list
    print('--- COMPLETED ---')
    
    return df_result    


In [4]:
def get_features_from_playlist(user='', playlist_id=''):
    '''
    Takes in a user_id and a playlist_id and returns a dataframe of a user's playlist songs
    '''
    df_result = pd.DataFrame()
    track_list = ''
    uploader_list = []
    added_ts_list = []
    artist_list = []
    title_list = []

    more_songs = True #As long as there is tracks not fetched from API, continue looping
    offset_index = 0
    
    if playlist_id != '' and user == '':
        print("Enter username for playlist")
        return

    while more_songs:
        songs = sp.user_playlist_tracks(user, playlist_id=playlist_id, offset=offset_index)

        for song in songs['items']:
            #join track ids to a single string as an input parameter for audio_features function
            track_list += song['track']['id'] +','

            #get the time when the song was added
            added_ts_list.append(song['added_at'])

            #get the title of the song
            title_list.append(song['track']['name'])

            #get all the artists in the song
            artists = song['track']['artists']
            artists_name = ''
            for artist in artists:
                artists_name += artist['name']  + ','
            artist_list.append(artists_name[:-1])
            
            #get user who added song in the playlist, catering for collaboration playlists
            uploader_list.append(song['added_by']['id'])

            #get the track features and append into a dataframe
            track_features = sp.audio_features(track_list[:-1])
            df_temp = pd.DataFrame(track_features)
            df_result = df_result.append(df_temp)
            track_list = ''

        if songs['next'] == None:
            # no more songs in playlist
            more_songs = False
        else:
            # get the next n songs
            offset_index += songs['limit']
            print('Progress: ' + str(offset_index) + ' of '+ str(songs['total']))

    #add the timestamp added, title and artists of a song
    df_result['added_at'], df_result['song_title'], df_result['artists'] = added_ts_list, title_list, artist_list
    
    #add upload user if in a playlist
    df_result['uploaded_by'] = uploader_list
        
    print('--- COMPLETED ---')
    
    return df_result    


def get_features_from_playlist(user='', playlist_id=''):
    '''
    This returns a dataframe of current user's saved track if user and playlist_id are blank.
    This returns a dataframe of the playlist tracks of a specific user if user and playlist_id are filled.
    '''
    df_result = pd.DataFrame()
    track_list = ''
    uploader_list = []
    added_ts_list = []
    artist_list = []
    title_list = []
    
    all_songs = True #As long as there is tracks not fetched from API, continue looping
    offset_index = 0
    
    if playlist_id != '' and user == '':
        print("Enter username for playlist")
        return
    
    while all_songs:
        if playlist_id == '':
            #Get songs from favourites playlist
            songs = sp.current_user_saved_tracks(limit=20, offset=offset_index)
            offset_index += 20
            if len(songs['items']) != 20:
                all_songs = False
        else:
            #Get songs from playlist of user
            lim = 20
            songs = sp.user_playlist_tracks(user, playlist_id=playlist_id, limit=lim, offset=offset_index)
            offset_index += lim
            if len(songs['items']) < lim:
                all_songs = False  
                   
        for song in songs['items']:
            #join track ids to a single string as an input parameter for audio_features function
            track_list += song['track']['id'] +','
            
            #get the time when the song was added
            added_ts_list.append(song['added_at'])
            
            #get the title of the song
            title_list.append(song['track']['name'])
            
            #get all the artists in the song
            artists = song['track']['artists']
            artists_name = ''
            for artist in artists:
                artists_name += artist['name']  + ','
            artist_list.append(artists_name[:-1])
            
            #get user who added song in the playlist, catering for collaboration playlists
            if playlist_id != '':
                uploader_list.append(song['added_by']['id'])
        
        #get the track features and append into a dataframe
        track_features = sp.audio_features(track_list[:-1])
        df_temp = pd.DataFrame(track_features)
        df_result = df_result.append(df_temp)
        track_list = ''
        print(df_result.shape)
    
    
    #add the timestamp added, title and artists of a song
    df_result['added_at'], df_result['song_title'], df_result['artists'] = added_ts_list, title_list, artist_list
    
    #add upload user if in a playlist
    if playlist_id != '':
        df_result['uploaded_by'] = uploader_list
                   
    return df_result

# Get audio features from playlist

In [5]:
user_playlists = sp.user_playlists(user='jkwd93')

for playlist in user_playlists['items']:
    print(playlist['id'], playlist['name'])

2TB6pDUJDB2vpD04RUqXsy NoSaved
3wXrLUcUJTrKkViXuvDgb9 Lounge
3n2aQEZQGEnmavvfOVd3hY Chill
1KwfC9CiQVSJyMUB6qE0Xj Dance
37i9dQZF1DXdd3gw5QVjt9 Morning Acoustic
0DXoY83tBvgWkd8QH49yAI Acoustic Lounge - Chillout & Easy Listening
37i9dQZF1DWZeKCadgRdKQ Deep Focus
37i9dQZF1DX4E3UdUs7fUx Afternoon Acoustic
37i9dQZF1DWUNIrSzKgQbP Winter Acoustic


In [6]:
df_playlist = get_features_from_playlist(user='jkwd93', playlist_id='3n2aQEZQGEnmavvfOVd3hY')

Progress: 100 of 112
--- COMPLETED ---


In [7]:
df_playlist.shape

(112, 22)

In [8]:
df_playlist.head()

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,...,tempo,time_signature,track_href,type,uri,valence,added_at,song_title,artists,uploaded_by
0,0.897,https://api.spotify.com/v1/audio-analysis/69RS...,0.69,178239,0.263,69RSizl3ZhHgpsI9bVTM51,0.000108,9,0.0803,-12.004,...,118.906,4,https://api.spotify.com/v1/tracks/69RSizl3ZhHg...,audio_features,spotify:track:69RSizl3ZhHgpsI9bVTM51,0.182,2018-04-22T12:08:58Z,Two Bodies - Lido Remix,"Flight Facilities,Emma Louise,Lido",jkwd93
0,0.305,https://api.spotify.com/v1/audio-analysis/4Ovs...,0.613,180950,0.228,4Ovsc4owid3ZTPJ3KeByvr,0.198,11,0.0815,-14.623,...,129.671,4,https://api.spotify.com/v1/tracks/4Ovsc4owid3Z...,audio_features,spotify:track:4Ovsc4owid3ZTPJ3KeByvr,0.155,2018-04-22T12:09:31Z,Try,SRNO,jkwd93
0,0.516,https://api.spotify.com/v1/audio-analysis/7yjp...,0.867,209313,0.555,7yjpgcDoe13jjiapRyNAfu,2e-06,0,0.0897,-7.025,...,103.961,4,https://api.spotify.com/v1/tracks/7yjpgcDoe13j...,audio_features,spotify:track:7yjpgcDoe13jjiapRyNAfu,0.672,2018-04-22T12:09:38Z,There For Ya,"Lil Cats,Ovcoco",jkwd93
0,0.741,https://api.spotify.com/v1/audio-analysis/21oJ...,0.486,244910,0.419,21oJ1K99GBJrE2GVQGVjA0,6.6e-05,5,0.096,-6.415,...,119.065,3,https://api.spotify.com/v1/tracks/21oJ1K99GBJr...,audio_features,spotify:track:21oJ1K99GBJrE2GVQGVjA0,0.227,2018-04-22T12:08:42Z,Frozen,Sabrina Claudio,jkwd93
0,0.425,https://api.spotify.com/v1/audio-analysis/4sCo...,0.571,184999,0.336,4sCo5PwVmXiKeItVY1is4M,0.000752,6,0.103,-9.977,...,81.194,4,https://api.spotify.com/v1/tracks/4sCo5PwVmXiK...,audio_features,spotify:track:4sCo5PwVmXiKeItVY1is4M,0.612,2018-04-22T12:12:16Z,Give It All up,"SRNO,Gia Koka",jkwd93


In [9]:
df_playlist.to_hdf(datapath + 'chill_playlist.h5',key='df', mode='w')

# Get audio features from saved tracks

In [10]:
df_saved_tracks = get_features_from_favourites()

Progress: 20 of 457
Progress: 40 of 457
Progress: 60 of 457
Progress: 80 of 457
Progress: 100 of 457
Progress: 120 of 457
Progress: 140 of 457
Progress: 160 of 457
Progress: 180 of 457
Progress: 200 of 457
Progress: 220 of 457
Progress: 240 of 457
Progress: 260 of 457
Progress: 280 of 457
Progress: 300 of 457
Progress: 320 of 457
Progress: 340 of 457
Progress: 360 of 457
Progress: 380 of 457
Progress: 400 of 457
Progress: 420 of 457
Progress: 440 of 457
--- COMPLETED ---


In [11]:
df_saved_tracks.shape

(457, 21)

In [12]:
df_saved_tracks.head()

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,...,speechiness,tempo,time_signature,track_href,type,uri,valence,added_at,song_title,artists
0,0.172,https://api.spotify.com/v1/audio-analysis/7uiB...,0.441,187298,0.527,7uiBocndm12aKbsdnQ3Scx,0.0,3,0.233,-5.318,...,0.0966,96.37,4,https://api.spotify.com/v1/tracks/7uiBocndm12a...,audio_features,spotify:track:7uiBocndm12aKbsdnQ3Scx,0.589,2019-02-03T06:28:43Z,Undecided,Chris Brown
0,0.153,https://api.spotify.com/v1/audio-analysis/4w8n...,0.841,212500,0.798,4w8niZpiMy6qz1mntFA5uM,3e-06,1,0.0618,-4.206,...,0.229,95.948,4,https://api.spotify.com/v1/tracks/4w8niZpiMy6q...,audio_features,spotify:track:4w8niZpiMy6qz1mntFA5uM,0.591,2019-02-02T02:58:19Z,"Taki Taki (with Selena Gomez, Ozuna & Cardi B)","DJ Snake,Selena Gomez,Ozuna,Cardi B"
0,0.297,https://api.spotify.com/v1/audio-analysis/5p7u...,0.752,201661,0.488,5p7ujcrUXASCNwRaWNHR1C,9e-06,6,0.0936,-7.05,...,0.0705,136.041,4,https://api.spotify.com/v1/tracks/5p7ujcrUXASC...,audio_features,spotify:track:5p7ujcrUXASCNwRaWNHR1C,0.533,2019-02-01T13:55:57Z,Without Me,Halsey
0,0.243,https://api.spotify.com/v1/audio-analysis/3RmK...,0.909,229933,0.539,3RmKpob8xzv1pzHEQrMJah,0.0,8,0.0674,-4.278,...,0.116,90.038,4,https://api.spotify.com/v1/tracks/3RmKpob8xzv1...,audio_features,spotify:track:3RmKpob8xzv1pzHEQrMJah,0.869,2019-01-28T12:03:46Z,Let Me Blow Ya Mind,"Eve,Gwen Stefani"
0,0.619,https://api.spotify.com/v1/audio-analysis/2dn1...,0.545,205500,0.573,2dn1SdbwnCliatWZ8Ls3O5,0.00408,5,0.104,-6.007,...,0.246,118.07,3,https://api.spotify.com/v1/tracks/2dn1SdbwnCli...,audio_features,spotify:track:2dn1SdbwnCliatWZ8Ls3O5,0.344,2019-01-22T00:30:57Z,In Between (feat. BANKS) - Bonus,"6LACK,BANKS"


In [13]:
df_saved_tracks.to_hdf(datapath + 'saved_tracks.h5',key='df', mode='w')

# Get audio features from discover weekly

In [14]:
tastebreak = '37i9dQZF1EjaG34DgvTcv9'
discover_weekly = '37i9dQZEVXcEdB6HFaFbVC'

playlist = discover_weekly

df_playlist = get_features_from_playlist(user='jkwd93', playlist_id=playlist)

--- COMPLETED ---


In [15]:
df_playlist.shape

(30, 22)

In [16]:
df_playlist.head()

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,...,tempo,time_signature,track_href,type,uri,valence,added_at,song_title,artists,uploaded_by
0,0.611,https://api.spotify.com/v1/audio-analysis/6q41...,0.619,225747,0.678,6q41Dsz7ojekYx6be5ZQfu,1e-06,0,0.31,-5.483,...,173.902,4,https://api.spotify.com/v1/tracks/6q41Dsz7ojek...,audio_features,spotify:track:6q41Dsz7ojekYx6be5ZQfu,0.891,2019-02-03T16:00:00Z,Come To LA,Zak Waters,
0,0.53,https://api.spotify.com/v1/audio-analysis/66Zf...,0.814,188962,0.481,66ZfURYP8s2TgTVQuia0Oq,6e-06,5,0.129,-6.448,...,119.008,4,https://api.spotify.com/v1/tracks/66ZfURYP8s2T...,audio_features,spotify:track:66ZfURYP8s2TgTVQuia0Oq,0.573,2019-02-03T16:00:00Z,Parachute,NSTASIA,
0,0.287,https://api.spotify.com/v1/audio-analysis/20R2...,0.726,243191,0.576,20R2rF8szcx4VNA6FDRKwo,0.000266,4,0.255,-3.184,...,76.044,4,https://api.spotify.com/v1/tracks/20R2rF8szcx4...,audio_features,spotify:track:20R2rF8szcx4VNA6FDRKwo,0.72,2019-02-03T16:00:00Z,Butterscotch,"Robotaki,Jamie Fine,falcxne",
0,0.682,https://api.spotify.com/v1/audio-analysis/2VUF...,0.703,260627,0.61,2VUFlOjcXAEuvMOpQ6mz7Y,0.0,7,0.0999,-5.254,...,114.963,4,https://api.spotify.com/v1/tracks/2VUFlOjcXAEu...,audio_features,spotify:track:2VUFlOjcXAEuvMOpQ6mz7Y,0.663,2019-02-03T16:00:00Z,Broke,"Jennifer Chung,Joules",
0,0.303,https://api.spotify.com/v1/audio-analysis/228J...,0.495,218936,0.507,228JFCIhBB3dQ1fNqTPsUN,0.0,11,0.348,-5.156,...,145.774,4,https://api.spotify.com/v1/tracks/228JFCIhBB3d...,audio_features,spotify:track:228JFCIhBB3dQ1fNqTPsUN,0.524,2019-02-03T16:00:00Z,Rocket,Johnny Stimson,


In [17]:
import datetime

now = datetime.datetime.now()
ddmmyyyy = str("{:02d}".format(now.day)) + str("{:02d}".format(now.month)) + str(now.year)

savepath = datapath + 'discover_weekly_' + ddmmyyyy + '.h5'
df_playlist.to_hdf(savepath,key='df', mode='w')