In [1]:
import requests, time, os
import pandas as pd
from pandas.io.json import json_normalize
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from functools import reduce
import operator

In [2]:
# Authenticate with the Client Credentials flow. The manager is created with no
# arguments, so no client id/secret is hardcoded in this notebook
# (presumably spotipy picks them up from the environment -- confirm the
# SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET variables are set before running).
client_credentials_manager = SpotifyClientCredentials()

# One client, built on the single credentials manager above, so the access
# token is requested and cached only once.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# `spotify` is kept as an alias of `sp` so any code that still refers to the
# old second client name keeps working.
spotify = sp

In [3]:
def spotify_playlist_tracks(playlist_id):
    """
    Takes a spotify playlist ID and returns a pandas dataframe 
    containing the artist name, album name, track name, and track id.

    The playlist is read in pages of 100 items, each page being requested
    exactly once (by offset), so every playlist entry yields at most one row.

    Parameters
    ----------
    playlist_id : str
        Spotify playlist ID, passed straight to ``sp.playlist_tracks``.

    Returns
    -------
    pandas.DataFrame
        Columns ``artist``, ``album``, ``track``, ``track_id``; one row per
        playlist entry that carries a track object. ``artist`` is the name of
        the first listed artist (``None`` when the track lists no artists).
        The frame is empty (but keeps its columns) for an empty playlist.
    """

    page_size = 100
    columns = ['artist', 'album', 'track', 'track_id']
    # NOTE(review): this value is already percent-encoded and is kept exactly
    # as it was; confirm whether the API really applies it as a field filter.
    fields = "track(name%2C%20id%2C%20album(name))"

    track_listings = []
    offset = 0

    while True:
        page = sp.playlist_tracks(playlist_id,
                                  fields=fields,
                                  limit=page_size, offset=offset, market='ES')
        items = (page or {}).get('items') or []
        if not items:
            break  # empty playlist, or we walked past the last page

        for item in items:
            track = (item or {}).get('track')
            if not track:
                continue  # entry without a track object: nothing to record

            artists = track.get('artists') or []
            album = track.get('album') or {}
            track_listings.append([artists[0].get('name') if artists else None,
                                   album.get('name'),
                                   track.get('name'),
                                   track.get('id')])

        offset += len(items)
        total = page.get('total')
        if total is not None and offset >= total:
            break  # every entry reported by the API has been read

        time.sleep(0.3) #To make sure we don't abuse spotify's API terms

    return pd.DataFrame(track_listings, columns=columns)

In [7]:
def spotify_track_features(track_ids):
    """
    Takes a list of spotify track IDs and returns a pandas dataframe 
    containing various features of each track.

    IDs are sent to ``sp.audio_features`` in batches of 100. Missing IDs
    (``None`` / empty string) are skipped before the request, and ``None``
    entries in a response (returned when the API has nothing for an ID) are
    dropped, so the result can have fewer rows than ``track_ids`` has items.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track IDs.

    Returns
    -------
    pandas.DataFrame
        One row per feature record returned by the API; an empty DataFrame
        when there is nothing to look up or nothing was returned.
    """

    batch_size = 100
    valid_ids = [track_id for track_id in track_ids if track_id]

    track_features = []

    for start in range(0, len(valid_ids), batch_size):
        if start:
            time.sleep(0.2)  # brief pause between consecutive requests

        # Slicing past the end is safe, so the last (short) batch needs no special case.
        batch = sp.audio_features(valid_ids[start:start + batch_size])

        # Makes sure there are no NoneType in the list,
        # this happens when API returns nothing for given ID.
        track_features.extend(features for features in (batch or []) if features)

    # Turn JSON format to pandas dataframe (an empty list gives an empty frame)
    return pd.json_normalize(track_features)

In [13]:
def pull_songs_and_feats(playlist_id):
    """
    Takes a playlist ID and automates calling and merging the 
    track identifiers with the track features.

    Parameters
    ----------
    playlist_id : str
        Spotify playlist ID, forwarded to ``spotify_playlist_tracks``.

    Returns
    -------
    pandas.DataFrame
        The track columns joined to the feature columns, with the bookkeeping
        columns ``id``, ``uri``, ``track_href``, ``analysis_url`` and ``type``
        removed and exact duplicate rows dropped. Only tracks that have a
        feature record are kept (right join). The index is not reset after
        de-duplication. When no feature record is available at all, an empty
        frame holding just the track columns is returned.
    """

    track_df = spotify_playlist_tracks(playlist_id)

    track_ids = list(track_df['track_id'])

    track_features_df = spotify_track_features(track_ids)

    # With no feature records the feature frame has no 'id' column and the
    # merge below would raise KeyError; a right join against nothing has no
    # rows, so return an empty frame with the track columns instead.
    if 'id' not in track_features_df.columns:
        return track_df.iloc[0:0].copy()

    playlist_tracks = track_df.merge(track_features_df, how = 'right',
                                     left_on = 'track_id', right_on = 'id')

    # 'id' duplicates 'track_id' after the merge; the rest are API bookkeeping fields.
    playlist_tracks = playlist_tracks.drop(columns = ['id', 'uri', 'track_href',
                                                      'analysis_url', 'type'])
    playlist_tracks = playlist_tracks.drop_duplicates()

    return playlist_tracks

In [23]:
# Smoke test: run the whole pipeline on one playlist and inspect the result.
# Other playlist IDs that were used for the same check (kept for reference):
# playlist_id1 = '3AjnEOZV1nJDSFfdcJMJT5'

# playlist_id2 = '5PKZSKuHP4d27SXO5fB9Wl'

playlist_id3 = '66iUD25UNnmrF1Mi9Ild4I'

test = pull_songs_and_feats(playlist_id3)
print(test.shape)  # (rows, columns) of the merged track/feature frame
test.head()

(12, 17)


Unnamed: 0,artist,album,track,track_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Ka,Grief Pedigree,No Downtime,6LrPl1nylKSnNC1QZfKt6P,0.719,0.536,7,-9.751,1,0.183,0.701,0.0,0.438,0.58,88.051,231413,4
1,Ka,Grief Pedigree,Cold Facts,0UBvLSf6TxQJV64CZCbUtH,0.662,0.782,7,-7.04,1,0.191,0.655,0.000746,0.162,0.585,104.317,169093,4
2,Baro Sura,Just Problems You Need To Know,PRETTY,7GpsNv6TnecAGWZLAOu4w3,0.669,0.693,1,-7.449,1,0.0712,0.127,0.000921,0.134,0.451,121.924,130227,4
3,Baro Sura,HOWGOODISGOOD?,Travellin' Through Time,6aQuNtFKJDXPNrOejOWxox,0.661,0.443,9,-9.454,1,0.365,0.667,0.0,0.0958,0.623,84.039,179821,4
4,Aminé,ONEPOINTFIVE,REEL IT IN,5qHirGR7M9tdm6C17DlzSY,0.754,0.342,10,-8.677,0,0.0991,0.073,0.0,0.193,0.423,128.981,121347,4


In [24]:
# test.to_csv('test_case_rap.csv', index = False) #save test case(s)

In [14]:
# Pull tracks and audio features for the playlist that feeds track_master_df.
playlist_id = '7htu5ftbLBRFAwiuHVcUAg'
track_master_df = pull_songs_and_feats(playlist_id)
print(track_master_df.shape)  # (rows, columns) of the merged track/feature frame
track_master_df.head()

(9650, 17)


Unnamed: 0,artist,album,track,track_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Lorde,Pure Heroine,Ribs,2MvvoeRt8NcOXWESkxWn3g,0.511,0.472,4,-9.277,1,0.091,0.534,0.614,0.11,0.0399,127.978,258969,4
1,Clairo,Immunity,Bags,6UFivO2zqqPFPoQYsEMuCc,0.742,0.546,1,-7.694,1,0.0315,0.172,0.38,0.115,0.868,104.996,260520,4
2,The Script,The Script,The Man Who Can't Be Moved,4Musyaro0NM5Awx8b5c627,0.609,0.629,10,-5.024,1,0.0264,0.425,0.0,0.0978,0.325,99.955,241467,4
3,Adele,21,Rolling in the Deep,1eq1wUnLVLg4pdEfx9kajC,0.73,0.77,8,-5.114,1,0.0298,0.138,0.0,0.0473,0.507,104.948,228093,4
7,Bruno Mars,Doo-Wops & Hooligans,Grenade,4lLtanYk6tkMvooU0tWzG8,0.706,0.558,2,-7.237,0,0.0593,0.146,0.0,0.118,0.227,110.443,223253,4


In [15]:
# track_master_df.to_csv('track_master_df.csv', index = False) #save point

In [22]:
# sp.search(q='genre:"hip hop"', limit = 50, type= 'artist') #returns artists from a given genre