In [1]:
import requests, time, os
import pandas as pd
from pandas.io.json import json_normalize
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from functools import reduce
import operator
import csv

In [2]:
# Client-credentials flow. No id/secret is passed here, so spotipy has to find
# them in the SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables;
# nothing secret is stored in the notebook itself.
client_credentials_manager = SpotifyClientCredentials()

# `sp` is the client the helper functions in this notebook call.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# `spotify` stays available as a second client object under its existing name;
# it authenticates through the same credentials manager as `sp`.
spotify = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [3]:
def spotify_playlist_tracks(playlist_id):
    """
    Fetch every track of a Spotify playlist into a pandas dataframe.

    The playlist is read one page (up to 100 entries) at a time through the
    module-level client ``sp``. Each page is requested exactly once: the first
    page is fetched directly and the ``next`` link of every response is
    followed until there is none left.

    Parameters
    ----------
    playlist_id : str
        Spotify playlist ID.

    Returns
    -------
    pandas.DataFrame
        One row per playlist entry with the columns ``artist`` (first listed
        artist only), ``album``, ``track`` and ``track_id``. Entries without a
        track object are skipped. The frame is empty, but still has the four
        columns, when the playlist yields no usable entries.
    """

    columns = ['artist', 'album', 'track', 'track_id']
    track_listings = []

    # NOTE(review): this filter string is kept exactly as it was. It is already
    # percent-encoded and names neither `artists` nor `next`, although both are
    # read from the response below - confirm what the API actually receives.
    fields = "track(name%2C%20id%2C%20album(name))"

    page = sp.playlist_tracks(playlist_id, fields=fields,
                              limit=100, offset=0, market='ES')

    while page:
        for item in page.get('items') or []:
            track = item.get('track') if item else None
            if not track:
                # No track object for this entry, so there is nothing to record.
                continue

            artists = track.get('artists') or []
            album = track.get('album') or {}
            track_listings.append([artists[0]['name'] if artists else None,
                                   album.get('name'),
                                   track['name'],
                                   track['id']])

        if not page.get('next'):
            break

        time.sleep(0.3)  # To make sure we don't abuse spotify's API terms
        page = sp.next(page)

    return pd.DataFrame(track_listings, columns=columns)

In [4]:
def spotify_track_features(track_ids):
    """
    Look up Spotify audio features for a list of track IDs.

    The IDs are sent to ``sp.audio_features`` in batches of at most 100, with a
    short pause after each request.

    Parameters
    ----------
    track_ids : list of str
        Spotify track IDs. May be empty.

    Returns
    -------
    pandas.DataFrame
        One row per track for which a feature record came back; IDs without a
        record are dropped silently. An empty dataframe is returned when there
        is nothing to report (including an empty ``track_ids``).
    """

    batch_size = 100  # number of IDs sent per audio-features request
    track_features = []

    for start in range(0, len(track_ids), batch_size):
        # Slicing past the end is safe, so the final short batch needs no special case.
        batch = sp.audio_features(track_ids[start:start + batch_size])

        # `batch or []` guards against a request that yields no list at all;
        # the filter drops the None entries that appear when the API returns
        # nothing for a given ID.
        track_features.extend(feature for feature in (batch or []) if feature)

        time.sleep(0.2)

    if not track_features:
        return pd.DataFrame()

    # Prefer the top-level pandas function (available from pandas 1.0) and fall
    # back to the name imported from pandas.io.json on older installations.
    normalize = getattr(pd, 'json_normalize', None) or json_normalize

    return normalize(track_features)  # Turn JSON format to pandas dataframe

In [34]:
def pull_songs_and_feats(playlist_id):
    """
    Takes a playlist ID and automates calling and merging the
    track identifiers with the track features.

    Parameters
    ----------
    playlist_id : str
        Spotify playlist ID, passed to ``spotify_playlist_tracks``.

    Returns
    -------
    pandas.DataFrame
        The identifying columns from ``spotify_playlist_tracks`` joined with the
        columns from ``spotify_track_features``, minus the helper columns
        ``id``, ``uri``, ``track_href``, ``analysis_url`` and ``type``, with
        duplicate rows removed. Only tracks that have a feature record appear.
        When the playlist has no usable track IDs, or no features come back,
        an empty frame with the identifying columns is returned.
    """

    track_df = spotify_playlist_tracks(playlist_id)

    # Drop missing IDs and repeats (first occurrence wins, order preserved) so
    # every track is looked up only once.
    track_ids = list(dict.fromkeys(
        track_id for track_id in track_df['track_id']
        if pd.notna(track_id) and track_id))

    if not track_ids:
        return track_df.iloc[0:0].copy()

    track_features_df = spotify_track_features(track_ids)

    if track_features_df.empty:
        # Nothing to join against; merging on 'id' would fail without that column.
        return track_df.iloc[0:0].copy()

    # Right join: keep exactly the tracks for which features were returned.
    playlist_tracks = track_df.merge(track_features_df, how='right',
                                     left_on='track_id', right_on='id')

    # errors='ignore' keeps this working if a helper column is absent.
    playlist_tracks = playlist_tracks.drop(
        columns=['id', 'uri', 'track_href', 'analysis_url', 'type'],
        errors='ignore')

    return playlist_tracks.drop_duplicates()

In [6]:
# Smoke test of the full pipeline on one playlist.
# Two further playlist IDs it was tried on are kept here, commented out:
# playlist_id1 = '3AjnEOZV1nJDSFfdcJMJT5'

# playlist_id2 = '5PKZSKuHP4d27SXO5fB9Wl'

playlist_id3 = '37i9dQZF1DXdURFimg6Blm'

# Fetch tracks plus audio features, then show the frame's dimensions and first rows.
test = pull_songs_and_feats(playlist_id3)
print(test.shape)
test.head()

(96, 17)


Unnamed: 0,artist,album,track,track_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Meek Mill,Championships,Going Bad (feat. Drake),2IRZnDFmlqMuOrYOLnZZyc,0.889,0.496,4,-6.365,0,0.0905,0.259,0.0,0.252,0.544,86.003,180522,4
1,J. Cole,MIDDLE CHILD,MIDDLE CHILD,2JvzF1RMd7lE3KmFlsyZD8,0.837,0.364,8,-11.713,1,0.276,0.149,0.0,0.271,0.463,123.984,213594,4
2,Travis Scott,ASTROWORLD,SICKO MODE,2xLMifQCjDGFmkHkpNLD9h,0.834,0.73,8,-3.714,1,0.222,0.00513,0.0,0.124,0.446,155.008,312820,4
3,Mustard,Perfect Ten,Pure Water (with Migos),3j84U36KvLeXNDPv4t5pI8,0.682,0.559,0,-5.545,1,0.127,0.174,0.0,0.344,0.137,202.015,192471,4
4,A Boogie Wit da Hoodie,Hoodie SZN,Swervin (feat. 6ix9ine),1wJRveJZLSb1rjhnUHQiv6,0.581,0.662,9,-5.239,1,0.303,0.0153,0.0,0.111,0.434,93.023,189487,4


In [7]:
# Write the smoke-test frame to 'test_case_ez.csv' without the index column.
test.to_csv('test_case_ez.csv', index = False) #save test case(s)

In [27]:
# Build a dataframe of tracks and audio features from a single playlist.
# NOTE(review): the following cell reassigns `track_master_df` from
# 'track_master_df.csv', so on a top-to-bottom run this result is discarded -
# confirm which of the two sources is intended.
playlist_id = '7htu5ftbLBRFAwiuHVcUAg'
track_master_df = pull_songs_and_feats(playlist_id)
print(track_master_df.shape)
track_master_df.head()

In [28]:
# Load the master dataframe from 'track_master_df.csv' (the file the save-point
# cell writes), replacing whatever `track_master_df` held in memory.
track_master_df = pd.read_csv('track_master_df.csv')
track_master_df.shape

(9650, 17)

In [26]:
# Update master dataframe for more recommendations: read the additional
# playlist IDs from 'playlists.txt'. The `with` block closes the file as soon
# as it has been parsed; newline='' is what the csv module expects for files
# handed to csv.reader.
with open('playlists.txt', 'r', newline='') as playlist_updates:
    reader = csv.reader(playlist_updates)
    allRows = list(reader)

# Only the first CSV row is used as the list of playlist IDs. An empty file
# gives an empty list instead of raising IndexError.
playlist_list = allRows[0] if allRows else []

In [35]:
# Fetch every listed playlist first, then append and de-duplicate once, rather
# than rebuilding the growing master dataframe after each playlist.
# `playlist_frames` stays in the namespace, so the frames fetched so far are
# still available if a request fails part-way through.
playlist_frames = []
for playlist in playlist_list:
    print(playlist)
    playlist_frames.append(pull_songs_and_feats(playlist))

if playlist_frames:
    # drop_duplicates keeps the first occurrence of each row, so rows already in
    # the master dataframe take precedence over freshly fetched copies.
    track_master_df = pd.concat([track_master_df] + playlist_frames).drop_duplicates()

In [36]:
track_master_df.shape #now 28,929 rows, about triple the 9,650 loaded from the CSV

(28929, 17)

In [37]:
# Save point: writes the updated master dataframe back to the same CSV file it
# was loaded from, without the index column.
track_master_df.to_csv('track_master_df.csv', index = False) #save point

In [22]:
# sp.search(q='genre:"hip hop"', limit = 50, type= 'artist') #returns artists from a given genre