In [1]:
import config
import pandas as pd

In [2]:
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials


# Initialize Spotipy using the client-credentials flow; the client id and
# secret are read from the local `config` module rather than hardcoded here.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.client_id,
        client_secret=config.client_secret,
    )
)

# The "sp" variable has two useful functions:
# The first useful function is:
# .search(q='',limit=n)
# .search(q="track:"+song_name+" artist:"+artist_name,limit=5) to restrict to a song name and artist.
# where the "q" keyword is the query you want to perform on Spotify (song_name, artist, ...),
# while the "limit" keyword limits the number of returned results.
#
# The second useful function is:
# .audio_features([URL|URI|ID])
# which returns some 'features of the song' that, after cleanup, we can use to characterize a song.


In [3]:
 def get_playlist_tracks(username, playlist_id):
        """Return every item of a playlist, following pagination to the end.

        Parameters
        ----------
        username : passed through to ``sp.user_playlist_tracks``.
        playlist_id : id of the playlist to read.

        Returns
        -------
        list
            The ``'items'`` entries of the first page, extended in place with
            the ``'items'`` of each following page.

        Relies on the notebook-level ``sp`` client.
        """
        page = sp.user_playlist_tracks(username, playlist_id)
        collected = page['items']
        while True:
            # A falsy 'next' entry means the last page has been reached.
            if not page['next']:
                return collected
            page = sp.next(page)
            collected += page['items']

In [4]:
# IDs of the playlists whose tracks are gathered into the library.
playlists = [
    '5n1zRXFeRONJoMdVw18M34',
    '4riovLwMCrY3q0Cd4e0Sqp',
    '4G00RTgXb2dU4FieXsjBS4',
    '37i9dQZF1DXcBWIGoYBM5M',
    '37i9dQZEVXbMDoHDwVN2tF',
    '37i9dQZF1DX0XUsuxWHRQd',
    '37i9dQZF1DX10zKzsJ2jva',
    '37i9dQZF1DWY7IeIP1cdjF',
    '37i9dQZF1DWXRqgorJj26U',
    '37i9dQZF1DWWMOmoXKqHTD',
    '37i9dQZF1DX4o1oenSJRJd',
    '37i9dQZF1DX4UtSsGT1Sbe',
    '37i9dQZF1DX76Wlfdnj7AP',
    '37i9dQZF1DXbTxeAdrVG2l',
    '37i9dQZF1DX4WYpdgoIcn6',
    '37i9dQZF1DX3rxVfibe1L0',
    '37i9dQZF1DX1lVhptIYRda',
    '37i9dQZF1DWSqmBTGDYngZ',
    '37i9dQZF1DX186v583rmzp',
    '37i9dQZF1DX4sWSpwq3LiO',
    '0vvXsWCC9xrXsKd4FyS8kM',
    '37i9dQZF1DXdSjVZQzv2tl',
    '37i9dQZF1DX08mhnhv6g9b',
]

# Flatten the items of every playlist into one list. No de-duplication is
# done here, so a track that sits in several playlists appears several times.
results = []
for i in playlists:
    results.extend(get_playlist_tracks('spotify', i))

In [5]:
# Columns (and column order) of the exported table: five metadata columns
# taken from the playlist item, followed by the audio-feature columns.
FEATURE_COLUMNS = ['id', 'title', 'all_artists', 'popularity', 'release_date',
                   'danceability', 'energy', 'key', 'loudness',
                   'mode', 'acousticness', 'instrumentalness',
                   'liveness', 'valence', 'tempo',
                   'duration_ms', 'time_signature']
AUDIO_COLUMNS = FEATURE_COLUMNS[5:]

# Number of track ids sent per audio-features request (the endpoint accepts
# several ids at once; 100 is the documented maximum).
AUDIO_FEATURES_BATCH_SIZE = 100


def _track_metadata(item):
    """Extract the metadata columns from one playlist item.

    Returns a dict with the keys 'id', 'title', 'all_artists', 'popularity'
    and 'release_date', or None when the item cannot be used (no track
    payload, no track id, or a missing/malformed field).
    """
    track = (item or {}).get('track')
    if not track or not track.get('id'):
        return None
    try:
        return {
            'id': track['id'],
            'title': track['name'],
            # Every credited artist, comma-separated. The previous loop kept
            # only the last artist's name; for single-artist tracks the value
            # is unchanged.
            'all_artists': ', '.join(artist['name'] for artist in track['artists']),
            'popularity': track['popularity'],
            'release_date': track['album']['release_date'],
        }
    except (KeyError, TypeError):
        return None


def _fetch_audio_features(track_ids):
    """Return a {track id: audio-feature dict} mapping for ``track_ids``.

    Ids are requested in batches through the notebook-level ``sp`` client.
    Tracks for which the service returns no features are left out of the
    mapping. A failing request is reported and its batch skipped, so one bad
    request does not abort the whole collection.
    """
    features_by_id = {}
    for start in range(0, len(track_ids), AUDIO_FEATURES_BATCH_SIZE):
        batch = track_ids[start:start + AUDIO_FEATURES_BATCH_SIZE]
        try:
            batch_features = sp.audio_features(batch)
        except Exception as exc:  # deliberately not a bare except: Ctrl-C still interrupts
            print('audio_features request failed; skipping', len(batch), 'tracks:', exc)
            continue
        # Results come back in request order, with None for unknown tracks.
        for track_id, features in zip(batch, batch_features or []):
            if features:
                features_by_id[track_id] = features
    return features_by_id


# 1) Metadata for every usable playlist item (duplicates across playlists kept).
track_rows = [row for row in map(_track_metadata, results) if row is not None]

# 2) Audio features, requested once per distinct track id.
audio_by_id = _fetch_audio_features(list(dict.fromkeys(row['id'] for row in track_rows)))

# 3) Join both parts; tracks without (complete) audio features are dropped,
#    as they were before.
records = []
for row in track_rows:
    audio = audio_by_id.get(row['id'])
    if audio is None:
        continue
    try:
        audio_values = {column: audio[column] for column in AUDIO_COLUMNS}
    except KeyError:
        continue
    records.append({**row, **audio_values})

# Build the frame in one go instead of concatenating row by row; with no
# usable tracks this still yields an empty frame with the expected columns.
features_df = pd.DataFrame(records, columns=FEATURE_COLUMNS)

In [7]:
# Persist the collected library; the row index is not written to the file.
features_df.to_csv(path_or_buf='library_3366rows.csv', index=False)