In [216]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json

In [217]:
# Load the private Spotify client credentials from a local JSON file.
# `with` blocks make sure each file handle is closed as soon as it has been read
# (a bare `json.load(open(...))` leaves the handle open).

with open('authorization.json') as credentials_file:
    credentials = json.load(credentials_file)
client_id = credentials['client_id']
client_secret = credentials['client_secret']


# Load the list of playlists and pick the URI of the one to analyse.
# `playlist_index` selects which entry of playlists.json is used.
playlist_index = 0
with open('playlists.json') as playlists_file:
    playlists = json.load(playlists_file)
playlist_uri = playlists[playlist_index]['uri']


In [218]:
# Build the Spotify client from the client id/secret loaded above.

client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [219]:
# Split the playlist URI (colon-separated) into the pieces needed for the API call.
# Two layouts are handled:
#   spotify:playlist:<playlist_id>
#   spotify:user:<username>:playlist:<playlist_id>   (legacy layout)
# The playlist id is the last segment in both, so it is taken from the end
# instead of from a fixed position (which would return the username for the
# legacy layout).

uri = playlist_uri
uri_parts = uri.split(':')

# Only the legacy layout actually carries a username; for the short layout the
# second segment is kept, exactly as before, so downstream cells see the same value.
is_legacy_user_uri = len(uri_parts) >= 5 and uri_parts[1] == 'user'
username = uri_parts[2] if is_legacy_user_uri else uri_parts[1]
playlist_id = uri_parts[-1]

In [220]:
# Request the playlist from Spotify, asking only for its 'tracks' field.

results = sp.user_playlist(
    user=username,
    playlist_id=playlist_id,
    fields='tracks',
)

In [221]:
# Create empty lists for the playlist's track ids, titles and artists.
# The four lists are filled in lockstep, so position i refers to the same track in each.

playlist_tracks_data = results['tracks']
playlist_tracks_id = []
playlist_tracks_titles = []
playlist_tracks_artists = []
playlist_tracks_first_artists = []

# Grab the id, name and artists of every track contained in the playlist.

for track in playlist_tracks_data['items']:
    track_info = track.get('track')
    # An entry without a track object or without an id cannot be looked up for
    # audio features later on, so it is skipped in all four lists at once.
    if not track_info or not track_info.get('id'):
        continue
    playlist_tracks_id.append(track_info['id'])
    playlist_tracks_titles.append(track_info['name'])
    # All artists credited on the song, in the order the API lists them.
    artist_list = [artist['name'] for artist in track_info.get('artists') or []]
    playlist_tracks_artists.append(artist_list)
    # Guard against an empty artist list instead of raising IndexError.
    playlist_tracks_first_artists.append(artist_list[0] if artist_list else None)

In [222]:
# Look up the audio features for every track id collected from the playlist.

features = sp.audio_features(tracks=playlist_tracks_id)

In [223]:
import numpy as np
import pandas as pd

In [224]:
# Create a DataFrame of the audio features, one row per requested track.
# An entry of `features` may be None when no features are available for a track.
# Such entries are turned into empty records so the row count (and therefore the
# alignment with the title/artist lists) is preserved; they appear as all-NaN rows
# instead of making `features[0].keys()` or the DataFrame constructor fail.

feature_rows = [row if row is not None else {} for row in features]
# Column order comes from the first entry that actually has data.
feature_columns = next((list(row.keys()) for row in feature_rows if row), None)
features_df = pd.DataFrame(data=feature_rows, columns=feature_columns)

In [225]:
# Attach each track's title and artist information to its audio features,
# then keep only the columns of interest with the identifying ones first.

features_df['title'] = playlist_tracks_titles
features_df['first_artist'] = playlist_tracks_first_artists
features_df['all_artists'] = playlist_tracks_artists

identity_columns = ['id', 'title', 'first_artist', 'all_artists']
audio_columns = [
    'danceability', 'energy', 'key', 'loudness',
    'mode', 'acousticness', 'instrumentalness',
    'liveness', 'valence', 'tempo',
    'duration_ms', 'time_signature',
]
features_df = features_df[identity_columns + audio_columns]
features_df.head()

Unnamed: 0,id,title,first_artist,all_artists,danceability,energy,key,loudness,mode,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,0tgBtQ0ISnMQOKorrN9HLX,Last Train Home,John Mayer,[John Mayer],0.591,0.831,4,-6.612,1,0.235,0.00142,0.0571,0.678,93.007,187307,4
1,3MthJpM1IEYp2ulZe00LvP,Shouldn't Matter but It Does,John Mayer,[John Mayer],0.549,0.324,4,-11.769,1,0.457,2e-06,0.133,0.268,80.888,236733,4
2,4T6FWA703h6H7zk1FoSARw,New Light,John Mayer,[John Mayer],0.826,0.694,7,-7.862,1,0.316,0.0285,0.074,0.852,123.932,217427,4
3,3hwjxbqGvTRUPi38fLgeM7,Why You No Love Me,John Mayer,[John Mayer],0.635,0.562,9,-7.315,1,0.387,0.000366,0.113,0.501,97.988,255213,4
4,4VFGpluBaU1WcquEMzhSz6,Wild Blue,John Mayer,[John Mayer],0.816,0.596,0,-8.989,1,0.557,0.812,0.0842,0.876,123.027,252413,4


In [226]:
# Convert the track length from milliseconds to minutes (ms -> s -> min).
# NOTE(review): the column is labelled 'duration (s)' although it holds minutes;
# the label is kept because a later cell selects the column by this exact name.

features_df['duration (s)'] = features_df['duration_ms'].div(1000).div(60)
features_df.head()

Unnamed: 0,id,title,first_artist,all_artists,danceability,energy,key,loudness,mode,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,duration (s)
0,0tgBtQ0ISnMQOKorrN9HLX,Last Train Home,John Mayer,[John Mayer],0.591,0.831,4,-6.612,1,0.235,0.00142,0.0571,0.678,93.007,187307,4,3.121783
1,3MthJpM1IEYp2ulZe00LvP,Shouldn't Matter but It Does,John Mayer,[John Mayer],0.549,0.324,4,-11.769,1,0.457,2e-06,0.133,0.268,80.888,236733,4,3.94555
2,4T6FWA703h6H7zk1FoSARw,New Light,John Mayer,[John Mayer],0.826,0.694,7,-7.862,1,0.316,0.0285,0.074,0.852,123.932,217427,4,3.623783
3,3hwjxbqGvTRUPi38fLgeM7,Why You No Love Me,John Mayer,[John Mayer],0.635,0.562,9,-7.315,1,0.387,0.000366,0.113,0.501,97.988,255213,4,4.25355
4,4VFGpluBaU1WcquEMzhSz6,Wild Blue,John Mayer,[John Mayer],0.816,0.596,0,-8.989,1,0.557,0.812,0.0842,0.876,123.027,252413,4,4.206883


In [227]:
# Columns kept for the final table. `.copy()` makes `final_table1` an independent
# frame, so assigning to its columns later neither touches `features_df` nor
# triggers pandas' SettingWithCopyWarning.
final_table1 = features_df[['title', 'first_artist', 'danceability', 'key', 'acousticness', 'valence', 'tempo', 'duration (s)', 'time_signature']].copy()

In [228]:
# Preview the first five rows of the final table.
final_table1.head(n=5)

Unnamed: 0,title,first_artist,danceability,key,acousticness,valence,tempo,duration (s),time_signature
0,Last Train Home,John Mayer,0.591,4,0.235,0.678,93.007,3.121783,4
1,Shouldn't Matter but It Does,John Mayer,0.549,4,0.457,0.268,80.888,3.94555,4
2,New Light,John Mayer,0.826,7,0.316,0.852,123.932,3.623783,4
3,Why You No Love Me,John Mayer,0.635,9,0.387,0.501,97.988,4.25355,4
4,Wild Blue,John Mayer,0.816,0,0.557,0.876,123.027,4.206883,4


In [229]:
# The key column holds numeric codes that correspond to musical keys:
# 0 = C, 1 = C#/Db, 2 = D, ... 11 = B.
# `assign` returns a new frame with the translated column instead of writing into
# `final_table1` in place, which avoids a SettingWithCopyWarning when
# `final_table1` is a column subset of another frame.

key_names = {
    0: 'C', 1: 'C#/Db', 2: 'D', 3: 'D#/Eb', 4: 'E', 5: 'F',
    6: 'F#/Gb', 7: 'G', 8: 'G#/Ab', 9: 'A', 10: 'A#/Bb', 11: 'B',
}
final_table1 = final_table1.assign(key=final_table1['key'].replace(key_names))
final_table1.head()

Unnamed: 0,title,first_artist,danceability,key,acousticness,valence,tempo,duration (s),time_signature
0,Last Train Home,John Mayer,0.591,E,0.235,0.678,93.007,3.121783,4
1,Shouldn't Matter but It Does,John Mayer,0.549,E,0.457,0.268,80.888,3.94555,4
2,New Light,John Mayer,0.826,G,0.316,0.852,123.932,3.623783,4
3,Why You No Love Me,John Mayer,0.635,A,0.387,0.501,97.988,4.25355,4
4,Wild Blue,John Mayer,0.816,C,0.557,0.876,123.027,4.206883,4


In [230]:
# Display the finished table (long frames are shown truncated by the notebook).
final_table1

Unnamed: 0,title,first_artist,danceability,key,acousticness,valence,tempo,duration (s),time_signature
0,Last Train Home,John Mayer,0.591,E,0.235,0.678,93.007,3.121783,4
1,Shouldn't Matter but It Does,John Mayer,0.549,E,0.457,0.268,80.888,3.945550,4
2,New Light,John Mayer,0.826,G,0.316,0.852,123.932,3.623783,4
3,Why You No Love Me,John Mayer,0.635,A,0.387,0.501,97.988,4.253550,4
4,Wild Blue,John Mayer,0.816,C,0.557,0.876,123.027,4.206883,4
...,...,...,...,...,...,...,...,...,...
95,"Stop This Train - Live at the Nokia Theatre, L...",John Mayer,0.426,D,0.598,0.363,91.882,5.003333,4
96,"Daughters - Live at the Nokia Theatre, Los Ang...",John Mayer,0.546,D,0.766,0.407,121.743,5.076667,3
97,Comfortable - EP Version,John Mayer,0.328,F,0.806,0.114,84.730,4.988883,4
98,Neon - EP Version,John Mayer,0.592,A#/Bb,0.833,0.814,109.082,3.879333,4


In [232]:
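# Export the final table to CSV. Run this only once; the call below is left
# commented out so that re-running the notebook does not write the file again.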

#final_table1.to_csv('/Users/aubreecurtis/Documents/BYU/STATS/386/Homework/Spotify_Scrape/johnmayerdata.csv') 