Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
78 lines (63 sloc) 3.39 KB
# get song-specific metadata from the spotify API for all unique songs and generate csv
# NOTE: this utility uses the Spotify API's Client Credentials flow, which does not require an end user to authorize; that is sufficient here because only track metadata is requested
# https://developer.spotify.com/web-api/authorization-guide/#client-credentials-flow
import re, requests, json, csv
# get list of unique track IDs from combined chart CSV
# The IDs are taken from the FIRST row of the file (the header row): every
# cell after the first one is used as a track ID.
# NOTE(review): the first cell is presumably a row-label column heading --
# confirm against the layout of streams.csv.
uniqueIDs = []
chartPath = "./streams/streams.csv"
# Only the first row is needed, so read just that row instead of iterating
# the whole file; `with` closes the file even if reading raises.
with open(chartPath, encoding='utf-8') as inputFile:
    inputReader = csv.reader(inputFile)
    headerRow = next(inputReader, None)
# An empty file (None) or a blank first line ([]) leaves uniqueIDs empty
# rather than raising.
if headerRow:
    uniqueIDs = headerRow[1:]
# Column heads for the output CSV: five track-level fields followed by the
# twelve audio-feature fields, in the order the rows are written.
newColumnHeads = [
    'id',
    'artist',
    'track_name',
    'popularity',
    'duration_ms',
    'speechiness',
    'loudness',
    'mode',
    'time_signature',
    'instrumentalness',
    'danceability',
    'liveness',
    'valence',
    'tempo',
    'key',
    'acousticness',
    'energy',
]

# Spotify developer API credentials -- fill these in before running.
client_id = ''
client_secret = ''

# Token endpoint and the form body posted to it.
auth_endpoint = 'https://accounts.spotify.com/api/token'
body_params = dict(grant_type='client_credentials')

# Module-level holder for the access token; empty until a token is fetched.
access_token = ''
def getAccessToken():
    """Request an access token and store it in the global ``access_token``.

    Posts ``body_params`` to ``auth_endpoint``, passing ``client_id`` and
    ``client_secret`` as the request's ``auth`` credentials.

    Returns:
        The access token value (also assigned to the module-level
        ``access_token``).

    Raises:
        requests.HTTPError: if the token endpoint responds with an error
            status (via ``raise_for_status``).
        ValueError: if the response body is not valid JSON.
        KeyError: if the JSON payload has no ``access_token`` field.
    """
    global access_token
    token_response = requests.post(auth_endpoint, data = body_params, auth = (client_id, client_secret))
    token_response.raise_for_status()
    print('token response:', token_response)
    # Parse the body as JSON instead of regex-matching the raw text: this
    # handles any valid JSON formatting/escaping and fails with a clear
    # KeyError when the field is absent (rather than an AttributeError on
    # a None match object).
    access_token = json.loads(token_response.text)['access_token']
    return access_token
# Base URLs for the two per-track lookups; a track ID is appended to each.
track_url = 'https://api.spotify.com/v1/tracks/'
features_url = 'https://api.spotify.com/v1/audio-features/'

# Fetch the token (populates the module-level access_token), then build the
# header that is sent with every API request.
getAccessToken()
bearer_value = 'Bearer {}'.format(access_token)
authorization_header = {'Authorization': bearer_value}
# Fetch metadata for every unique track and write one CSV row per track.
# Each row holds the track-level fields followed by the audio-feature fields,
# in the same order as the column heads written as the first row.
trackCount = len(uniqueIDs)
audioFeatureKeys = (
    'speechiness', 'loudness', 'mode', 'time_signature', 'instrumentalness',
    'danceability', 'liveness', 'valence', 'tempo', 'key', 'acousticness',
    'energy',
)

# filename and location for output csv
# `with` guarantees the file is flushed and closed even when a request fails
# part-way through (raise_for_status() aborts the run on any HTTP error), so
# the rows written so far are not lost.
with open('./meta/meta.csv', 'w', newline='', encoding='utf-8') as outputFile:
    outputWriter = csv.writer(outputFile)
    outputWriter.writerow(newColumnHeads)
    for i, trackID in enumerate(uniqueIDs):  # for each unique track...
        print(i, " processing track, ", i, " of ", trackCount)
        # get track metadata
        get_track = requests.get(track_url + trackID, headers=authorization_header)
        get_track.raise_for_status()
        track_json = json.loads(get_track.text)
        # get audio metadata
        get_features = requests.get(features_url + trackID, headers=authorization_header)
        get_features.raise_for_status()
        feature_json = json.loads(get_features.text)
        # drop the desired fields into a list matching the column heads
        trackMeta = [
            track_json['id'],
            track_json['artists'][0]['name'],  # first listed artist only
            track_json['name'],
            track_json['popularity'],
            track_json['duration_ms'],
        ]
        featureMeta = [feature_json[key] for key in audioFeatureKeys]
        # write the new row to the CSV
        outputWriter.writerow(trackMeta + featureMeta)
        print(i, " new row written")
# The input file was already closed right after the track IDs were read, so
# no further cleanup is needed here.
print("yeah it worked! done.")