In [1]:
import os, json
import pandas as pd
import numpy as np

import spotipy
import spotipy.util as util

In [2]:
# set API keys
# (the key file is read inside a `with` block so its handle is closed
#  immediately instead of lingering for the lifetime of the kernel)
with open("../data/api-keys.json") as keys_file:
    apikeys = json.load(keys_file)

# export the credentials as environment variables for the Spotify client
os.environ["SPOTIPY_CLIENT_ID"]     = apikeys["spotipy-client-id"]
os.environ["SPOTIPY_CLIENT_SECRET"] = apikeys["spotipy-client-secret"]
os.environ["SPOTIPY_REDIRECT_URI"]  = apikeys["redirect-url"]

# set my user_id
user_id = '129874447'

In [3]:
# connect to spotify: obtain a user token for the 'user-library-read' scope,
# then build the client that the rest of the notebook uses as `sp`
library_scope = 'user-library-read'
token = util.prompt_for_user_token(user_id, scope = library_scope)
sp = spotipy.Spotify(auth = token)

In [4]:
def get_saved_artists(limit = 50, offset = 0, client = None):
    """Collect the artists of every track in the current user's saved library.

    Pages through ``current_user_saved_tracks`` and emits one record per
    (track, artist) pairing, so an artist appears once for every saved track
    they are credited on.

    Parameters
    ----------
    limit : int
        Page size passed to each request. Must be positive.
    offset : int
        Index of the first saved track to fetch.
    client : object, optional
        Object exposing ``current_user_saved_tracks(limit=..., offset=...)``
        that returns a dict with ``'total'`` and ``'items'``. Defaults to the
        notebook-level ``sp`` client.

    Returns
    -------
    list of dict
        Records with keys ``'artist'`` (the artist name) and ``'artist_id'``.

    Raises
    ------
    ValueError
        If ``limit`` is not positive (the offset would never advance).
    """
    if limit <= 0:
        raise ValueError("limit must be a positive integer")

    # fall back to the notebook's shared client only when none is supplied
    if client is None:
        client = sp

    saved_artists = [ ]

    # loop through to get all saved tracks; each page is requested exactly once
    # and its 'total' field tells us when the last page has been consumed
    while True:
        saved_tracks_obj = client.current_user_saved_tracks(limit = limit, offset = offset)

        # add artist information to running list
        for track_obj in saved_tracks_obj['items']:
            for artist in track_obj['track']['artists']:
                saved_artists.append({
                    'artist': artist['name'],
                    'artist_id': artist['id']
                })

        offset += limit
        if offset >= saved_tracks_obj['total']:
            break

    return saved_artists

def get_artist_genres(artist_ids):
    """Print ``artist_ids`` to stdout and return ``None``.

    NOTE(review): this looks like a placeholder -- it performs no genre
    lookup and nothing visible in the notebook calls it.
    """
    print(artist_ids)

In [5]:
# get all of the artists in my saved library (one row per artist-song)
# columns are named explicitly so that an empty library still yields a frame
# with 'artist' / 'artist_id' columns rather than a column-less one that the
# later groupby cannot use
artists = pd.DataFrame(get_saved_artists(), columns = ['artist', 'artist_id'])

In [6]:
# tally how many rows each (artist, artist_id) pair has in `artists`,
# then pull out the id column of that de-duplicated table
artists_uniq = (
    artists
    .groupby(['artist', 'artist_id'])
    .size()
    .reset_index(name = "count")
)
artist_ids = artists_uniq['artist_id']

In [7]:
artist_genres = [ ]

# walk through artist_ids in batches of 50 and record, for every artist
# returned, its id, name and genres (genres flattened to one '|'-joined string)
for start in range(0, len(artist_ids), 50):
    id_batch = artist_ids[start:start+50]
    artist_genres.extend([
        {
            'id': artist_obj['id'],
            'name': artist_obj['name'],
            'genres': '|'.join(artist_obj['genres'])
        }
        for artist_obj in sp.artists(id_batch)['artists']
    ])

In [8]:
# merge genres onto artist information
# - the genre frame gets explicit columns so an empty lookup result still has
#   the 'id' key that the merge needs
# - any 'genres' column left behind by an earlier run of this cell is dropped
#   first, so re-running does not produce 'genres_x' / 'genres_y'
artist_genres_df = pd.DataFrame(artist_genres, columns = ['id', 'name', 'genres'])
artists_uniq = (
    artists_uniq
    .drop(columns = ['genres'], errors = 'ignore')
    .merge(artist_genres_df, how = 'left', left_on = 'artist_id', right_on = 'id')
    .drop(['id', 'name'], axis = 1)
)
artists_uniq

Unnamed: 0,artist,artist_id,count,genres
0,2 Chainz,17lzZA2AlOHwCwFALHttmp,1,atl hip hop|gangster rap|hip hop|pop rap|rap|s...
1,99 Neighbors,5uhqkMm8dyQvX83kl4Znq0,2,vermont indie
2,AHI,08Uextujt6ZT2iQmSYAJfH,2,canadian folk
3,ARMAND MARGJEKA,6FfsOtDdMCFO5Qo0H8rwPa,2,
4,ASL,2XxgHIPXNgHGbvtBBGZrKm,1,
...,...,...,...,...
889,half•alive,7sOR7gk6XUlGnxj3p9F54k,2,indie pop|modern alternative rock|modern rock|...
890,illuminati hotties,3ztRX1UoIOsFqpD7dB6R8O,1,indie punk|indie rock|small room
891,of Montreal,5xeBMeW0YzWIXSVzAxhM8O,1,alternative dance|alternative rock|anti-folk|a...
892,yMusic,4h7DUL1L3RrCzquDp8xQXY,1,modern performance


In [9]:
# write the artist/genre table out as a CSV file, leaving the row index out
genres_csv_path = '../data/2020.07.25 - artist genres.csv'
artists_uniq.to_csv(genres_csv_path, index = False)

In [10]:
# parse and re-save every-noise-at-once data
# (the .txt scrape is parsed as JSON; it is opened in a `with` block so the
#  file handle is closed as soon as parsing finishes)
with open("../data/2020.07.25 - every-noise-at-once-scrape.txt") as scrape_file:
    every_noise = pd.DataFrame(json.load(scrape_file))

every_noise.to_csv("../data/2020.07.25 - every-noise-at-once-scrape.csv", index = False)