In [148]:
import os
import sys
import json

import spotipy
import spotipy.util as util

import numpy as np
import pandas as pd

from datetime import datetime

In [17]:
apikeys = json.load(open("../data/api-keys.json"))

In [12]:
os.environ["SPOTIPY_CLIENT_ID"]     = apikeys["spotipy-client-id"]
os.environ["SPOTIPY_CLIENT_SECRET"] = apikeys["spotipy-client-secret"]
os.environ["SPOTIPY_REDIRECT_URI"]  = apikeys["redirect-url"]

In [101]:
token = util.prompt_for_user_token(apikeys["spotify-user-id"], scope = 'user-library-read')
sp    = spotipy.Spotify(auth = token)

In [118]:
def pull_saved_tracks(limit = 50, offset = 0):
    saved_tracks_obj = sp.current_user_saved_tracks(limit = limit, offset = offset)
    saved_tracks = [{
        'track': track_obj['track']['name'],
        'track_uri': track_obj['track']['uri'],
        'artist': artist['name'],
        'artist_uri': artist['uri'],
    } for track_obj in saved_tracks_obj['items'] for artist in track_obj['track']['artists']]
    if saved_tracks_obj['next']:
        return saved_tracks + pull_saved_tracks(limit = limit, offset = offset + limit)
    else:
        return saved_tracks

In [119]:
saved_tracks = pull_saved_tracks()

In [124]:
saved_tracks_df = pd.DataFrame(saved_tracks)
saved_tracks_df.head(10)

Unnamed: 0,track,track_uri,artist,artist_uri
0,Snow White,spotify:track:0H6hMpN8zxZT3ToOelM5Cl,Sjowgren,spotify:artist:32Ko3nL0210QAt14S3Rs4Y
1,Not Strong Enough,spotify:track:72GNW1xKoi5BAhMYAW6e7e,boygenius,spotify:artist:1hLiboQ98IQWhpKeP9vRFw
2,Not Strong Enough,spotify:track:72GNW1xKoi5BAhMYAW6e7e,Julien Baker,spotify:artist:12zbUHbPHL5DGuJtiUfsip
3,Not Strong Enough,spotify:track:72GNW1xKoi5BAhMYAW6e7e,Phoebe Bridgers,spotify:artist:1r1uxoy19fzMxunt3ONAkG
4,Not Strong Enough,spotify:track:72GNW1xKoi5BAhMYAW6e7e,Lucy Dacus,spotify:artist:07D1Bjaof0NFlU32KXiqUP
5,In The Morning - Solstice Version,spotify:track:46ac5ka5z3kxiTtBcJ2gsV,Fleet Foxes,spotify:artist:4EVpmkEwrLYEg6jIsiPMIb
6,Apocalypse,spotify:track:3AVrVz5rK8Hrqo9YGiVGN5,Cigarettes After Sex,spotify:artist:1QAJqy2dA3ihHBFIHRphZj
7,Wildfire,spotify:track:2GQomOm5bdbNnUooS3HEuF,Cautious Clay,spotify:artist:6iWuBN32BqCJAeXW6o3nil
8,Growing Sideways,spotify:track:1JcIXOir94YUYBt2cXTzn2,Noah Kahan,spotify:artist:2RQXRUsr4IW1f3mKyKsy4B
9,Borderline,spotify:track:54SmecWdCNXBjAXBuVrSV6,Joesef,spotify:artist:28EyduqESEOVMO6vglvaUZ


In [126]:
artist_uris = saved_tracks_df['artist_uri'].unique()
len(artist_uris)

977

In [127]:
def pull_artist_genres(artist_uris, limit = 50):
    artists_obj = sp.artists(artist_uris[:limit])
    artists = [{'artist': a['name'], 
                'uri': a['uri'], 
                'genres': a['genres']} for a in artists_obj['artists']]
    if len(artist_uris) > limit:
        return artists + pull_artist_genres(artist_uris[limit:], limit = limit)
    else:
        return artists

In [129]:
artist_genres = pull_artist_genres(artist_uris)

In [144]:
artist_genres_df = (pd.DataFrame([{'artist': a['artist'], 
                                   'artist_uri': a['uri'], 
                                   'genre': g} 
                                  for a in artist_genres 
                                  for g in (a['genres'] if len(a["genres"]) > 0 else [np.nan])])
                    .merge((saved_tracks_df.drop("track", axis = 1)
                            .groupby(["artist", "artist_uri"])
                            .nunique()
                            .reset_index()),
                           on = ["artist", "artist_uri"], how = "inner"))
artist_genres_df.head()

Unnamed: 0,artist,artist_uri,genre,track_uri
0,Sjowgren,spotify:artist:32Ko3nL0210QAt14S3Rs4Y,indie folk,5
1,Sjowgren,spotify:artist:32Ko3nL0210QAt14S3Rs4Y,indie pop,5
2,Sjowgren,spotify:artist:32Ko3nL0210QAt14S3Rs4Y,indie poptimism,5
3,Sjowgren,spotify:artist:32Ko3nL0210QAt14S3Rs4Y,indie rock,5
4,Sjowgren,spotify:artist:32Ko3nL0210QAt14S3Rs4Y,modern rock,5


In [147]:
counts_by_genre_df = (artist_genres_df
                      .groupby("genre")
                      .agg(n_tracks = ("track_uri", "sum"), n_artists = ("artist_uri", "nunique"))
                      .reset_index())
counts_by_genre_df

Unnamed: 0,genre,n_tracks,n_artists
0,5th wave emo,1,1
1,abstract hip hop,5,3
2,acoustic pop,165,24
3,adult standards,17,7
4,afrobeat,4,1
...,...,...,...
566,western americana,1,1
567,western mass indie,1,1
568,wisconsin indie,8,2
569,wonky,2,2


In [152]:
counts_by_genre_df.to_csv(f"../data/bt-spotify-genres-{datetime.today().strftime('%Y%m%d')}.csv", index = False)