In [4]:
# Imports
import sys
import pandas as pd
import pathlib
import spotipy
from spotipy.oauth2 import SpotifyOAuth, SpotifyClientCredentials
import pprint
import json

import api_setup

# Locate the repository from the kernel's working directory.
# NOTE(review): assumes the notebook is started three directory levels below
# the repository root - confirm if the notebook is moved.
CWD = pathlib.Path.cwd()
REPO_ROOT = CWD.parent.parent.parent
DATA_DIR = REPO_ROOT.joinpath("data")

# Pieces of the per-user export folder: <DATA_DIR>/<APP>/<NAME>/<SUBDIRS>
APP = 'spotify'
NAME = 'jojo'
SUBDIRS = 'my_spotify_data/MyData/'
USER_DATA_DIR = DATA_DIR.joinpath(APP, NAME, SUBDIRS)


In [5]:
# Understanding StreamingHistory
# Read the two StreamingHistory export files and stack them into one frame.
# The concat keeps each file's own row labels (no ignore_index), so the
# combined frame is not re-numbered.
history_parts = [
    pd.read_json(USER_DATA_DIR / file_name)
    for file_name in ("StreamingHistory0.json", "StreamingHistory1.json")
]
streaming_history_0, streaming_history_1 = history_parts
streaming_history = pd.concat(history_parts)
streaming_history

Unnamed: 0,endTime,artistName,trackName,msPlayed
0,2021-10-13 21:16,U.S. Girls,L-Over,90543
1,2021-10-14 00:55,Indigo De Souza,What Are We Gonna Do Now,1405
2,2021-10-14 00:55,Yelle,Ba$$in,3828
3,2021-10-14 00:55,Amy Winehouse,Addicted,1317
4,2021-10-14 00:55,Baby Keem,APOLOGIZE,1301
...,...,...,...,...
9436,2022-10-14 17:39,Kanye West,Love Lockdown,2360
9437,2022-10-14 17:39,Car Seat Headrest,Nervous Young Inhumans,160
9438,2022-10-14 17:43,S3RL,Mtc,260530
9439,2022-10-14 17:48,Kanye West,Love Lockdown,270306


In [6]:
# Understanding the spotipy API and making sure it's installed correctly
# Credentials come from the "api-keys" file at the repo root; the parsed
# result is indexed by 'client_id' and 'client_secret' below.
env_vars = api_setup.parse_api_kvs(REPO_ROOT / "api-keys")

auth_manager = SpotifyClientCredentials(
    client_id=env_vars['client_id'],
    client_secret=env_vars['client_secret'],
)
spotify = spotipy.Spotify(backoff_factor=2, client_credentials_manager=auth_manager)

<spotipy.client.Spotify object at 0x00000291BD3923A0>


In [7]:
# Fetch every page of albums for one artist, then print the album names.
birdy_uri = 'spotify:artist:2WX2uTcsvV5OnS0inACecP'
results = spotify.artist_albums(birdy_uri, album_type='album')
albums = results['items']

# Keep following the 'next' link until the current page reports none.
while True:
    if not results['next']:
        break
    results = spotify.next(results)
    albums += results['items']

for album_name in (entry['name'] for entry in albums):
    print(album_name)

Young Heart
Beautiful Lies
Beautiful Lies
Beautiful Lies (Deluxe)
Beautiful Lies (Deluxe)
Fire Within
Fire Within
Fire Within (Deluxe)
Fire Within (Deluxe)
Fire Within (Deluxe)
Live in London
Birdy
Birdy
Birdy
Birdy
Birdy (Deluxe Version)


In [None]:
# Mapping artist names to URIs using search?
# Trial run on the first 10 distinct artists only, printing the raw search
# payload so its structure can be inspected.
artist_name_to_uri = {}
pp = pprint.PrettyPrinter(indent=1)
# Slice with [:10]: indexing with [10] picks a single name, and the loop would
# then iterate over that name's characters instead of over artists.
for artist in streaming_history.artistName.unique()[:10]:
    try:
        # The search call already returns parsed data, so no JSON round-trip.
        search_result = spotify.search(artist, limit=1, type='artist')
        pp.pprint(search_result)
        matches = search_result['artists']['items']
        artist_uri = matches[0]['uri'] if matches else None
    except Exception as e:
        # Best-effort lookup: report the failure and move on to the next artist.
        print(e)
        print(f"Unable to find artist {artist}")
        continue
    if artist_uri is None:
        # The search succeeded but returned no artist for this name.
        print(f"Unable to find artist {artist}")
        continue
    artist_name_to_uri[artist] = artist_uri
print(artist_name_to_uri)

In [None]:
# Doing the same for all of us and turning that into a json file
# Load every StreamingHistory*.json export of each group member into a single
# frame and collect the distinct artist names found in it.
pp = pprint.PrettyPrinter()
list_all_members_streaming_history = []
all_members_artists_uris = {}
artist_name_to_uri = {}  # reset here; filled by the lookup loop in the next cell
for group_member in ['jojo', 'nick', 'richard']:
    # Keep the path local: rebinding USER_DATA_DIR here would leave it pointing
    # at the last member's folder, so re-running the single-user cell would
    # silently load the wrong person's history.
    data_path = DATA_DIR / APP / group_member / SUBDIRS
    # glob() yields files in arbitrary order; sort for a reproducible row order.
    data_files = sorted(data_path.glob("StreamingHistory*.json"))
    for file in data_files:
        with open(file, encoding='utf-8') as thisfile:
            data = pd.read_json(thisfile)
        list_all_members_streaming_history.append(data)

all_members_streaming_history = pd.concat(list_all_members_streaming_history, ignore_index=True)
all_artists = all_members_streaming_history.artistName.unique()
pp.pprint(all_artists)
print(len(all_artists))

In [None]:
# Look up a Spotify URI for every distinct artist, then save the
# name -> URI mapping to artist_uris.json.
for idx, artist in enumerate(all_artists):
    # Report progress on every 100th artist regardless of whether its lookup
    # succeeds (inside the try block the message was skipped on failures).
    if idx % 100 == 0:
        print(f"Artists obtained: {idx} / {len(all_artists)}")
    try:
        search_query = f"artist:{artist}"
        # The search call already returns parsed data, so no JSON round-trip.
        search_result = spotify.search(q=search_query, limit=1, type='artist')
        matches = search_result['artists']['items']
        artist_uri = matches[0]['uri'] if matches else None
    except Exception as e:
        # Best-effort lookup: report the failure and move on to the next artist.
        print(e)
        print(f"Unable to find artist {artist}")
        continue
    if artist_uri is None:
        # The search succeeded but returned no artist for this name.
        print(f"Unable to find artist {artist}")
        continue
    print(f"{artist} URI: {artist_uri}")
    artist_name_to_uri[artist] = artist_uri

with open("artist_uris.json", "w", encoding="utf-8") as f:
    json.dump(artist_name_to_uri, f)