In [1]:
# Imports
import sys
import pandas as pd
import pathlib
import spotipy
import spotipy as spotipy
from spotipy.oauth2 import SpotifyOAuth, SpotifyClientCredentials
import pprint
import json

import api_setup

CWD = pathlib.Path.cwd()
REPO_ROOT = CWD.parent.parent.parent
DATA_DIR = REPO_ROOT / "data"
APP = 'spotify'
NAME = 'jojo'
SUBDIRS = 'my_spotify_data/MyData/'
USER_DATA_DIR = DATA_DIR / APP / NAME / SUBDIRS


In [2]:
# Understanding StreamingHistory
streaming_history_0 = pd.read_json(USER_DATA_DIR / "StreamingHistory0.json")
streaming_history_1 = pd.read_json(USER_DATA_DIR / "StreamingHistory1.json")
streaming_history = pd.concat([streaming_history_0, streaming_history_1])
streaming_history

Unnamed: 0,endTime,artistName,trackName,msPlayed
0,2021-10-13 21:16,U.S. Girls,L-Over,90543
1,2021-10-14 00:55,Indigo De Souza,What Are We Gonna Do Now,1405
2,2021-10-14 00:55,Yelle,Ba$$in,3828
3,2021-10-14 00:55,Amy Winehouse,Addicted,1317
4,2021-10-14 00:55,Baby Keem,APOLOGIZE,1301
...,...,...,...,...
9436,2022-10-14 17:39,Kanye West,Love Lockdown,2360
9437,2022-10-14 17:39,Car Seat Headrest,Nervous Young Inhumans,160
9438,2022-10-14 17:43,S3RL,Mtc,260530
9439,2022-10-14 17:48,Kanye West,Love Lockdown,270306


In [56]:
# Understanding the spotipy API and making sure it's installed correctly
env_vars = api_setup.parse_api_kvs(REPO_ROOT / "api-keys")

auth_manager = SpotifyClientCredentials(env_vars['client_id'], env_vars['client_secret'])
spotify = spotipy.Spotify(client_credentials_manager=auth_manager, backoff_factor=2)

In [10]:
birdy_uri = 'spotify:artist:2WX2uTcsvV5OnS0inACecP'
results = spotify.artist_albums(birdy_uri, album_type='album')
albums = results['items']
while results['next']:
    results = spotify.next(results)
    albums.extend(results['items'])

for album in albums:
    print(album['name'])

Young Heart
Beautiful Lies
Beautiful Lies
Beautiful Lies (Deluxe)
Beautiful Lies (Deluxe)
Fire Within
Fire Within
Fire Within (Deluxe)
Fire Within (Deluxe)
Fire Within (Deluxe)
Live in London
Birdy
Birdy
Birdy
Birdy
Birdy (Deluxe Version)


``` python
# Mapping artist names to URIs using search?

artist_name_to_uri = {}
pp = pprint.PrettyPrinter(indent=1)
for artist in streaming_history.artistName.unique()[10]:
    try:
        search_result = json.loads(json.dumps(spotify.search(artist, limit=1, type='artist')))
        pp.pprint(search_result)
        artist_uri = search_result['artists']['items'][0]['uri']
        artist_name_to_uri[artist] = artist_uri
    except Exception:
        print(f"Unable to find artist {artist}")
        continue
print(artist_name_to_uri)
```

In [42]:
# Doing the same for all of us and turning that into a json file
pp = pprint.PrettyPrinter()
list_all_members_streaming_history = []
all_members_artists_uris = {}
artist_name_to_uri = {}
for group_member in ['jojo', 'nick', 'richard']:
    data_path = USER_DATA_DIR = DATA_DIR / APP / group_member / SUBDIRS
    data_files = data_path.glob("StreamingHistory*.json")
    for file in data_files:
        with open(file, encoding='utf-8') as thisfile:
            data = pd.read_json(thisfile)
            list_all_members_streaming_history.append(data)

all_members_streaming_history = pd.concat(list_all_members_streaming_history, ignore_index=True)
all_artists = all_members_streaming_history.artistName.unique()
pp.pprint(all_artists)
print(len(all_artists))

array(['U.S. Girls', 'Indigo De Souza', 'Yelle', ..., 'Lunchmoney',
       'Ryan Hemsworth', 'SahBabii'], dtype=object)
6062


In [12]:
for idx, artist in enumerate(all_artists):
    try:
        search_query = f"artist:{artist}"
        search_result = json.loads(json.dumps(spotify.search(q=search_query, limit=1, type='artist')))
        artist_uri = search_result['artists']['items'][0]['uri']
        print(f"{artist} URI: {artist_uri}")
        artist_name_to_uri[artist] = artist_uri
        if idx % 100 == 0:
            print(f"Artists obtained: {idx} / {len(all_artists)}")
    except Exception as e:
        print(e)
        print(f"Unable to find artist {artist}")
        continue

with open("artist_uris.json", "w") as f:
    f.write(json.dumps(artist_name_to_uri))

U.S. Girls URI: spotify:artist:3AHFDfqhSqPBecjQDIOIJA
Artists obtained: 0 / 6062
Indigo De Souza URI: spotify:artist:3ir2pF2mkiEWqyPenKTh5e
Yelle URI: spotify:artist:0WbqAlM1WvfUD6dF7omThd
Amy Winehouse URI: spotify:artist:6Q192DXotxtaysaqNPy5yR
Baby Keem URI: spotify:artist:5SXuuuRpukkTvsLuUknva1
Rihanna URI: spotify:artist:5pKCCKE2ajJHZ9KAiaK11H
Cafuné URI: spotify:artist:581C5Qwl87TskfBEzuoisu
SOPHIE URI: spotify:artist:4Qz5J3GBpmR1LcPgqFbzlW
Emily Montes URI: spotify:artist:7mEGUdpQ3xFdIS31e5Zl8V
Rina Sawayama URI: spotify:artist:2KEqzdPS7M5YwGmiuPTdr5
Faye Webster URI: spotify:artist:5szilpXHcwOqnyKLqGco5j
Cities Aviv URI: spotify:artist:4m0HRALRlPxbZp5SwvktFX
Kero Kero Bonito URI: spotify:artist:6OqhFYFJDnBBHas02HopPT
Two Door Cinema Club URI: spotify:artist:536BYVgOnRky0xjsPT96zl
raphaelö URI: spotify:artist:4brNdzBdAzESb2LRhlz0WP
JAY-Z URI: spotify:artist:3nFkdlSjzX9mRTtwJOzDYB
BENEE URI: spotify:artist:0Cp8WN4V8Tu4QJQwCN5Md4
Car Seat Headrest URI: spotify:artist:5PbpKlxQE0Ktl5

# Getting URIs of all listened to songs!

## First: Collect all tracks we've listened to

In [11]:
unique_songs = all_members_streaming_history.groupby(["trackName", "artistName"])
artist_track_pairs = dict(list(unique_songs)).keys()
print(len(artist_track_pairs))

17619


## Second: Collecting URIs of those tracks that are in our libraries
- It would be super convenient if we avoid having to use the API to query for every song's URI.
- We'll look through all of our libraries (which have the saved songs' URIs, and get what we can from there first.

In [36]:
list_all_members_libraries = []
for group_member in ['jojo', 'nick', 'richard']:
    data_path = USER_DATA_DIR = DATA_DIR / APP / group_member / SUBDIRS
    data_files = data_path.glob("YourLibrary*.json")
    for file in data_files:
        with open(file, "r", encoding='utf-8') as thisfile:
            tracks_data = json.load(thisfile)['tracks']
            for elem in tracks_data:
                list_all_members_libraries.append(elem)

all_members_libraries = pd.DataFrame(list_all_members_libraries)
all_members_libraries

Unnamed: 0,artist,album,track,uri
0,LOONA/yyxy,beauty&thebeat,love4eva (feat. Grimes),spotify:track:4rKEmhNA19JezqVsSQS4yo
1,Danny Brown,Old,Side B [Dope Song],spotify:track:26dfLcCVJUdscPvmUmkuSI
2,Latto,Queen of Da Souf,Muwop (feat. Gucci Mane),spotify:track:231WYcXWUxYSx79tuPtzBk
3,MIMIDEATH,FOAR EVERYWUN FRUM MIMI,abusive,spotify:track:11V7vRMorD73js8sfgBOS7
4,Rico Nasty,Nasty,Pressing Me,spotify:track:3wtVRcrYtWJVs0rBTabJJ8
...,...,...,...,...
1537,SZA,Love Galore,Love Galore,spotify:track:1E2ePlJmijVSX0o1G9TFgR
1538,Swae Lee,Shang-Chi and The Legend of The Ten Rings: The...,In The Dark (with Jhené Aiko),spotify:track:0zaoWwS8RpE3LSDdmkg8TC
1539,Fujii Kaze,HELP EVER HURT NEVER,Shinunoga E-Wa,spotify:track:0o9zmvc5f3EFApU52PPIyW
1540,Cardi B,Bartier Cardi (feat. 21 Savage),Bartier Cardi (feat. 21 Savage),spotify:track:75FDPwaULRdYDn4StFN2rT


Okay! This saves us a little bit of work.
Let's save these to a file so we don't lose them.
We'll drop the `album` and work on the assumption that searching for a song with its track name and its artist will get the right info.

In [37]:
all_members_libraries.drop("album", axis=1, inplace=True)
all_members_libraries.to_json(path_or_buf="all_members_libraries.json", orient='records')

# Third: Using the API to query URIs for everything that wasn't in our libraries

In [None]:
artist_track_to_uri = {}
with open("all_members_libraries.json", 'r') as j:
    all_library_records = json.loads(j.read())
    for record in all_library_records:
        artist_track_to_uri[(record["artist"], record["track"])] = record["uri"]
pp.pprint(artist_track_to_uri)

In [63]:
unique_artist_track = set()
query_queue = []
for idx, record in all_members_streaming_history.iterrows():
    unique_artist_track.add((record.artistName, record.trackName),)
print(len(unique_artist_track))

17619


In [64]:
for artist, track in unique_artist_track:
    if (artist, track) in artist_track_to_uri.keys():
        continue
    else:
        query_queue.append((artist, track),)
query_queue

[('Gryffin', 'All You Need To Know (feat. Calle Lehmann)'),
 ('Otoboke Beaver', 'Where did you buy such a nice watch you are wearing now'),
 ('Minthaze', 'Cherry Weed'),
 ('J E N', 'recalling the memories'),
 ('Ludwig Goransson', 'FAST CARS - BONUS TRACK'),
 ('TOIL', "Black Heart (feat. Leellamarz, BE'O)"),
 ('Excision', 'Back To Back'),
 ('Psy.P', 'SOSA'),
 ('Tez Cadey', 'Seve - Radio Edit'),
 ('Car Seat Headrest', 'Substitute'),
 ('Elijah Who', 'my new love'),
 ('Pink Sweat$', 'Coke & Henny Pt. 2'),
 ('D.O', 'Falling Down'),
 ('Miles Davis Quintet', 'Salt Peanuts'),
 ('keshi', 'B.Y.S.'),
 ('ConcernedApe', 'The Library and Museum'),
 ('Game Monsters', 'Trophy Presentations (From "Mario Kart 64") - Original'),
 ('Jay Park', 'Ask Bout Me'),
 ('Elephante', 'Troubled (Fairlane Remix)'),
 ('Billy Joel', 'The Longest Time'),
 ('Tierra Whack', 'Pretty Ugly'),
 ('MEDUZA', 'Tell It To My Heart (feat. Hozier) - Chemical Surf Remix'),
 ('RapGem', 'I Dont Fucking Care'),
 ('Shouse', 'Love Tonight

Knowing the spotify API limit seems to be 10,000 requests per day, we're going to have to break this into two batches and run them on different days.

In [65]:
queue_midpoint_index = int(len(query_queue) / 2)
query_queue_day_1 = query_queue[0:queue_midpoint_index]
query_queue_day_2 = query_queue[queue_midpoint_index:]

In [74]:
# Understanding structure of response before sending batch
search_result = json.loads(json.dumps(spotify.search(q=f"artist:{query_queue_day_1[0][0]}, track:{query_queue_day_1[0][1]}", limit=1, type='track')))
pp.pprint([artist['name'] for artist in search_result['tracks']['items'][0]['artists']])

['Gryffin', 'SLANDER', 'Calle Lehmann']


In [78]:
# Running day 1 of queries
artist_track_to_uri_day_1 = {}
for query in query_queue_day_1:
    try:
        # formatting searches like this is the best luck I've had.
        search_result = json.loads(json.dumps(spotify.search(q=f"{query[1]} {query[0]}", limit=1, type='track')))
        artist_track_to_uri_day_1[query] = search_result['tracks']['items'][0]['uri']
    except Exception as e:
        print(f"Unable to find track: {query[1]} with artist {query[0]}")
        print("Search result:")
        pp.pprint(search_result)
artist_track_to_uri_day_1

Unable to find track: All You Need To Know (feat. Calle Lehmann) with artist Gryffin
Search result:
{'tracks': {'href': 'https://api.spotify.com/v1/search?query=Vancouver+BIG+Naughty&type=track&offset=0&limit=1',
            'items': [{'album': {'album_type': 'single',
                                 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/7cEaNXXTHx3LokbjUUyHal'},
                                              'href': 'https://api.spotify.com/v1/artists/7cEaNXXTHx3LokbjUUyHal',
                                              'id': '7cEaNXXTHx3LokbjUUyHal',
                                              'name': 'BIG Naughty',
                                              'type': 'artist',
                                              'uri': 'spotify:artist:7cEaNXXTHx3LokbjUUyHal'}],
                                 'available_markets': ['AD',
                                                       'AE',
                                                    

{('Otoboke Beaver',
  'Where did you buy such a nice watch you are wearing now'): 'spotify:track:375h23169u0dcEho5hEkML',
 ('Minthaze', 'Cherry Weed'): 'spotify:track:6FRqV0YP0tyazXOIxexjW5',
 ('J E N', 'recalling the memories'): 'spotify:track:4wYgevsnMVdxtHTSjboCJ7',
 ('Ludwig Goransson',
  'FAST CARS - BONUS TRACK'): 'spotify:track:2JJWhWLXqXPm39suPT652C',
 ('TOIL',
  "Black Heart (feat. Leellamarz, BE'O)"): 'spotify:track:2odm919sRfQIhH2na8YSKK',
 ('Excision', 'Back To Back'): 'spotify:track:0hvKdjFlKowmHaAr6RJWBh',
 ('Psy.P', 'SOSA'): 'spotify:track:2hLwJf6sz52RolbFmlyx0r',
 ('Tez Cadey', 'Seve - Radio Edit'): 'spotify:track:6UqRGwjwYL0stXbaodTxwo',
 ('Car Seat Headrest', 'Substitute'): 'spotify:track:0ppLx4Zxf2sbnUVANd35Mc',
 ('Elijah Who', 'my new love'): 'spotify:track:23gcQr3NRKzLXsP9H5jFQ1',
 ('Pink Sweat$', 'Coke & Henny Pt. 2'): 'spotify:track:6tlan2FHMyIKBRR62zxusZ',
 ('D.O', 'Falling Down'): 'spotify:track:4Xq8fPbkEcmBT5jMBn7iu7',
 ('Miles Davis Quintet',
  'Salt Peanuts'

In [81]:
# ok i want to change that
records = [{"artist": k[0],
            "track": k[1],
            "uri": artist_track_to_uri_day_1[k]}
           for k in artist_track_to_uri_day_1]
with open("artist_track_to_uri_day_1.json", "w") as f:
    json.dump(records, f)

In [84]:
# Day 2
artist_track_to_uri_day_2 = {}
for query in query_queue_day_2:
    try:
        # fyormatting searches like this is the best luck I've had.
        search_result = json.loads(json.dumps(spotify.search(q=f"{query[1]} {query[0]}", limit=1, type='track')))
        artist_track_to_uri_day_2[query] = search_result['tracks']['items'][0]['uri']
    except Exception as e:
        print(f"Unable to find track: {query[1]} with artist {query[0]}")
        print("Search result:")
        pp.pprint(search_result)
artist_track_to_uri_day_2

Unable to find track: Signals with artist Tritonal
Search result:
{'tracks': {'href': 'https://api.spotify.com/v1/search?query=D%C6%B0%E1%BB%9Bi+C%C6%A1n+M%C6%B0a+Tempo+G&type=track&offset=0&limit=1',
            'items': [{'album': {'album_type': 'single',
                                 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2LVQd5JQnKruonFNZeNsHS'},
                                              'href': 'https://api.spotify.com/v1/artists/2LVQd5JQnKruonFNZeNsHS',
                                              'id': '2LVQd5JQnKruonFNZeNsHS',
                                              'name': 'Tempo G',
                                              'type': 'artist',
                                              'uri': 'spotify:artist:2LVQd5JQnKruonFNZeNsHS'}],
                                 'available_markets': ['AD',
                                                       'AE',
                                                       'AG',
       

{('M. Hisataakaa',
  'Path of the wind from My Neighbour Totoro'): 'spotify:track:5B9qDDHXZMjCGQO4dBtRw4',
 ('Phuong Ly', 'ThichThich'): 'spotify:track:3wUp8eCTshIrJcYbjWaoyP',
 ('Toby Fox', 'Oh! Dungeon'): 'spotify:track:68QEE1JPRjEBbdhzpEIKGZ',
 ('Jim Guthrie', 'Campfires'): 'spotify:track:4Lder8TWdPDtgWKRiWhMjc',
 ('Dimitri Vangelis & Wyman',
  'Phantom'): 'spotify:track:4Wiub4bQiJTlS224CC19f8',
 ('Kupla', 'Hold My Hand'): 'spotify:track:7ISl3DqvOVetqjreRWqNG8',
 ('TheRealVanesse', 'DLF'): 'spotify:track:4c4vg6HiZzWfgtRfsY3N5G',
 ('Lani Rose', 'Colors We Made'): 'spotify:track:1EWK3hODDw0DaiS5ffsPO7',
 ('Jooyoung', 'Two of us'): 'spotify:track:5nTxfOeVo6NoAb4qs3UpKC',
 ('TAEYANG', 'Wedding Dress'): 'spotify:track:2NO4CA2TFvhGeg7XMz8PmT',
 ('Lyn', 'Beneath the Mask'): 'spotify:track:5XLXrm5JVMdOus1fWmTOFw',
 ('North West Trio', 'A Quiet Place'): 'spotify:track:2sOncurUkrw3CYWliyxYSU',
 ('Martin Garrix', 'Wizard'): 'spotify:track:4hfYGVqQ0ElgeibMocRUvi',
 ('Felix Cartal',
  "Love Me -

Unable to find track: Blinding Lights (feat. The Weeknd) with artist 静
Search result:
{'tracks': {'href': 'https://api.spotify.com/v1/search?query=Blinding+Lights+%28feat.+The+Weeknd%29+%E9%9D%99&type=track&offset=0&limit=1',
            'items': [],
            'limit': 1,
            'next': None,
            'offset': 0,
            'previous': None,
            'total': 0}}
Unable to find track: Jade (feat. Blood Orange) with artist Lolo Zouaï
Search result:
{'tracks': {'href': 'https://api.spotify.com/v1/search?query=Jade+%28feat.+Blood+Orange%29+Lolo+Zoua%C3%AF&type=track&offset=0&limit=1',
            'items': [],
            'limit': 1,
            'next': None,
            'offset': 0,
            'previous': None,
            'total': 0}}
Unable to find track: The South Is Coming with artist Felsmere
Search result:
{'tracks': {'href': 'https://api.spotify.com/v1/search?query=The+South+Is+Coming+Felsmere&type=track&offset=0&limit=1',
            'items': [],
            'limit

{('M. Hisataakaa',
  'Path of the wind from My Neighbour Totoro'): 'spotify:track:5B9qDDHXZMjCGQO4dBtRw4',
 ('Phuong Ly', 'ThichThich'): 'spotify:track:3wUp8eCTshIrJcYbjWaoyP',
 ('Toby Fox', 'Oh! Dungeon'): 'spotify:track:68QEE1JPRjEBbdhzpEIKGZ',
 ('Jim Guthrie', 'Campfires'): 'spotify:track:4Lder8TWdPDtgWKRiWhMjc',
 ('Dimitri Vangelis & Wyman',
  'Phantom'): 'spotify:track:4Wiub4bQiJTlS224CC19f8',
 ('Kupla', 'Hold My Hand'): 'spotify:track:7ISl3DqvOVetqjreRWqNG8',
 ('TheRealVanesse', 'DLF'): 'spotify:track:4c4vg6HiZzWfgtRfsY3N5G',
 ('Lani Rose', 'Colors We Made'): 'spotify:track:1EWK3hODDw0DaiS5ffsPO7',
 ('Jooyoung', 'Two of us'): 'spotify:track:5nTxfOeVo6NoAb4qs3UpKC',
 ('TAEYANG', 'Wedding Dress'): 'spotify:track:2NO4CA2TFvhGeg7XMz8PmT',
 ('Lyn', 'Beneath the Mask'): 'spotify:track:5XLXrm5JVMdOus1fWmTOFw',
 ('North West Trio', 'A Quiet Place'): 'spotify:track:2sOncurUkrw3CYWliyxYSU',
 ('Martin Garrix', 'Wizard'): 'spotify:track:4hfYGVqQ0ElgeibMocRUvi',
 ('Felix Cartal',
  "Love Me -

In [86]:
records = [{"artist": k[0],
            "track": k[1],
            "uri": artist_track_to_uri_day_2[k]}
           for k in artist_track_to_uri_day_2]
with open("artist_track_to_uri_day_2.json", "w") as f:
    json.dump(records, f)

In [88]:
# ok now put them into one file
with open("artist_track_to_uri_day_1.json", "r") as f:
    with open("artist_track_to_uri_day_2.json", "r") as f2:
        day1 = json.load(f)
        day2 = json.load(f2)
        day1 += day2
        with open("artist_track_to_uri.json", "w") as f3:
            json.dump(day1, f3, indent=1)