In [1]:
import json

import pandas as pd
from rich import print 
from rich.progress import track as track_progress

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import dotenv_values

In [2]:
# Load the Spotify API credentials from the project-level .env file.
config = dotenv_values("../.env")


def _require_setting(settings, key):
    """Return the non-blank value stored under ``key``.

    Raises:
        RuntimeError: if the key is absent, has no value, or is only
            whitespace. Failing here gives a clear message instead of an
            authentication error on the first API request.
    """
    value = (settings.get(key) or "").strip()
    if not value:
        raise RuntimeError(f"{key} is missing or empty in ../.env")
    return value


client_id = _require_setting(config, "SPOTIFY_CLIENT_ID")
client_secret = _require_setting(config, "SPOTIFY_CLIENT_SECRET")

In [33]:
# Build the credentials manager first, then the API client that the
# following cells reference as `sp`.
credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=credentials_manager)

In [4]:
def get_track_info(query: str, is_uri: bool = False):
    """Fetch a single track object through the module-level ``sp`` client.

    Args:
        query: Text handed to ``sp.search``; when ``is_uri`` is true it is
            passed unchanged to ``sp.track`` instead.
        is_uri: Selects the direct ``sp.track`` lookup over a search.

    Returns:
        The first search hit (or the ``sp.track`` result), or ``False``
        when the search returns no items.
    """
    if is_uri:
        return sp.track(query)

    matches = sp.search(query, limit=1, type="track")["tracks"]["items"]
    if not matches:
        return False
    return matches[0]

In [5]:
# Read the processed streaming history; the bare name on the last line
# renders the frame as the cell output.
streaming_history_path = "processed/streaming_history_tracks.csv"
data = pd.read_csv(streaming_history_path)
data

Unnamed: 0,artist,track,endtime,msplayed,album,saved,uri
0,A Tribe Called Quest,Excursions,2022-01-28 21:59,41865,,,
1,A$AP Ferg,Plain Jane,2022-03-27 02:20,173600,Still Striving,True,spotify:track:4dVpf9jZjcORqGTLUaeYj9
2,A$AP Ferg,Plain Jane,2022-11-29 01:31,173600,Still Striving,True,spotify:track:4dVpf9jZjcORqGTLUaeYj9
3,A.C.O,De Madrugada,2022-01-21 02:55,220757,De Madrugada,True,spotify:track:280QqgbMrLeMs6B80mIPG2
4,A.C.O,De Madrugada,2022-07-18 15:48,220757,De Madrugada,True,spotify:track:280QqgbMrLeMs6B80mIPG2
...,...,...,...,...,...,...,...
10071,sofia shizuko,chichiriviche,2022-07-15 00:49,149000,chichiriviche,True,spotify:track:7MNtRrUXrsQwR10o3lvgDP
10072,sofia shizuko,chichiriviche,2022-07-15 00:55,149000,chichiriviche,True,spotify:track:7MNtRrUXrsQwR10o3lvgDP
10073,sofia shizuko,chichiriviche,2022-07-16 18:59,69525,chichiriviche,True,spotify:track:7MNtRrUXrsQwR10o3lvgDP
10074,sofia shizuko,chichiriviche,2022-08-15 19:58,149000,chichiriviche,True,spotify:track:7MNtRrUXrsQwR10o3lvgDP


In [6]:
# Boolean row selectors reused by the cells below.
mask_saved = data["saved"].eq(True)    # rows whose `saved` value equals True
mask_uriquery = data["uri"].notna()    # rows that have a URI
mask_searchquery = data["uri"].isna()  # rows with no URI

In [7]:
# Show only the rows selected by `mask_saved`.
data.loc[mask_saved]

Unnamed: 0,artist,track,endtime,msplayed,album,saved,uri
1,A$AP Ferg,Plain Jane,2022-03-27 02:20,173600,Still Striving,True,spotify:track:4dVpf9jZjcORqGTLUaeYj9
2,A$AP Ferg,Plain Jane,2022-11-29 01:31,173600,Still Striving,True,spotify:track:4dVpf9jZjcORqGTLUaeYj9
3,A.C.O,De Madrugada,2022-01-21 02:55,220757,De Madrugada,True,spotify:track:280QqgbMrLeMs6B80mIPG2
4,A.C.O,De Madrugada,2022-07-18 15:48,220757,De Madrugada,True,spotify:track:280QqgbMrLeMs6B80mIPG2
5,A.C.O,De Madrugada,2022-08-10 19:04,182072,De Madrugada,True,spotify:track:280QqgbMrLeMs6B80mIPG2
...,...,...,...,...,...,...,...
10070,sofia shizuko,chichiriviche,2022-07-15 00:43,149000,chichiriviche,True,spotify:track:7MNtRrUXrsQwR10o3lvgDP
10071,sofia shizuko,chichiriviche,2022-07-15 00:49,149000,chichiriviche,True,spotify:track:7MNtRrUXrsQwR10o3lvgDP
10072,sofia shizuko,chichiriviche,2022-07-15 00:55,149000,chichiriviche,True,spotify:track:7MNtRrUXrsQwR10o3lvgDP
10073,sofia shizuko,chichiriviche,2022-07-16 18:59,69525,chichiriviche,True,spotify:track:7MNtRrUXrsQwR10o3lvgDP


In [8]:
# Rows not flagged as saved that have a URI, reduced to the distinct
# (artist, track, album, uri) combinations.
lookup_columns = ["artist", "track", "album", "uri"]
uri_query_df = data.loc[~mask_saved & mask_uriquery, lookup_columns].drop_duplicates()
uri_query_df

Unnamed: 0,artist,track,album,uri
10,Ab-Soul,D.R.U.G.S.,Do What Thou Wilt.,spotify:track:09avKKLKXaJcbeaGTmLCOs
25,Adán Cruz,Al Revés,Necesitaba Estar Hecho,spotify:track:50HQhGR7vgorRGHSRLS16M
37,Adán Cruz,Alma Tornasol,Necesitaba Estar Hecho,spotify:track:7636nP8BXuoxjVw1x76Erh
39,Adán Cruz,Be Alright,Necesitaba Estar Hecho,spotify:track:4fyxtLbwPvL61EIRdXYBIK
45,Adán Cruz,Cola,Necesitaba Estar Hecho,spotify:track:03JRYiGqFHGiGNnIljr0S2
...,...,...,...,...
10027,Yoss Bones,Ya No Quiero,Bones,spotify:track:4s2hbYx5uMpHiWTS6frjJW
10049,Zona Ganjah,Fumando Vamos a Casa,Sanazion,spotify:track:4O8jfikpUqtZvWV7PAUwTv
10052,Zoé,Poli / Love - En Vivo,MTV Unplugged Música De Fondo,spotify:track:3YOFkHuCdj7ikHyy4SsKGB
10054,Zoé,Soñé - En Vivo,MTV Unplugged Música De Fondo,spotify:track:2VhJ4nrPorAbySEgO4V0BS


In [9]:
# Rows not flagged as saved that have no URI, reduced to the distinct
# (artist, track, album, uri) combinations.
lookup_columns = ["artist", "track", "album", "uri"]
search_query_df = data.loc[~mask_saved & mask_searchquery, lookup_columns].drop_duplicates()
search_query_df

Unnamed: 0,artist,track,album,uri
0,A Tribe Called Quest,Excursions,,
38,Adán Cruz,Astronauta de Tus Lunares,,
43,Adán Cruz,Cbb,,
69,Adán Cruz,Mucho Plastic,,
71,Adán Cruz,Pausa,,
...,...,...,...,...
9963,Yoga Fire,Por Gangster,,
10043,Zack Knight,Looking For Love,,
10044,Zimple,Mamacita,,
10053,Zoé,Popular,,


In [19]:
# Inspect the search-query rows for one artist.
search_query_df.loc[search_query_df["artist"].eq("Arca")]

Unnamed: 0,artist,track,album,uri
543,Arca,Prada,,
544,Arca,Tiro,,


In [10]:
# One free-text query per row, formatted as "<artist> <track>".
search_queries = [
    f"{artist} {track}"
    for artist, track in zip(search_query_df["artist"], search_query_df["track"])
]

In [32]:
# The URI column, in row order, as a plain list.
uri_queries = list(uri_query_df["uri"])

In [20]:
# Report how many lookups of each kind are queued.
n_search = len(search_queries)
n_uri = len(uri_queries)
print(f"""
search queries - {n_search}
uri queries - {n_uri}
""")

In [13]:
# Three independent, initially empty result lists.
skipped_queries, tracks_info, tracks_audio_features = [], [], []


In [14]:
# Resolve each free-text query to a track and fetch its audio features.
# Queries for which get_track_info returns a falsy value are recorded in
# `skipped_queries`.
for query in track_progress(
    search_queries,
    total=len(search_queries),
    description="Search queries: ",
):
    track_info = get_track_info(query)
    if not track_info:
        skipped_queries.append(query)
        continue

    tracks_info.append(track_info)
    # Only one URI is requested, so take the first element of the result.
    track_audio_features = sp.audio_features(track_info["uri"])[0]
    if track_audio_features:
        tracks_audio_features.append(track_audio_features)

Output()

In [34]:
# Separate accumulators for the URI-based lookups.
adhoc_tracks_info, adhoc_audio_features, adhoc_skipped_queries = [], [], []

# Fetch each track by URI, then its audio features. Falsy lookups are
# recorded in `adhoc_skipped_queries`.
for query in track_progress(
    uri_queries,
    total=len(uri_queries),
    description="URI queries: ",
):
    track_info = get_track_info(query, is_uri=True)
    if not track_info:
        adhoc_skipped_queries.append(query)
        continue

    adhoc_tracks_info.append(track_info)
    # Only one URI is requested, so take the first element of the result.
    track_audio_features = sp.audio_features(track_info["uri"])[0]
    if track_audio_features:
        adhoc_audio_features.append(track_audio_features)

Output()

In [16]:
# Summarise the outcome of the search-query loop.
n_skipped = len(skipped_queries)
n_info = len(tracks_info)
n_features = len(tracks_audio_features)
print(f"""
skipped - {n_skipped}
track info - {n_info}
w/ audio features - {n_features}
""")

In [31]:
# Ad-hoc probe: repeat the lookup for two specific queries. The results
# are deliberately not appended to the accumulated lists.
probe_queries = ["Arca Tiro", "Arca Prada"]
for query in track_progress(
    probe_queries,
    # Size the bar from the list being iterated; len(search_queries) would
    # leave it unable to reach completion.
    total=len(probe_queries),
    description="Search queries: ",
):
    track_info = get_track_info(query)

Output()

ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

In [17]:
# Write the collected results as JSON. `default=str` is the fallback for
# values the encoder cannot serialise itself.
outputs = (
    ("interim/tracks_info.json", tracks_info),
    ("interim/tracks_audio_features.json", tracks_audio_features),
)
for path, payload in outputs:
    with open(path, "w") as file:
        json.dump(payload, file, default=str)