In [12]:
# Imports
import pathlib
import pandas as pd
from aiohttp import ClientSession
import asyncio
import requests
import base64
import json

import api_setup

# Path configuration. The data directory is located relative to the kernel's
# working directory, which is assumed to sit three levels below the repo root.
APP = 'spotify'
NAME = 'jojo'
SUBDIRS = 'my_spotify_data/MyData/extended_history'

CWD = pathlib.Path.cwd()
REPO_ROOT = CWD.parent.parent.parent
DATA_DIR = REPO_ROOT.joinpath("data")
# <repo>/data/<app>/<user>/<export sub-directories>
USER_DATA_DIR = DATA_DIR.joinpath(APP, NAME, SUBDIRS)


In [13]:
# Load every JSON export in the user's history directory into one frame.
# Only *.json entries are read (the directory may also hold sub-directories or
# non-JSON files), and the paths are sorted so the row order of the combined
# frame is the same on every run.
history_files = sorted(USER_DATA_DIR.glob("*.json"))
if not history_files:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(f"No JSON history files found in {USER_DATA_DIR}")

dfs = [pd.read_json(history_file) for history_file in history_files]

extended_history = pd.concat(dfs, ignore_index=True)

In [14]:
# Discard rows that have no track URI, then keep one row per distinct
# (track name, artist name, track URI) combination.
extended_history = extended_history.dropna(axis=0, subset=["spotify_track_uri"])
track_columns = [
    "master_metadata_track_name",
    "master_metadata_album_artist_name",
    "spotify_track_uri",
]
artist_track_uri = extended_history[track_columns].drop_duplicates()

In [15]:
# DataFrame.to_json() already returns serialized JSON text. Passing that text
# through json.dumps() again would store a single escaped JSON *string* in the
# file instead of the JSON object itself, so write it out as-is.
with open("artist_uris_extended.json", "w") as f:
    f.write(artist_track_uri.to_json(indent=1))

In [16]:
# Track URIs to look up, one per de-duplicated row.
uris = artist_track_uri.spotify_track_uri.values

# (start, end) slice bounds passed to main(); each chunk is fetched separately.
chunk1 = (0, 8000)
chunk2 = (8000, 16000)
# The end bound is None, not -1: uris[16000:-1] would silently drop the last URI.
chunk3 = (16000, None)

# API Auth
# main() reads the 'client_id' and 'client_secret' entries from this mapping.
env_vars = api_setup.parse_api_kvs(REPO_ROOT / "api-keys")

async def get_audio_features(session: ClientSession, uri: str) -> "tuple[str, dict]":
    """Fetch the audio-features record for a single track.

    Args:
        session: Open HTTP session used for the request. Authentication is not
            added here, so the session itself must carry the required headers.
        uri: Track URI of the form ``spotify:track:<id>``. A bare ``<id>``
            without the prefix is accepted as well.

    Returns:
        ``(track_id, payload)`` where ``payload`` is the decoded JSON body of
        the response. The HTTP status is not inspected, so whatever JSON the
        server sends back is passed through unchanged.
    """
    # The endpoint wants only the ID, not the 'spotify:track:' part. Strip the
    # prefix only when it is present instead of blindly cutting 14 characters.
    prefix = "spotify:track:"
    track_id = uri[len(prefix):] if uri.startswith(prefix) else uri
    endpoint = f"https://api.spotify.com/v1/audio-features/{track_id}"

    async with session.get(endpoint) as response:
        payload = await response.json()
    return track_id, payload

def _request_access_token(client_id: str, client_secret: str) -> str:
    """Exchange client credentials for a bearer token at the token endpoint."""
    creds_b64 = base64.b64encode(f"{client_id}:{client_secret}".encode()).decode()
    token_response = requests.post(
        "https://accounts.spotify.com/api/token",
        headers={"Authorization": f"Basic {creds_b64}"},
        data={"grant_type": "client_credentials"},
        timeout=30,  # requests waits indefinitely when no timeout is given
    )
    # Fail with the HTTP error rather than a KeyError on 'access_token' below.
    token_response.raise_for_status()
    return token_response.json()['access_token']


async def main(startindex: int = 0, endindex: "int | None" = None) -> list:
    """Fetch audio features for ``uris[startindex:endindex]`` concurrently.

    Args:
        startindex: Start of the slice taken from the module-level ``uris``.
        endindex: End of the slice (exclusive). ``None`` (the default) runs to
            the end of ``uris``; a default of -1 would drop the last URI.

    Returns:
        A list with one ``get_audio_features`` result per URI, in the same
        order as the sliced URIs.

    Raises:
        requests.HTTPError: If the token request is rejected.
    """
    # Synchronous call; it completes before any of the concurrent requests
    # below are scheduled.
    token = _request_access_token(env_vars['client_id'], env_vars['client_secret'])
    headers = {"Accept": "application/json", "Content-Type": "application/json", "Authorization": f"Bearer {token}"}

    batch_uris = uris[startindex:endindex]
    async with ClientSession(headers=headers) as session:
        # gather() schedules the coroutines itself and keeps the input order.
        features = await asyncio.gather(
            *(get_audio_features(session, uri) for uri in batch_uris)
        )
    return features

In [17]:
features = await(main(*chunk1))


In [19]:
# Each fetched item is a (track_id, payload) pair; keep the payloads only and
# persist them as a JSON list.
features = [pair[1] for pair in features]
with open("features_0_eh.json", "w") as out_file:
    out_file.write(json.dumps(features))

In [None]:
features = await(main(*chunk2))
features = [feature[1] for feature in features]
with open("features_0_eh.json", "w") as f:
	json.dump(features, f)

In [None]:
features = await(main(*chunk3))
features = [feature[1] for feature in features]
with open("features_0_eh.json", "w") as f:
	json.dump(features, f)