In [1]:
import pathlib
import pandas as pd
import asyncio
from aiohttp import ClientSession
import requests
import base64
import api_setup
import spotipy
REPO_ROOT = pathlib.Path.cwd().parent

In [2]:
# API Auth
env_vars = api_setup.parse_api_kvs(pathlib.Path.cwd().parent / "api-keys")
auth_manager = spotipy.SpotifyClientCredentials(env_vars['client_id'], env_vars['client_secret'])
spotify = spotipy.Spotify(client_credentials_manager=auth_manager, backoff_factor=2)

In [3]:
from typing import List

In [4]:
def get_songs_from_playlist(spotify_client: spotipy.Spotify, playlist_uri: str) -> List[str]:
    """
    Return a list of strings of the URIs of the tracks in this playlist.
    """
    tracks_json = spotify_client.playlist_items(playlist_uri)
    return [track['track']['uri'][14:] for track in tracks_json['items']]

get_songs_from_playlist(spotify, "spotify:playlist:76S2ElS2cyzY624wGBGKpB")

['7yl0ItOwlAnALSctbUiavO',
 '4LpUpiYoZ2M3Z1kmhn4EQo',
 '1OuN92HcVG6NVpWbeESNB3',
 '44SO1hMPfH9xUvmI7bjhou',
 '58e7V70Em6FABOiln4jNoZ',
 '2Xl2dfsBQYaPP5I2viTVr9',
 '7L4G39PVgMfaeHRyi1ML7y',
 '1GpZofCtuWj4adPQLqpeFw',
 '3q6ygCZID0OKj6MUxInB48']

In [5]:
# async api calls
from pprint import PrettyPrinter
pp = PrettyPrinter()

EXPECTED_COLUMN_ORDER = ['track_id','artist_name','track_name','duration_ms','danceability','energy','key','loudness','mode','speechiness','acousticness','instrumentalness','liveness','valence','tempo','time_signature','genres','artist_popularity']

async def get_audio_features(session: ClientSession, track_uri: str) -> dict:
    """
    Return the audio features of the song with the given uri.
    """
    uri = track_uri.split(":")[-1]
    endpoint = f"https://api.spotify.com/v1/audio-features/{uri}"

    async with session.get(endpoint) as response:
        response = await(response.json())
    return response

async def get_artist(session: ClientSession, artist_uri: str) -> dict:
    """
    Given an artist's URI, return their info.
    """
    uri = artist_uri.split(":")[-1]
    endpoint = f"https://api.spotify.com/v1/artists/{uri}"
    
    async with session.get(endpoint) as response:
        response = await(response.json())
    
    return response

async def get_artist_from_track_uri(session: ClientSession, track_uri: str) -> dict:
    """
    Given a track URI, return its artist's name and their popularity.
    """
    uri = track_uri.split(":")[-1]
    endpoint = f"https://api.spotify.com/v1/tracks/{uri}"
    
    async with session.get(endpoint) as response:
        response = await(response.json())
        track_name = response['name']
        artist_uri = response['artists'][0]['uri']
        artist_info = await(get_artist(session, artist_uri))
    artist_name, artist_popularity, artist_genres = artist_info['name'], artist_info['popularity'], artist_info['genres']
    return {'track_uri': uri, 'artist_name': artist_name, 'artist_popularity': artist_popularity, 'artist_genres': artist_genres, 'track_name': track_name}

def get_header_with_token(client_id: str, client_secret: str):
    creds = f"{env_vars['client_id']}:{env_vars['client_secret']}"
    creds_b64 = base64.b64encode(creds.encode())
    headers= {"Authorization": f"Basic {creds_b64.decode()}"}
    data= {"grant_type": "client_credentials"}
    token = requests.post("https://accounts.spotify.com/api/token", headers=headers, data=data)
    token = token.json()['access_token']
    return {"Accept": "application/json", "Content-Type": "application/json", "Authorization": f"Bearer {token}"}

async def featurize_song_list(client_id:str, client_secret: str, song_uris: List[str]) -> List[dict]:
    # TODO: This can be chunked >.>
    # https://developer.spotify.com/documentation/web-api/reference/#/operations/get-several-audio-features
    request_headers = get_header_with_token(client_id, client_secret)
    async with ClientSession(headers=request_headers) as session:
        tasks = [asyncio.ensure_future(get_audio_features(session, uri)) for uri in song_uris]
        features = await(asyncio.gather(*tasks))
    return features

async def get_playlist_song_features(spotify_client: spotipy.Spotify, client_id:str, client_secret: str, playlist_uri: str) -> dict:
    song_uris = get_songs_from_playlist(spotify_client, playlist_uri)
    playlist_song_features = await(featurize_song_list(client_id, client_secret, song_uris))
    return playlist_song_features

async def dataframe_from_playlist(spotify_client: spotipy.Spotify, client_id:str, client_secret: str, playlist_uri: str) -> pd.DataFrame:
    # Get the song features and URIs from a playlist
    playlist_song_features = await(get_playlist_song_features(spotify_client, client_id, client_secret, playlist_uri))
    playlist_song_uris = [features['uri'] for features in playlist_song_features]
    # Get those songs' artists and their popularity
    async with ClientSession(headers=get_header_with_token(client_id, client_secret)) as session:
        tasks = [asyncio.ensure_future(get_artist_from_track_uri(session, uri)) for uri in playlist_song_uris]
        artist_info = await(asyncio.gather(*tasks))
    
    # Create the dataframe we expect >:(
    song_features_df = pd.DataFrame.from_records(playlist_song_features)
    track_uri_artist_popularity_df = pd.DataFrame.from_records(artist_info).set_index('track_uri')
    print(track_uri_artist_popularity_df)
    
    song_features_df = song_features_df.join(track_uri_artist_popularity_df, on='id')
    song_features_df = song_features_df.drop(columns=["type", "uri", "track_href", "analysis_url"])

    song_features_df = song_features_df.rename(mapper={"artist_genres": "genres", "id": "track_id"}, axis=1)

    song_features_df = song_features_df[EXPECTED_COLUMN_ORDER]
    
    return song_features_df
    
    
    

args = (spotify, env_vars['client_id'], env_vars['client_secret'], "spotify:playlist:76S2ElS2cyzY624wGBGKpB")
t = await(dataframe_from_playlist(*args))

ContentTypeError: 0, message='Attempt to decode JSON with unexpected mimetype: ', url=URL('https://api.spotify.com/v1/tracks/1OuN92HcVG6NVpWbeESNB3')

In [None]:
t