In [14]:
import os
import dotenv
import requests
import base64
import pandas as pd
from tqdm import tqdm

dotenv.load_dotenv()

CLIENT_ID = os.getenv('CLIENT_ID')
CLIENT_SECRET = os.getenv('CLIENT_SECRET')

artists_df = pd.read_csv('data/top_categories_artists.csv')


def _get_token(client_id, client_secret):
    url = 'https://accounts.spotify.com/api/token'
    auth_string = client_id + ':' + client_secret
    auth_bytes = auth_string.encode('utf-8')
    auth_base64 = base64.b64encode(auth_bytes)

    headers = {
        'Authorization': 'Basic ' + auth_base64.decode('utf-8'),
    }
    data = {
        'grant_type': 'client_credentials',
    }
    response = requests.post(url, headers=headers, data=data)
    return response.json()['access_token']


def get_artist_top_tracks(token, artist_id, market=None):
    url = f'https://api.spotify.com/v1/artists/{artist_id}/top-tracks'
    headers = {
        'Authorization': 'Bearer ' + token,
    }
    params = {
        'market': market,
    }
    response = requests.get(url, headers=headers, params=params, timeout=5)
    if response.status_code != 200:
        print('Error')
        raise ValueError(response)
    return response.json()


def get_track_features(token, track_id):
    url = f'https://api.spotify.com/v1/audio-features/{track_id}'
    headers = {
        'Authorization': 'Bearer ' + token,
    }
    response = requests.get(url, headers=headers)
    return response.json()


def get_artist_info(token, artist_id: list):
    url = f'https://api.spotify.com/v1/artists/'
    headers = {
        'Authorization': 'Bearer ' + token,
    }
    params = {
        'ids': ','.join(artist_id),
    }
    if len(artist_id) > 50:
        raise ValueError('Too many artist ids')
    response = requests.get(url, headers=headers, params=params)
    return response.json()


def fetch_tracks(artist_id):
    tracks = get_artist_top_tracks(TOKEN, artist_id)
    keys_to_keep = ['name', 'artists', 'explicit', 'popularity']
    for track in tracks['tracks']:
        track['artists'] = track['artists'][0]['name']
    tracks = {track['id']: {k: track[k] for k in keys_to_keep} for track in tracks['tracks']}
    return tracks


TOKEN = _get_token(CLIENT_ID, CLIENT_SECRET)

In [None]:
import concurrent.futures

all_tracks = {}
with concurrent.futures.ThreadPoolExecutor() as executor:
    future_to_artist = {executor.submit(fetch_tracks, artist_id): artist_id
                        for artist_id in artists_df['id']}
    for future in tqdm(concurrent.futures.as_completed(future_to_artist),
                       total=len(future_to_artist)):
        artist_id = future_to_artist[future]
        try:
            tracks = future.result()
        except Exception as exc:
            raise exc
        else:
            all_tracks.update(tracks)

  0%|          | 0/36408 [00:00<?, ?it/s]


In [221]:
all_tracks_features = {}
for id, track_info in tracks.items():
    track_features = get_track_features(TOKEN, id)
    track_info.update(track_features)
    drop_keys = ['type', 'id', 'uri', 'track_href', 'analysis_url']
    track_info = {k: v for k, v in track_info.items() if k not in drop_keys}
    all_tracks_features[id] = track_info

In [224]:
pd.DataFrame(all_tracks_features).T

Unnamed: 0,name,artists,explicit,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
4f0aDk7HX37SfzerfznYlr,Lapada Dela - Ao Vivo,Grupo Menos É Mais,False,80,0.691,0.752,4,-6.796,0,0.157,0.708,6e-06,0.912,0.684,167.073,159323,4
3XqhtjiycwDxCpGqUFTtlW,Matadinha De Saudade - Ao Vivo,Grupo Menos É Mais,False,65,0.752,0.846,1,-6.072,1,0.0614,0.435,0.0,0.576,0.926,156.617,176248,4
5r9CZJMoMmxio2AnJdxuDS,Pot-Pourri: Melhor Eu Ir / Ligando Os Fatos / ...,Grupo Menos É Mais,False,71,0.437,0.798,2,-3.928,1,0.0792,0.603,0.0,0.552,0.706,157.828,521088,4
1N19UVD4GRBFT6UrDaX6rk,Bebe e Vem Me Procurar / Quem Ama Sente Saudad...,Turma do Pagode,False,65,0.68,0.712,9,-6.781,1,0.0609,0.634,0.0,0.933,0.784,88.0,216818,4
1jweGFaVZxEVrOHKsLpPY4,Adorei,Grupo Menos É Mais,False,67,0.746,0.668,7,-6.496,1,0.0398,0.642,0.0,0.109,0.798,151.853,179452,4
1J8rfA9BdhlpzNSKKePf3U,Destilado - Ao Vivo,LUDMILLA,False,67,0.677,0.761,2,-8.677,1,0.142,0.789,0.0,0.136,0.698,160.086,176297,4
5Vt0SUtyilv1kwKd3Ob0GH,Ficadinha - Ao Vivo,Grupo Menos É Mais,False,52,0.641,0.721,11,-7.629,0,0.0804,0.232,0.0,0.18,0.896,166.932,154180,4
6O5W6UpfkQnDfWKM32JcCW,Recaída,Grupo Menos É Mais,False,66,0.781,0.751,7,-5.796,1,0.0575,0.36,0.0,0.11,0.788,88.966,181344,4
1TFqH3zKOiTZoVe0OQuJte,Até Que Durou / Tu Mandas No Meu Coração / Ado...,Grupo Menos É Mais,False,65,0.405,0.829,2,-6.057,1,0.0582,0.62,4.6e-05,0.327,0.767,181.222,453019,4
6FiRcbFkKaZ4ELwe3ILbX3,Fica Light - Ao Vivo,Dilsinho,False,64,0.64,0.805,5,-6.344,0,0.0938,0.623,0.0,0.94,0.917,90.914,161628,4
