In [1]:
import base64
import json
import re
import time
import warnings
from urllib.request import Request, urlopen

# Silence warnings before the plotting/widget libraries are imported, as the original cell did.
warnings.filterwarnings("ignore")

import ipywidgets as wd
import matplotlib.pyplot as plt
import pandas as pd
import ptitprince as pt
import requests
import seaborn as sns
from ipywidgets import interact
from sodapy import Socrata

In [2]:
# Read the credentials from the local secrets file.
with open("secret.json", "r") as secrets_file:
    config = json.load(secrets_file)

# Unpack the individual settings into the names used by the rest of the notebook.
token, user, passwd = (config[key] for key in ("TOKEN_API_OD", "user", "pw"))
client_id, client_secret = (config[key] for key in ("clientId", "clientSecret"))

In [3]:
def get_access_token(client_id, client_secret):
    """Request an access token from Spotify using the client-credentials grant.

    :param client_id: Spotify application client ID.
    :param client_secret: Spotify application client secret.
    :return: The ``access_token`` string from the token endpoint's JSON reply.
    :raises requests.HTTPError: If the token endpoint answers with an error status.
    :raises KeyError: If a successful reply carries no ``access_token`` field.
    """
    # Token endpoint
    auth_url = 'https://accounts.spotify.com/api/token'

    # HTTP Basic credentials: base64("client_id:client_secret")
    encoded_credentials = base64.b64encode((client_id + ":" + client_secret).encode()).decode()
    auth_header = {'Authorization': f'Basic {encoded_credentials}'}

    # The Authorization header already authenticates the client, so the form
    # body carries only the grant type and the secret is not sent a second time.
    auth_data = {'grant_type': 'client_credentials'}

    # Bounded wait so a stalled connection cannot hang the notebook forever
    auth_response = requests.post(auth_url, data=auth_data, headers=auth_header, timeout=30)

    # Surface rejected credentials as an HTTP error instead of an opaque
    # KeyError on 'access_token' further down.
    auth_response.raise_for_status()

    return auth_response.json()['access_token']


In [4]:
def get_genres(access_token, limit=50, offset=0):
    """Fetch one page of Spotify browse categories as ``(id, name)`` tuples.

    :param access_token: Bearer token sent in the Authorization header.
    :param limit: Page size forwarded as the ``limit`` query parameter.
    :param offset: Start index forwarded as the ``offset`` query parameter.
    :return: List of ``(category_id, category_name)`` tuples, in response order.
    """
    # Single GET against the browse-categories endpoint
    reply = requests.get(
        'https://api.spotify.com/v1/browse/categories',
        headers={'Authorization': f'Bearer {access_token}'},
        params={'limit': limit, 'offset': offset},
    )

    # Walk the decoded payload and keep just the identifier and display name
    pairs = []
    for entry in reply.json()['categories']['items']:
        pairs.append((entry['id'], entry['name']))
    return pairs

In [5]:
# Authenticate with the credentials loaded earlier, then pull the category list.
access_token = get_access_token(client_id=client_id, client_secret=client_secret)
genres_list = get_genres(access_token=access_token)

# Tabulate the (id, name) tuples.
df_genres = pd.DataFrame(data=genres_list, columns=['Genre_ID', 'Genre_Name'])

In [6]:
# Display the category table (Genre_ID, Genre_Name) as the cell output.
df_genres

Unnamed: 0,Genre_ID,Genre_Name
0,toplists,Top Lists
1,0JQ5DAqbMKFGcCCKMatU5w,Made in Colombia
2,0JQ5DAqbMKFPw634sFwguI,EQUAL
3,0JQ5DAqbMKFOOxftoKZxod,RADAR
4,0JQ5DAqbMKFF9bY76LXmfI,Frequency
5,0JQ5DAqbMKFx0uLQR2okcc,At Home
6,0JQ5DAqbMKFLb2EqgLtpjC,Wellness
7,0JQ5DAqbMKFCbimwdOYlsl,Focus
8,0JQ5DAqbMKFxXaXKP7zcDp,Latin
9,0JQ5DAqbMKFEC4WFtoNRpw,Pop


In [7]:
def extract_artists_to_dataframe(response_data):
    """Flatten a multi-artist API response into a DataFrame, one row per artist.

    :param response_data: Dict whose ``'artists'`` key holds a list of artist
        objects (dicts with ``name``, ``genres``, ``popularity``,
        ``external_urls``, ``followers``, ``images``, ``id``, ``type``, ``uri``).
        ``None`` entries in that list are skipped.
    :return: DataFrame with columns Name, Genres, Popularity, Spotify_URL,
        Followers, Image_URL, Artist_ID, Artist_Type, Artist_URI. An empty
        artist list yields an empty frame that still has these columns.
    :raises KeyError: If ``response_data`` has no ``'artists'`` key.
    """
    columns = [
        'Name', 'Genres', 'Popularity', 'Spotify_URL', 'Followers',
        'Image_URL', 'Artist_ID', 'Artist_Type', 'Artist_URI',
    ]
    artist_list = []

    for artist in response_data['artists']:
        # A list slot can be None (the API is documented to return null for
        # IDs it cannot resolve); skip it instead of failing on None['name'].
        if artist is None:
            continue

        images = artist['images']
        artist_list.append({
            'Name': artist['name'],
            'Genres': ', '.join(artist['genres']),
            'Popularity': artist['popularity'],
            'Spotify_URL': artist['external_urls']['spotify'],
            'Followers': artist['followers']['total'],
            'Image_URL': images[0]['url'] if images else None,  # first image, if any
            'Artist_ID': artist['id'],
            'Artist_Type': artist['type'],
            'Artist_URI': artist['uri'],
        })

    # Passing the column list keeps the schema stable even when no rows were collected.
    return pd.DataFrame(artist_list, columns=columns)


In [8]:
def get_artists_from_spotify(token, artist_ids):
    """
    Fetch artist details from Spotify's multi-artist endpoint.

    The IDs are sent in chunks of at most 50 per request (the endpoint's
    documented maximum) and the per-chunk results are merged in order, so
    lists longer than 50 work too. An empty list returns immediately without
    contacting the API.

    :param token: Access token for API authorization.
    :param artist_ids: List of artist IDs.
    :return: Dict of the form ``{"artists": [...]}`` with the artist objects
        in the order the IDs were given.
    :raises requests.HTTPError: If any request answers with an error status.
    """
    base_url = "https://api.spotify.com/v1/artists"
    headers = {"Authorization": f"Bearer {token}"}
    max_ids_per_request = 50

    artist_ids = list(artist_ids)
    artists = []
    for start in range(0, len(artist_ids), max_ids_per_request):
        chunk = artist_ids[start:start + max_ids_per_request]
        response = requests.get(base_url, headers=headers, params={"ids": ",".join(chunk)})
        # Fail here with the HTTP status rather than handing an error payload
        # (which has no "artists" key) to downstream code.
        response.raise_for_status()
        artists.extend(response.json()["artists"])

    return {"artists": artists}

# Example usage:

# Swap in the IDs of whichever artists you want to look up.
artist_ids = [
    "2CIMQHirSU0MQqyYHq0eOx",
    "57dN52uHvrHOxijzpIgu3E",
]
token = access_token

# Query the API, then flatten the response into a table and display it.
response_data = get_artists_from_spotify(token=token, artist_ids=artist_ids)
df_artist = extract_artists_to_dataframe(response_data=response_data)
df_artist

Unnamed: 0,Name,Genres,Popularity,Spotify_URL,Followers,Image_URL,Artist_ID,Artist_Type,Artist_URI
0,deadmau5,"canadian electronic, complextro, edm, electro ...",65,https://open.spotify.com/artist/2CIMQHirSU0MQq...,2816882,https://i.scdn.co/image/ab6761610000e5ebc5ceb0...,2CIMQHirSU0MQqyYHq0eOx,artist,spotify:artist:2CIMQHirSU0MQqyYHq0eOx
1,Ratatat,indietronica,62,https://open.spotify.com/artist/57dN52uHvrHOxi...,594209,https://i.scdn.co/image/2f0c6c465a83cd196e651e...,57dN52uHvrHOxijzpIgu3E,artist,spotify:artist:57dN52uHvrHOxijzpIgu3E


In [9]:
import requests

def get_artists_by_genre(query, access_token, limit=50):
    """Search Spotify for artists of a genre and return their IDs (single page).

    NOTE: a later cell defines another ``get_artists_by_genre`` (paginated,
    with ``total_limit``); once that cell runs it replaces this definition.

    :param query: Genre text, embedded in the search as ``genre:"<query>"``.
    :param access_token: Bearer token sent in the Authorization header.
    :param limit: Number of results requested from the search endpoint.
    :return: List of artist IDs; an empty list (after printing the payload)
        when the response has no ``'artists'`` key.
    """
    reply = requests.get(
        'https://api.spotify.com/v1/search',
        headers={'Authorization': f'Bearer {access_token}'},
        params={'q': f"genre:\"{query}\"", 'type': 'artist', 'limit': limit},
    )
    payload = reply.json()

    # Guard clause: anything without an 'artists' section is reported and yields nothing.
    if 'artists' not in payload:
        print("Error en la respuesta de la API:", payload)
        return []

    found_ids = []
    for found in payload['artists']['items']:
        found_ids.append(found['id'])
    return found_ids





In [10]:
# Collect one page of artist IDs for the chosen genre and print them.
genre = "death metal"
metal_artist_ids = get_artists_by_genre(query=genre, access_token=access_token)
print(metal_artist_ids)

['1IQ2e1buppatiN1bxUVkrk', '0GDGKpJFhVpcjIGF8N6Ewt', '5pqvAI85RMxL9K0xHvSwGu', '57ylwQTnFnIhJh4nu4rxCs', '6vXYoy8ouRVib302zxaxFF', '24Oiw7BlvO1BETecDLJt6m', '1C62FV9Cltn9L4c9jAwCyk', '3pulcT2wt7FEG10lQlqDJL', '3iCJOi5YKh247eutgCyLFe', '0FZcPgWI3BsFQl4rOAGSHT', '6JW8wliOEwaDZ231ZY7cf4', '0yLwGBQiBqhXOvmTfH2A7n', '4Uv5bceTJ2h3tLlssUNDNP', '2tjnvrUmP46XNjFh9V0NGc', '4f5V3PQ66nIrBCqugJtaGn', '5274obTQJjzjyycRyJlfml', '6HZr7Fs2VfV1PYHIwo8Ylc', '2OTuoIi28WybVbVcykc237', '3et9upNERQI5IYt5jEDTxM', '4d6Rawrese4OLF1zZCztod', '4UgQ3EFa8fEeaIEg54uV5b', '0ybFZ2Ab08V8hueghSXm6E', '1AdrYGYDz4oa9dvW2jfFrG', '2i7CQcVBh2K6uOR3CH09M1', '0DCw6lHkzh9t7f8Hb4Z0Sx', '1xUhNgw4eJDZfvumIpcz1B', '0uNj4RxFjG0iVPlZS753en', '1DgmdsnwOexqTH8ohPCFAU', '4tDkeVxH0CSkNiLVrsYmQs', '1bPBx2qbpWzEoLujeHC3G7', '2Ah9OuOj7B57gPD1cbwiaE', '0qJpY7K8p7g6sacvaGNt6i', '4xTDPgk4jHCF0qui3dH6BS', '7wqP36o9lqWteOCxBnXlwx', '76xrrejizyQpKukBIhnf3D', '6EFV3PmaXblKwNbvpkGv9l', '76S65NHJHrNy4JTrXHP2BH', '4HgqjpBaWctBWVHafQIpRt', '2DqzOWVL2l

In [11]:
# Look up the details of the artist IDs returned by the genre search.
token = access_token
response_data = get_artists_from_spotify(token=token, artist_ids=metal_artist_ids)

# Flatten the response into a table and display it.
df_metal_artist = extract_artists_to_dataframe(response_data=response_data)
df_metal_artist

Unnamed: 0,Name,Genres,Popularity,Spotify_URL,Followers,Image_URL,Artist_ID,Artist_Type,Artist_URI
0,Slayer,"alternative metal, death metal, groove metal, ...",64,https://open.spotify.com/artist/1IQ2e1buppatiN...,3717516,https://i.scdn.co/image/8c81130db7b5f933412c49...,1IQ2e1buppatiN1bxUVkrk,artist,spotify:artist:1IQ2e1buppatiN1bxUVkrk
1,Gojira,"alternative metal, french death metal, french ...",61,https://open.spotify.com/artist/0GDGKpJFhVpcjI...,1239549,https://i.scdn.co/image/ab6761610000e5eb96c494...,0GDGKpJFhVpcjIGF8N6Ewt,artist,spotify:artist:0GDGKpJFhVpcjIGF8N6Ewt
2,Currents,"melodic metalcore, metalcore, progressive deat...",58,https://open.spotify.com/artist/5pqvAI85RMxL9K...,187066,https://i.scdn.co/image/ab6761610000e5ebe802e9...,5pqvAI85RMxL9K0xHvSwGu,artist,spotify:artist:5pqvAI85RMxL9K0xHvSwGu
3,In Flames,"alternative metal, gothenburg metal, melodic d...",61,https://open.spotify.com/artist/57ylwQTnFnIhJh...,1127923,https://i.scdn.co/image/ab6761610000e5eb5c3bd9...,57ylwQTnFnIhJh4nu4rxCs,artist,spotify:artist:57ylwQTnFnIhJh4nu4rxCs
4,Lorna Shore,"blackened deathcore, melodic metalcore, sympho...",57,https://open.spotify.com/artist/6vXYoy8ouRVib3...,540632,https://i.scdn.co/image/ab6761610000e5eb49e1d0...,6vXYoy8ouRVib302zxaxFF,artist,spotify:artist:6vXYoy8ouRVib302zxaxFF
5,Slaughter to Prevail,"russian metal, russian metalcore, slamming dea...",56,https://open.spotify.com/artist/24Oiw7BlvO1BET...,551299,https://i.scdn.co/image/ab6761610000e5eb65de98...,24Oiw7BlvO1BETecDLJt6m,artist,spotify:artist:24Oiw7BlvO1BETecDLJt6m
6,Emmure,"deathcore, melodic metalcore, metalcore, nu-me...",53,https://open.spotify.com/artist/1C62FV9Cltn9L4...,329282,https://i.scdn.co/image/ab6761610000e5ebf96bda...,1C62FV9Cltn9L4c9jAwCyk,artist,spotify:artist:1C62FV9Cltn9L4c9jAwCyk
7,Amon Amarth,"groove metal, melodic death metal, metal, powe...",57,https://open.spotify.com/artist/3pulcT2wt7FEG1...,1072331,https://i.scdn.co/image/ab6761610000e5eb131d4f...,3pulcT2wt7FEG10lQlqDJL,artist,spotify:artist:3pulcT2wt7FEG10lQlqDJL
8,I See Stars,"deathstep, melodic metalcore, metalcore, neon ...",51,https://open.spotify.com/artist/3iCJOi5YKh247e...,451467,https://i.scdn.co/image/ab6761610000e5eba1a68c...,3iCJOi5YKh247eutgCyLFe,artist,spotify:artist:3iCJOi5YKh247eutgCyLFe
9,Make Them Suffer,"australian metalcore, blackened deathcore, bru...",52,https://open.spotify.com/artist/0FZcPgWI3BsFQl...,233898,https://i.scdn.co/image/ab6761610000e5eb919ca8...,0FZcPgWI3BsFQl4rOAGSHT,artist,spotify:artist:0FZcPgWI3BsFQl4rOAGSHT


In [39]:
def get_artists_by_genre(query, access_token, total_limit=3000):
    """Collect artist IDs for a genre by paging through Spotify's search endpoint.

    Pages of 50 results are requested with an increasing offset until
    ``total_limit`` IDs are collected, a short page signals the end of the
    results, or a request fails (the status code is printed and whatever was
    collected so far is returned).

    :param query: Genre text, embedded in the search as ``genre:"<query>"``.
    :param access_token: Bearer token sent in the Authorization header.
    :param total_limit: Maximum number of artist IDs to return.
    :return: List of at most ``total_limit`` artist IDs, in result order.
    """
    search_url = 'https://api.spotify.com/v1/search'
    headers = {'Authorization': f'Bearer {access_token}'}

    artist_ids = []
    limit = 50  # page size (maximum allowed by Spotify)
    offset = 0  # start at the first result

    while len(artist_ids) < total_limit:
        params = {
            'q': f"genre:\"{query}\"",
            'type': 'artist',
            'limit': limit,
            'offset': offset
        }

        response = requests.get(search_url, headers=headers, params=params)

        # Check the status before attempting to decode JSON
        if response.status_code != 200:
            print(f"Error en la solicitud: {response.status_code}")
            break

        response_data = response.json()

        # Stop when the payload carries no artist items
        if not ('artists' in response_data and 'items' in response_data['artists']):
            print("No se encontraron más artistas o hay un error en la respuesta.")
            break

        batch_ids = [artist['id'] for artist in response_data['artists']['items']]
        artist_ids.extend(batch_ids)

        # A short page means there is nothing left to fetch
        if len(batch_ids) < limit:
            print("Alcanzado el final de los resultados.")
            break

        # Move on to the next page
        offset += limit

        # Pause between requests to stay clear of rate limiting; skipped once
        # enough IDs are collected so the function does not idle before returning.
        if len(artist_ids) < total_limit:
            time.sleep(1)

    return artist_ids[:total_limit]  # never return more than requested



In [41]:
genre = "death metal"
# Use the paginated get_artists_by_genre defined in this notebook; no
# get_artists_by_genre2 is defined anywhere, so that call raised NameError on a fresh kernel.
MetaL_artist_ids = get_artists_by_genre(genre, access_token, total_limit=3000)
print(len(MetaL_artist_ids))  # Number of artist IDs collected, at most total_limit (3000).

1000


In [42]:
def extract_artists_to_dataframe2(artist_list):
    """Build a compact artist DataFrame from a flat list of artist objects.

    :param artist_list: List of artist dicts (with ``name``, ``genres``,
        ``popularity``, ``followers`` and ``uri``). ``None`` entries are skipped.
    :return: DataFrame with columns Name, Genres, Popularity, Followers,
        Artist_URI. An empty list yields an empty frame with these columns.
    """
    columns = ['Name', 'Genres', 'Popularity', 'Followers', 'Artist_URI']

    # None placeholders (the API is documented to return null for IDs it
    # cannot resolve) are dropped rather than crashing on None['name'].
    formatted_artist_data = [
        {
            'Name': artist['name'],
            'Genres': ', '.join(artist['genres']),
            'Popularity': artist['popularity'],
            'Followers': artist['followers']['total'],
            'Artist_URI': artist['uri'],
        }
        for artist in artist_list
        if artist is not None
    ]

    # Passing the column list keeps the schema stable even with zero rows.
    return pd.DataFrame(formatted_artist_data, columns=columns)


In [43]:
from time import sleep

def get_artists_from_spotify2(token, artist_ids, batch_size=50):
    """
    Fetch artist details from Spotify's API in batches.

    A batch that does not answer with status 200 is reported with ``print``,
    followed by a one-second pause, and is then skipped; its artists are
    missing from the result.

    :param token: Access token for API authorization.
    :param artist_ids: List of artist IDs.
    :param batch_size: Number of IDs per request. Values above 50 (the
        endpoint's documented maximum) are reduced to 50.
    :return: List of artist data dictionaries from all successful batches.
    :raises ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    # Larger batches would be rejected by the API on every request.
    batch_size = min(batch_size, 50)

    base_url = "https://api.spotify.com/v1/artists"

    # The headers do not change between batches, so build them once.
    headers = {
        "Authorization": f"Bearer {token}"
    }

    all_artist_data = []

    for i in range(0, len(artist_ids), batch_size):
        batch = artist_ids[i:i+batch_size]

        response = requests.get(base_url, headers=headers, params={"ids": ",".join(batch)})

        if response.status_code == 200:
            all_artist_data.extend(response.json()['artists'])
        else:
            print(f"Error en el batch {i//batch_size}: {response.status_code}")
            sleep(1)  # back off for a second before the next batch

    return all_artist_data



In [45]:
batch_size = 50  # Spotify API limit

# get_artists_from_spotify2 already splits the ID list into batches of
# batch_size, so a single call replaces the outer batching loop, and the
# DataFrame is built once instead of being grown with pd.concat per batch.
artist_records = get_artists_from_spotify2(token, MetaL_artist_ids, batch_size)
df_METAL_artists = extract_artists_to_dataframe2(artist_records)

#print(df_artists)


In [46]:
# Display the collected artist table (Name, Genres, Popularity, Followers, Artist_URI).
df_METAL_artists

Unnamed: 0,Name,Genres,Popularity,Followers,Artist_URI
0,Slayer,"alternative metal, death metal, groove metal, ...",64,3717516,spotify:artist:1IQ2e1buppatiN1bxUVkrk
1,Gojira,"alternative metal, french death metal, french ...",61,1239549,spotify:artist:0GDGKpJFhVpcjIGF8N6Ewt
2,Currents,"melodic metalcore, metalcore, progressive deat...",58,187066,spotify:artist:5pqvAI85RMxL9K0xHvSwGu
3,In Flames,"alternative metal, gothenburg metal, melodic d...",61,1127923,spotify:artist:57ylwQTnFnIhJh4nu4rxCs
4,Lorna Shore,"blackened deathcore, melodic metalcore, sympho...",57,540632,spotify:artist:6vXYoy8ouRVib302zxaxFF
...,...,...,...,...,...
995,Cleansing of the Temple,"christian death metal, christian deathcore",5,1315,spotify:artist:4nNgrGNSI4M9ODTMH5MdNI
996,Sentient Horror,swedish death metal,7,6037,spotify:artist:4MNbashXmMvEKIre76REzX
997,Purulence,"british death metal, slam death metal",6,2317,spotify:artist:6n2vNeCnk8otr3DpmS6osE
998,Emasculated Vituperation,slam death metal,7,916,spotify:artist:6JWlwpjP9WMxELrxX73n4h


In [47]:
# Rows whose Name is exactly 'Masacre'.
df_METAL_artists[df_METAL_artists['Name'].eq('Masacre')]

Unnamed: 0,Name,Genres,Popularity,Followers,Artist_URI
804,Masacre,"colombian death metal, metal colombiano",18,18126,spotify:artist:6ybTIsR4n37yT6UspaRR7m


In [48]:
# Rows whose Name is exactly 'Suffocation'.
df_METAL_artists[df_METAL_artists['Name'].eq('Suffocation')]

Unnamed: 0,Name,Genres,Popularity,Followers,Artist_URI
60,Suffocation,"black metal, brutal death metal, death metal, ...",42,220442,spotify:artist:4ItRDIouodpnW6nm4TYDk1


In [50]:
# Persist the artist table to CSV without the row index.
df_METAL_artists.to_csv(path_or_buf='Spotify_extracted.csv', index=False)