## LAST_FM

In [1]:
from dotenv import load_dotenv
import os

import time
import re

import pandas as pd
import requests

In [2]:
LASTFM_API_KEY = os.getenv("LASTFM_API_KEY")
LASTFM_SECRET_KEY = os.getenv("LASTFM_SECRET_KEY")

In [3]:
def get_artist_info(artist_name):  # Generamos una función para obtener info de la API de LastFM
    API_KEY = LASTFM_API_KEY
    BASE_URL = "https://ws.audioscrobbler.com/2.0/"  # Conexión a la API de LastFM

    params = {
        "method": "artist.getinfo",
        "artist": artist_name,
        "api_key": API_KEY,
        "format": "json",
        "autocorrect": 1,
        "lang": "en"
    }

    response = requests.get(BASE_URL, params=params, timeout=30)

    # Si la respuesta HTTP no es correcta, se salta este artista
    if response.status_code != 200:
        print(f"❌ HTTP {response.status_code} para el artista: {artist_name}")
        return None

    # Intentar convertir la respuesta a JSON (a veces la API devuelve HTML o vacío)
    try:
        data = response.json()
    except ValueError:
        print(f"❌ Respuesta no JSON para el artista: {artist_name} | Primeros caracteres: {response.text[:120]}")
        return None

    # La API de LastFM puede devolver errores dentro del JSON
    if isinstance(data, dict) and "error" in data:
        print(f"❌ Error de la API LastFM: {data.get('message')}")
        return None

    artist = data["artist"]
    listeners = artist["stats"]["listeners"]
    playcount = artist["stats"]["playcount"]
    similar_artists = [a["name"] for a in artist["similar"]["artist"]]
    similar_artist = similar_artists[0] if similar_artists else None

    # Información biográfica del artista
    bio = data.get("artist", {}).get("bio", {})
    summary = bio.get("summary", {})

    return {
        "artist_name": artist["name"],
        "biography": summary,
        "listeners": listeners,
        "playcount": playcount,
        "similar_artist": similar_artist
    }


In [4]:
# Incluir aquí el CSV de los datos de Spotify
df_spotify = pd.read_csv("canciones-spotify.csv")  # Transformación en DataFrame
df_spotify.head()

Unnamed: 0,id,track_name,artist_name,year,genre,album_type,release_date,popularity
0,0LpHC9mhPAQC98IjXZIrif,The Election of 1800,Daveed Diggs,2015,soundtrack,album,2015-09-25,65
1,6oF8ueLn5hIl4PRp17sxW6,That Would Be Enough,Phillipa Soo,2015,soundtrack,album,2015-09-25,66
2,4eeN8erNIbW2osT0knz5vT,I Know Him,Jonathan Groff,2015,soundtrack,album,2015-09-25,65
3,3lXyAQ0kekAvY5LodpWmUs,Ten Duel Commandments,Lin-Manuel Miranda,2015,soundtrack,album,2015-09-25,67
4,1llkMtLL4vbe0pr3NV2ckg,Let Me Hear,"Fear, and Loathing in Las Vegas",2015,soundtrack,album,2015-09-30,59


In [5]:
df_spotify['artist_name'].tolist() # Creación de lista de artistas para ejecutar en archivo "artistas_LastFM.ipynb"

['Daveed Diggs',
 'Phillipa Soo',
 'Jonathan Groff',
 'Lin-Manuel Miranda',
 'Fear, and Loathing in Las Vegas',
 'Kazuya Yoshii',
 'Lin-Manuel Miranda',
 'Leslie Odom Jr.',
 'Phillipa Soo',
 'Original Broadway Cast of Hamilton',
 'Daveed Diggs',
 'Aimer',
 'Lin-Manuel Miranda',
 'Leslie Odom Jr.',
 'Christopher Jackson',
 'Toby Fox',
 'Toby Fox',
 'Leslie Odom Jr.',
 'KANA-BOON',
 'Okieriete Onaodowan',
 'Leslie Odom Jr.',
 'Leslie Odom Jr.',
 'Michael Giacchino',
 'Toby Fox',
 'Goose house',
 'Toby Fox',
 'Toby Fox',
 'Toby Fox',
 'Phillipa Soo',
 'Jasmine Cephas-Jones',
 'Renée Elise Goldsberry',
 'Jonathan Groff',
 'Toby Fox',
 'Marcin Przybyłowicz',
 'Mikolai Stroinski',
 'Original Broadway Cast of Hamilton',
 'Leslie Odom Jr.',
 'Lin-Manuel Miranda',
 'Lin-Manuel Miranda',
 'Jonathan Groff',
 'Thayne Jasperson',
 'Renée Elise Goldsberry',
 'Leslie Odom Jr.',
 'Phillipa Soo',
 'John Williams',
 'Lin-Manuel Miranda',
 'Toby Fox',
 'Toby Fox',
 'Leslie Odom Jr.',
 'Toby Fox',
 'Toby 

In [6]:
len(df_spotify['artist_name'].tolist()) # Número de registros de la extración de datos de la API Spotipy

3000

In [7]:
unique_artists_spotify = df_spotify['artist_name'].unique().tolist() # Generar valores únicos de artistas de la extración de datos de la API Spotipy

In [8]:
len(unique_artists_spotify) # Número de registros unicos de artistas de la extración de datos de la API Spotipy

1141

In [9]:
lista_artist_lastfm_NN = [] # Variable para almacenar toda la información de todos los artistas que no sean nulos.

for X in unique_artists_spotify:
    info = get_artist_info(X)
    if info is not None:  # solo agrega si tiene datos
        lista_artist_lastfm_NN.append(info)
    time.sleep(0.5)

❌ Error de la API LastFM: The artist you supplied could not be found
❌ Error de la API LastFM: The artist you supplied could not be found
❌ HTTP 500 para el artista: Pastaboys
❌ HTTP 500 para el artista: Teni


In [10]:
len(lista_artist_lastfm_NN) # Número de registros unicos de artistas de la extración de datos de la API LastFM

1137

In [11]:
df_lastfm = pd.DataFrame(lista_artist_lastfm_NN) # Transformación en DataFrame

In [12]:
df_lastfm

Unnamed: 0,artist_name,biography,listeners,playcount,similar_artist
0,Daveed Diggs,"Daveed Daniele Diggs (born January 24, 1982 in...",415158,4996907,Christopher Jackson
1,Phillipa Soo,"Phillipa Soo (born May 31, 1990) is an America...",579996,15510216,Christopher Jackson
2,Jonathan Groff,Jonathan Groff is an American stage performer ...,748179,13189716,Original Broadway Cast of Hamilton
3,Lin-Manuel Miranda,"Lin-Manuel Miranda (born January 16, 1980 in N...",722417,26857358,Leslie Odom Jr.
4,"Fear, and Loathing in Las Vegas","Fear, and Loathing in Las Vegas is a electroni...",222284,8167150,a crowd of rebellion
...,...,...,...,...,...
1132,Chop Daily,"<a href=""https://www.last.fm/music/Chop+Daily...",77762,620161,Keys the Prince
1133,Praiz,"Praise Ugbede Adejo (born 8 March 1984), bette...",20392,120860,Banky W
1134,Marco Avitabile,"<a href=""https://www.last.fm/music/Marco+Avit...",1276,3704,Peppe Citarella
1135,Lola Jane,"<a href=""https://www.last.fm/music/Lola+Jane""...",5905,52454,Archie & Sizzle


In [13]:
df_lastfm.to_csv("artistas-lastfm.csv", index=False) #Extraccion LastFM to CSV

In [14]:
df_artists_lastfm = pd.read_csv("artistas-lastfm.csv") # Lectura de CSV
df_artists_lastfm

Unnamed: 0,artist_name,biography,listeners,playcount,similar_artist
0,Daveed Diggs,"Daveed Daniele Diggs (born January 24, 1982 in...",415158,4996907,Christopher Jackson
1,Phillipa Soo,"Phillipa Soo (born May 31, 1990) is an America...",579996,15510216,Christopher Jackson
2,Jonathan Groff,Jonathan Groff is an American stage performer ...,748179,13189716,Original Broadway Cast of Hamilton
3,Lin-Manuel Miranda,"Lin-Manuel Miranda (born January 16, 1980 in N...",722417,26857358,Leslie Odom Jr.
4,"Fear, and Loathing in Las Vegas","Fear, and Loathing in Las Vegas is a electroni...",222284,8167150,a crowd of rebellion
...,...,...,...,...,...
1132,Chop Daily,"<a href=""https://www.last.fm/music/Chop+Daily...",77762,620161,Keys the Prince
1133,Praiz,"Praise Ugbede Adejo (born 8 March 1984), bette...",20392,120860,Banky W
1134,Marco Avitabile,"<a href=""https://www.last.fm/music/Marco+Avit...",1276,3704,Peppe Citarella
1135,Lola Jane,"<a href=""https://www.last.fm/music/Lola+Jane""...",5905,52454,Archie & Sizzle


In [15]:
def clean_html_links(text):
    if not isinstance(text, str):
        return text
    
    # Reemplaza <a href="URL">texto</a> → URL
    text = re.sub(r'<a href="(https?://[^"]+)".*?>.*?</a>', r'\1', text)

    # Quita cualquier otra etiqueta HTML que quede
    text = re.sub(r'<.*?>', '', text)

    return text.strip()


df_artists_lastfm = pd.read_csv("artistas-lastfm.csv")

for i, artist in enumerate(df_artists_lastfm["artist_name"], start=1):
    biography_text = df_artists_lastfm.loc[i-1, "biography"]     
    cleaned_text = clean_html_links(biography_text)    #aquí limpiamos el codigo HTML
    df_artists_lastfm.loc[i-1, "biography"] = cleaned_text  #guardamos el texto limpio en la misma celda
    print(f"{i}. ✅ {artist}")
    time.sleep(0.25)

1. ✅ Daveed Diggs
2. ✅ Phillipa Soo
3. ✅ Jonathan Groff
4. ✅ Lin-Manuel Miranda
5. ✅ Fear, and Loathing in Las Vegas
6. ✅ Kazuya Yoshii
7. ✅ Leslie Odom Jr.
8. ✅ Original Broadway Cast of Hamilton
9. ✅ Aimer
10. ✅ Christopher Jackson
11. ✅ Toby Fox
12. ✅ KANA-BOON
13. ✅ Okieriete Onaodowan
14. ✅ Michael Giacchino
15. ✅ Goose house
16. ✅ Jasmine Cephas-Jones
17. ✅ Renée Elise Goldsberry
18. ✅ Marcin Przybyłowicz
19. ✅ Mikolai Stroinski
20. ✅ Thayne Jasperson
21. ✅ John Williams
22. ✅ Yutaka Yamada
23. ✅ The Barden Bellas
24. ✅ Patrick Doyle
25. ✅ Eir Aoi
26. ✅ Dario Marianelli
27. ✅ 鈴木このみ
28. ✅ 坂本龍一
29. ✅ Trevor Morris
30. ✅ Basil Poledouris
31. ✅ Agrupacion Musical Nuestro Padre Jesús Despojado de Jaen
32. ✅ Roque Baños
33. ✅ Film Symphony Orchestra
34. ✅ BRADIO
35. ✅ Bear McCreary
36. ✅ Sho Oosawa
37. ✅ TK from 凛として時雨
38. ✅ SIE Sound Team
39. ✅ Percival Schuttenbach
40. ✅ Inon Zur
41. ✅ James Newton Howard
42. ✅ Harry Gregson-Williams
43. ✅ Alan Silvestri
44. ✅ GENERATIONS from EXILE 

In [16]:
df_artists_lastfm.tail(10)

Unnamed: 0,artist_name,biography,listeners,playcount,similar_artist
1127,Cubita,https://www.last.fm/music/Cubita,5604,52004,Nuno Ribeiro
1128,Chris Brown,"Christopher Maurice Brown (born May 5, 1989) i...",4694935,154333464,Chris Brown & Tyga
1129,PAAX (Tulum),https://www.last.fm/music/PAAX+(Tulum),41100,283484,Antaares
1130,Simone Vitullo,Simone Vitullo DJ and house music producer for...,23050,108481,Chambord
1131,Duda,There are two groups using the name Duda one f...,7143,78402,Peled
1132,Chop Daily,https://www.last.fm/music/Chop+Daily,77762,620161,Keys the Prince
1133,Praiz,"Praise Ugbede Adejo (born 8 March 1984), bette...",20392,120860,Banky W
1134,Marco Avitabile,https://www.last.fm/music/Marco+Avitabile,1276,3704,Peppe Citarella
1135,Lola Jane,https://www.last.fm/music/Lola+Jane,5905,52454,Archie & Sizzle
1136,Masego,"Micah Davis (born June 8, 1993), better known ...",1035548,20285573,SiR


In [17]:
df_artists_lastfm.to_csv("artistas-lastfm.csv", index=False) #Extraccion LastFM to CSV