In [1]:
import requests
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from tqdm import tqdm

# =========================
# CONFIGURA TUS CLAVES
# =========================
SPOTIFY_CLIENT_ID = '0d8b0fde96224dbcbff483f486a43059'
SPOTIFY_CLIENT_SECRET = '0f9b015f516440d5a1a612530c37da87'
LASTFM_API_KEY = 'c705bb970a7320356db26775172a9a87'

# =========================
# PARÁMETROS DEL PROYECTO
# =========================
genre = "orchestra"
year_start = 2000
year_end = 2001
limit = 50
tracks_per_year = 300

# =========================
# AUTENTICACIÓN SPOTIFY
# =========================
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
    client_id=SPOTIFY_CLIENT_ID,
    client_secret=SPOTIFY_CLIENT_SECRET
))

# =========================
# FUNCIÓN PARA SPOTIFY
# =========================
def search_spotify_items(genre, year, item_type):
    query = f"{genre} year:{year}"
    items = []

    for offset in range(0, tracks_per_year, 50):
        results = sp.search(q=query, type=item_type, limit=50, offset=offset)
        content = results.get(f"{item_type}s", {}).get("items", [])
        for item in content:
            artist = item['artists'][0]['name']
            name = item.get('name')
            item_id = item.get('id')
            if item_type == "track":
                year_value = item['album']['release_date'][:4]
            else:
                year_value = item['release_date'][:4]
            items.append({
                "artista": artist,
                "genero": genre,
                "tipo": item_type,
                "nombre": name,
                "año_lanzamiento": year_value,
                "id": item_id
            })
    return items

# =========================
# FUNCIÓN PARA LAST.FM
# =========================
def get_lastfm_artist_info(artist_name):
    base_url = "http://ws.audioscrobbler.com/2.0/"

    info_params = {
        "method": "artist.getInfo",
        "artist": artist_name,
        "api_key": LASTFM_API_KEY,
        "format": "json"
    }
    try:
        info_response = requests.get(base_url, params=info_params)
        data = info_response.json().get("artist", {})
        bio = data.get("bio", {}).get("summary", "")
        listeners = data.get("stats", {}).get("listeners", None)
        playcount = data.get("stats", {}).get("playcount", None)
    except:
        bio, listeners, playcount = None, None, None

    similar_params = {
        "method": "artist.getSimilar",
        "artist": artist_name,
        "api_key": LASTFM_API_KEY,
        "format": "json",
        "limit": 3
    }
    try:
        similar_response = requests.get(base_url, params=similar_params)
        similar_data = similar_response.json().get("similarartists", {}).get("artist", [])
        similars = [a["name"] for a in similar_data[:3]]
    except:
        similars = []

    similars += [None] * (3 - len(similars))
    return bio, listeners, playcount, similars[0], similars[1], similars[2]

# =========================
# RECOLECCIÓN DE DATOS
# =========================
all_items = []

for year in range(year_start, year_end + 1):
    for item_type in ["track", "album"]:
        print(f"\U0001F4E6 Buscando {item_type}s de '{genre}' para el año {year}...")
        items = search_spotify_items(genre, year, item_type)
        all_items.extend(items)

df = pd.DataFrame(all_items)

# =========================
# ENRIQUECIMIENTO LAST.FM
# =========================
print("🎧 Enriqueciendo con datos de Last.fm..."); tqdm.pandas(); df[['bio', 'oyentes', 'reproducciones', 'similar_1', 'similar_2', 'similar_3']] = df['artista'].progress_apply(lambda artist: pd.Series(get_lastfm_artist_info(artist)))

# =========================
# ELIMINAR SALTOS DE LÍNEA
# =========================
df["bio"] = df["bio"].astype(str).str.replace(r'\s+', ' ', regex=True)

# =========================
# GUARDAR ARCHIVO CSV
# =========================
df.to_csv("orchestra_2000_2001.csv", index=False, encoding="utf-8")
print("\n✅ Archivo guardado como 'orchestra_2000_2001.csv'")

📦 Buscando tracks de 'orchestra' para el año 2000...
📦 Buscando albums de 'orchestra' para el año 2000...
📦 Buscando tracks de 'orchestra' para el año 2001...
📦 Buscando albums de 'orchestra' para el año 2001...
🎧 Enriqueciendo con datos de Last.fm...


100%|██████████| 1199/1199 [08:53<00:00,  2.25it/s]


✅ Archivo guardado como 'orchestra_2000_2001.csv'





In [3]:
df = pd.read_csv ('orchestra_2000_2001.csv')
df.head(2)

Unnamed: 0,artista,genero,tipo,nombre,año_lanzamiento,id,bio,oyentes,reproducciones,similar_1,similar_2,similar_3
0,The Swingle Singers,orchestra,track,"Orchestral Suite No. 3 In D Minor, BWV 1068: Aria",2000,2XIjIUi0XvOrZkb859OYxN,"The Swingle Singers, currently based in London...",95650.0,890501.0,Jacques Loussier,The Real Group,The King's Singers
1,Hans Zimmer,orchestra,track,Now We Are Free,2000,1elGwF4VwkwglV4nCBPJtv,Hans Florian Zimmer (born 12 September 1957) i...,2426895.0,126090296.0,Hans Zimmer & James Newton Howard,Ludwig Göransson,James Horner


In [5]:
df.columns

Index(['artista', 'genero', 'tipo', 'nombre', 'año_lanzamiento', 'id', 'bio',
       'oyentes', 'reproducciones', 'similar_1', 'similar_2', 'similar_3'],
      dtype='object')