In [0]:
import base64
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
import requests

# Fetch an access token
def get_spotify_access_token(client_id, client_secret, timeout=10):
    """Request an access token using the OAuth client-credentials grant.

    Args:
        client_id: Spotify application client id.
        client_secret: Spotify application client secret.
        timeout: Seconds to wait for the token endpoint before giving up.
            Without a timeout `requests` can block indefinitely.

    Returns:
        The value of the "access_token" field of the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
            (raised by `raise_for_status()`).
        requests.Timeout: If the endpoint does not answer within `timeout`.
        KeyError: If the response JSON has no "access_token" field.
    """
    url = "https://accounts.spotify.com/api/token"
    # HTTP Basic credentials: base64("<client_id>:<client_secret>")
    basic_credentials = base64.b64encode(
        f"{client_id}:{client_secret}".encode()
    ).decode()
    headers = {"Authorization": "Basic " + basic_credentials}
    data = {"grant_type": "client_credentials"}
    response = requests.post(url, headers=headers, data=data, timeout=timeout)
    response.raise_for_status()
    return response.json()["access_token"]

# Authentication
# Credentials are read from the environment instead of being committed with the
# notebook. A missing variable fails fast with a KeyError naming the variable.
# NOTE(review): the client id/secret that used to be hardcoded here were exposed
# in source and should be rotated in the Spotify developer dashboard.
client_id = os.environ["SPOTIFY_CLIENT_ID"]
client_secret = os.environ["SPOTIFY_CLIENT_SECRET"]
token = get_spotify_access_token(client_id, client_secret)


In [0]:
# Load the track list from Spark
# Reads the songs from the Silver layer
rows = spark.sql(
    "SELECT DISTINCT Track, Artist FROM workspace.silver.classic_hit"
).collect()

# The query is DISTINCT over (Track, Artist), so the artist is kept next to the
# track name instead of being discarded; existing readers of "track" still work.
# NOTE(review): the same track title can appear once per artist, so a lookup
# that uses only "track" will repeat identical searches - confirm the intent.
tracks_list = [{"track": row["Track"], "artist": row["Artist"]} for row in rows]

# Number of (track, artist) pairs that will be looked up
count = len(tracks_list)

print(count)

In [0]:
# Fetch info for one track per call
def get_spotify_track_info(track_name, token, timeout=10, max_retries=3, max_wait=60):
    """Search Spotify for a track by name and return the best match.

    Args:
        track_name: Track title, sent as the `track:` field filter.
        token: Bearer access token.
        timeout: Seconds to wait for each HTTP response.
        max_retries: How many times to retry after an HTTP 429 answer.
        max_wait: Longest Retry-After delay (seconds) worth waiting for; a
            longer delay makes the call give up instead of blocking a worker.

    Returns:
        The list under tracks.items in the response (at most one item because
        the request uses limit=1), or an empty list when the search did not
        succeed.

    Raises:
        requests.RequestException: On network failures or timeouts; these are
            not swallowed.
    """
    url = "https://api.spotify.com/v1/search"
    headers = {"Authorization": f"Bearer {token}"}
    params = {
        "q": f"track:{track_name}",
        "type": "track",
        "limit": 1
    }
    retries_left = max(max_retries, 0)
    while True:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        if response.status_code == 200:
            return response.json().get("tracks", {}).get("items", [])
        if response.status_code != 429 or retries_left == 0:
            break
        # Rate limited: wait for the delay the server asks for before retrying.
        try:
            delay = float(response.headers.get("Retry-After", 1))
        except (TypeError, ValueError):
            delay = 1.0
        if delay > max_wait:
            break
        retries_left -= 1
        time.sleep(max(delay, 0.0))
    # Still best-effort (empty result), but the failure is no longer silent.
    print(f"Spotify search failed for {track_name!r}: HTTP {response.status_code}")
    return []

In [0]:
list_results = []

# Run the searches on a thread pool
with ThreadPoolExecutor(max_workers=5) as executor:  # may be raised to 10 if it does not stall
    pending = []
    for entry in tracks_list:
        # One search task per entry of the track list
        pending.append(
            executor.submit(get_spotify_track_info, entry["track"], token)
        )
    # Gather results in completion order, not submission order
    for finished in as_completed(pending):
        list_results += finished.result()

In [0]:
print(len(list_results), "resultados coletados")

# Stop before touching the table when nothing was collected: an empty pandas
# frame has no columns (Spark would have no schema to infer), and an overwrite
# would replace whatever the bronze table currently holds.
if not list_results:
    raise ValueError("No Spotify results collected; bronze table left untouched.")

# Build a DataFrame with a single 'json' column (one serialized item per row)
df = pd.DataFrame([{"json": json.dumps(item)} for item in list_results])

# Convert to a Spark DataFrame
spark_df = spark.createDataFrame(df)

# Save as a Delta table - Bronze layer - raw data
spark_df.write.format("delta") \
    .mode("overwrite") \
    .saveAsTable("workspace.bronze.search_track_artist")

In [0]:
%sql
-- Sanity check: number of rows currently in the bronze table.
SELECT count(*) FROM workspace.bronze.search_track_artist;

-- Commented-out cleanup statement: running it would drop the bronze table.
--DROP TABLE workspace.bronze.search_track_artist;