In [0]:
%pip install requests pandas
dbutils.library.restartPython()

import requests
import pandas as pd
import time
from datetime import datetime
from pyspark.sql import functions as F

# --- GET helper that retries when the API answers HTTP 429 (rate limited) ---
def get_with_retry(url, params, retries=5, wait=5, timeout=30):
    """Issue a GET request and return its JSON body, retrying on HTTP 429.

    Args:
        url: Endpoint to request.
        params: Query-string parameters forwarded to ``requests.get``.
        retries: Maximum number of attempts.
        wait: Base delay in seconds. After the k-th attempt (1-based) is
            answered with 429, the function sleeps ``wait * k`` seconds.
        timeout: Seconds allowed for a single attempt. Without a timeout a
            stalled connection would block the whole job indefinitely.

    Returns:
        The decoded JSON payload of the first response with status 200.

    Raises:
        requests.HTTPError: For an error status other than 429 (raised by
            ``raise_for_status``).
        requests.exceptions.Timeout: When a single attempt exceeds ``timeout``.
        RuntimeError: When no attempt produced a 200 response.
    """
    for i in range(retries):
        resp = requests.get(url, params=params, timeout=timeout)
        if resp.status_code == 200:
            return resp.json()
        if resp.status_code == 429:
            # Linear back-off: each rate-limited attempt waits longer than the last.
            print(f"‚ö†Ô∏è Limite atingido. Tentativa {i+1}/{retries}. Esperando {wait*(i+1)}s...")
            time.sleep(wait * (i+1))
        else:
            # Raises for 4xx/5xx; any other non-200 status simply falls through
            # to the next attempt without sleeping.
            resp.raise_for_status()
    raise RuntimeError("‚ùå Falhou ap√≥s v√°rias tentativas.")

# --- 1. Fetch the top 50 coins by market cap ---
url_markets = "https://api.coingecko.com/api/v3/coins/markets"
params = {
    "vs_currency": "usd",
    "order": "market_cap_desc",
    "per_page": 50,
    "page": 1,
    "sparkline": False,
}
# Use the shared retry helper so that a 429 on this very first call is retried
# like every later request, instead of aborting the run immediately.
top50_data = get_with_retry(url_markets, params)

# Map each coin id to the symbol/name pair that is attached to every history row.
coin_meta = {c["id"]: {"symbol": c["symbol"], "name": c["name"]} for c in top50_data}
top50 = list(coin_meta)

print(f"Total de moedas capturadas: {len(top50)}")

# --- 2. Date range (epoch seconds) ---
# The bounds are pinned to UTC: a naive datetime's .timestamp() is interpreted in
# the machine's local time zone, which would make the requested window depend on
# how the cluster happens to be configured.
from datetime import timezone

# Alternative window, kept for reference (2025-08-01 up to "now"):
#dt_from = int(datetime(2025, 8, 1, tzinfo=timezone.utc).timestamp())
#dt_to   = int(datetime.now(tz=timezone.utc).timestamp())
dt_from = int(datetime(2025, 11, 25, tzinfo=timezone.utc).timestamp())
dt_to = int(datetime(2025, 11, 28, tzinfo=timezone.utc).timestamp())

# --- 3. Collect and persist the price history of one batch of coins ---
def _coin_history_rows(coin):
    """Fetch the market-chart range of one coin and flatten it into row dicts."""
    url_hist = f"https://api.coingecko.com/api/v3/coins/{coin}/market_chart/range"
    params = {"vs_currency": "usd", "from": dt_from, "to": dt_to}

    data = get_with_retry(url_hist, params, retries=5, wait=10)

    # Each series entry is unpacked as a (timestamp, value) pair. The three
    # series are walked in lockstep, i.e. they are assumed to be index-aligned;
    # zip stops at the shortest one instead of raising on a length mismatch.
    series = zip(
        data.get("prices", []),
        data.get("market_caps", []),
        data.get("total_volumes", []),
    )
    return [
        {
            "id": coin,
            "symbol": coin_meta[coin]["symbol"],
            "name": coin_meta[coin]["name"],
            # The raw value is divided by 1000 before conversion, i.e. it is
            # treated as epoch milliseconds.
            # NOTE(review): fromtimestamp() yields a naive local-time datetime;
            # confirm the driver/session time zone is the intended one (UTC?).
            "timestamp": datetime.fromtimestamp(ts / 1000),
            "current_price": price,
            "market_cap": mcap,
            "total_volume": vol,
        }
        for (ts, price), (_, mcap), (_, vol) in series
    ]


def process_lote(coins, output_path):
    """Download the history of every coin in ``coins`` and append it as Parquet.

    Args:
        coins: Iterable of coin ids; each id must be a key of ``coin_meta``.
        output_path: Destination directory of the Parquet dataset.

    Returns:
        None. Rows are written to ``output_path`` in append mode, partitioned
        by month (``mes``, formatted ``yyyy-MM``) and day (``dt``). Nothing is
        written when the batch yields no rows.

    Raises:
        Whatever ``get_with_retry`` raises for a failed request; in that case
        nothing from the current batch is written.
    """
    all_data = []

    for coin in coins:
        all_data.extend(_coin_history_rows(coin))
        print(f"‚úî Hist√≥rico coletado para {coin}")
        time.sleep(2)  # generous pause between calls to stay under the rate limit

    if not all_data:
        # An empty list produces a pandas frame without columns, which gives
        # spark.createDataFrame nothing to infer a schema from; skip the write.
        print(f"Lote sem dados; nada foi salvo em: {output_path}")
        return

    # --- Convert to a Spark DataFrame and save it partitioned ---
    df = pd.DataFrame(all_data)
    sdf = spark.createDataFrame(df)

    # Partition columns: calendar day and month derived from the row timestamp.
    sdf = (
        sdf.withColumn("dt", F.to_date(F.col("timestamp")))
           .withColumn("mes", F.date_format("dt", "yyyy-MM"))
    )

    # Append mode: running the same batch again adds its rows a second time.
    (sdf.write
        .mode("append")
        .partitionBy("mes", "dt")
        .parquet(output_path)
    )
    print(f"Lote salvo em: {output_path}")

# --- 4. Run in batches of BATCH_SIZE coins ---
output_path = "/Volumes/coingecko/raw/raw"

# Single source of truth for the batch size; it drives the range step, the
# slice width and the batch numbering, which previously each repeated "10".
BATCH_SIZE = 10

for batch_no, start in enumerate(range(0, len(top50), BATCH_SIZE), start=1):
    lote = top50[start:start + BATCH_SIZE]
    print(f"\nüöÄ Processando lote {batch_no}: {lote}")
    # Each batch is written on its own, so batches already saved stay on disk
    # if a later one fails.
    process_lote(lote, output_path)