In [0]:
import json
import requests
from pyspark.sql.functions import *
from pyspark.sql.types import StructType, StructField, DoubleType, StringType, LongType, IntegerType
# Kept after the wildcard import so that this binding of `datetime` is the one that wins.
from datetime import datetime

#Configuração da API (CoinGecko é gratuita)

In [0]:
# Endpoint and query parameters for the market listing request.
API_URL = "https://api.coingecko.com/api/v3/coins/markets"
REQUEST_TIMEOUT_SECONDS = 30
params = {
    'vs_currency': 'usd',
    'order': 'market_cap_desc',
    'per_page': 100,
    'page': 1
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(API_URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail here on an HTTP error status instead of parsing an error body as market data.
response.raise_for_status()
data = response.json()

# Unwrap a {"data": [...]} envelope BEFORE indexing: data[0] on a dict raises KeyError.
if isinstance(data, dict) and "data" in data:
    data = data["data"]

# The following cells pass `data` to spark.createDataFrame as a list of records.
if not isinstance(data, list):
    raise ValueError(
        f"Unexpected payload type from {API_URL}: {type(data).__name__}"
    )

print(type(data))  # expected: list
print("Quantidade de registros:", len(data))
if data:
    # Guarded so an empty result does not raise IndexError.
    print("Exemplo de registro:", data[0])

In [0]:
# Explicit schema for the records returned by the /coins/markets request.
# Every column is nullable; the order below is the column order of the DataFrame.
schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in [
        ("id", StringType()),
        ("symbol", StringType()),
        ("name", StringType()),
        ("image", StringType()),
        ("current_price", DoubleType()),
        ("market_cap", LongType()),
        ("market_cap_rank", IntegerType()),
        ("fully_diluted_valuation", LongType()),
        ("total_volume", LongType()),
        ("high_24h", DoubleType()),
        ("low_24h", DoubleType()),
        ("price_change_24h", DoubleType()),
        ("price_change_percentage_24h", DoubleType()),
        ("market_cap_change_24h", DoubleType()),
        ("market_cap_change_percentage_24h", DoubleType()),
        ("circulating_supply", DoubleType()),
        ("total_supply", DoubleType()),
        ("max_supply", DoubleType()),
        ("ath", DoubleType()),
        ("ath_change_percentage", DoubleType()),
        ("ath_date", StringType()),
        ("atl", DoubleType()),
        ("atl_change_percentage", DoubleType()),
        ("atl_date", StringType()),
        # roi may be null, a string or a struct; it is kept as a nullable string here.
        ("roi", StringType()),
        ("last_updated", StringType()),
    ]
])

In [0]:
# Convert the API records into a Spark DataFrame using the explicit schema.

def _coerce_record_to_schema(record, target_schema):
    """Return a copy of `record` whose values match the Python types of `target_schema`.

    JSON decoding yields `int` for whole numbers (e.g. a price of 70187) and
    `float` otherwise, so the same column can arrive with either type.
    PySpark's row verification (verifySchema, on by default) does not accept an
    int for a DoubleType field nor a float for a LongType/IntegerType field, so
    values are normalised here before the DataFrame is built.

    Args:
        record: dict holding one decoded JSON object.
        target_schema: StructType describing the wanted columns.

    Returns:
        dict with exactly the schema's field names; keys missing from `record`
        map to None.
    """
    coerced = {}
    for field in target_schema.fields:
        value = record.get(field.name)
        if (
            isinstance(field.dataType, DoubleType)
            and isinstance(value, int)
            and not isinstance(value, bool)  # bool is an int subclass; leave it for Spark to reject
        ):
            value = float(value)
        elif (
            isinstance(field.dataType, (LongType, IntegerType))
            and isinstance(value, float)
            and value.is_integer()  # only lossless conversions; fractional values still fail verification
        ):
            value = int(value)
        elif (
            isinstance(field.dataType, StringType)
            and value is not None
            and not isinstance(value, str)
        ):
            # e.g. a nested `roi` object: store it as parseable JSON text.
            value = json.dumps(value)
        coerced[field.name] = value
    return coerced


df = spark.createDataFrame(
    [_coerce_record_to_schema(record, schema) for record in data],
    schema=schema
)
display(df)

In [0]:
# Attach ingestion metadata: load timestamp, load date and a constant source tag.
df_bronze = (
    df
    .withColumn(colName="ingestion_timestamp", col=current_timestamp())
    .withColumn(colName="ingestion_date", col=current_date())
    .withColumn(colName="source", col=lit("coingecko_api"))
)

# Quick sanity output: resulting column list and row count.
print(f"Colunas do df_bronze: {df_bronze.columns}")
print(f"Quantidade linhas df_bronze: {df_bronze.count()}")

In [0]:
# Create the bronze schema if it does not exist yet.
spark.sql("CREATE SCHEMA IF NOT EXISTS bronze")

# The write below appends and partitions by ingestion_date, i.e. the table is
# meant to accumulate one batch per run. Dropping it unconditionally before
# every write would discard all previously ingested partitions, so the drop is
# only performed when a reset is explicitly requested.
RESET_BRONZE_TABLE = False
if RESET_BRONZE_TABLE:
    spark.sql("DROP TABLE IF EXISTS bronze.crypto_raw_data")

# Append this batch to the bronze Delta table.
(
    df_bronze.write.format("delta")
    .mode("append")
    .partitionBy("ingestion_date")
    .saveAsTable("bronze.crypto_raw_data")
)
