In [0]:
import os

import requests
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType

# Fetch the InSight weather feed from the NASA API and load it into a
# "bronze" Spark DataFrame with one row per sol.

# NOTE(review): the API key literal below is committed in source. It is kept
# only as a fallback so existing runs keep working; prefer setting the
# NASA_API_KEY environment variable and rotating/removing the literal.
api_key = (
    os.environ.get("NASA_API_KEY")
    or "ywTPslt9SJHevhcwYCBGOCLTVjG1K6FsHRRjJksJ"
)
url = (
    "https://api.nasa.gov/insight_weather/"
    f"?api_key={api_key}"
    "&feedtype=json&ver=1.0"
)
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error status instead of parsing an error payload,
# which would otherwise end up as a silently empty DataFrame.
response.raise_for_status()
data = response.json()


def _to_double(value):
    """Return *value* as a float, passing None through unchanged.

    JSON integers (e.g. ``700``) are parsed as Python ``int``; with an
    explicit ``DoubleType`` schema, Spark's schema verification can reject
    them, so numeric readings are normalized to ``float`` up front.
    """
    return None if value is None else float(value)


# Extract the valid days (sols) listed by the feed.
sol_keys = data.get("sol_keys", [])
records = []

for sol in sol_keys:
    # `or {}` also covers keys that are present but null in the payload.
    sol_data = data.get(sol) or {}
    at = sol_data.get("AT") or {}
    pre = sol_data.get("PRE") or {}

    record = {
        "sol": sol,
        "season": sol_data.get("Season"),
        "first_utc": sol_data.get("First_UTC"),
        "last_utc": sol_data.get("Last_UTC"),
        "temp_avg": _to_double(at.get("av")),
        "temp_min": _to_double(at.get("mn")),
        "temp_max": _to_double(at.get("mx")),
        "pressure_avg": _to_double(pre.get("av")),
        "pressure_min": _to_double(pre.get("mn")),
        "pressure_max": _to_double(pre.get("mx")),
    }
    records.append(record)

# Define an explicit schema to avoid type-inference errors (for example when
# a column is entirely None or `records` is empty).
schema = StructType([
    StructField("sol", StringType(), True),
    StructField("season", StringType(), True),
    StructField("first_utc", StringType(), True),
    StructField("last_utc", StringType(), True),
    StructField("temp_avg", DoubleType(), True),
    StructField("temp_min", DoubleType(), True),
    StructField("temp_max", DoubleType(), True),
    StructField("pressure_avg", DoubleType(), True),
    StructField("pressure_min", DoubleType(), True),
    StructField("pressure_max", DoubleType(), True),
])

# Create the DataFrame using the explicit schema.
spark = SparkSession.builder.getOrCreate()
df_bronze = spark.createDataFrame(records, schema=schema)

# Show the structure and the data.
df_bronze.printSchema()
display(df_bronze)
