- Conversões: `fetch_time` ➜ timestamp; `cod_linha` ➜ int; `lat`/`lng` arredondados para 6 casas decimais nas novas colunas `latitude`/`longitude`.
- Filtro pelos limites geográficos aproximados de São Paulo; **deduplicação por (cod_parada, cod_linha)** mantendo o registro com `fetch_time` mais recente.

In [0]:
from pyspark.sql.functions import (
    col, to_timestamp, round, row_number
)
from pyspark.sql.window import Window

# Build the silver stops table: normalise types, keep only rows inside the
# São Paulo bounding box, and keep a single row per (cod_parada, cod_linha).

# Load the bronze table
bronze = spark.table("workspace.sptrans.bronze_paradas")

# Fully qualified name of the table this cell (re)creates
SILVER_TABLE = "workspace.sptrans.silver_paradas"

# Approximate bounding box of the city of São Paulo (decimal degrees)
LAT_MIN, LAT_MAX = -24.1, -23.3
LNG_MIN, LNG_MAX = -47.1, -46.2

cleaned = (
    bronze
    .withColumn("fetch_time", to_timestamp(col("fetch_time")))
    # Rounded coordinates go to new columns; the raw lat/lng are kept as-is
    .withColumn("latitude", round(col("lat"), 6))
    .withColumn("longitude", round(col("lng"), 6))
    .withColumn("cod_linha", col("cod_linha").cast("int"))
    .where(col("cod_parada").isNotNull())
    # The bounding-box filter is applied to the raw (unrounded) coordinates
    .where((col("lat").between(LAT_MIN, LAT_MAX)) & (col("lng").between(LNG_MIN, LNG_MAX)))
)

# Deduplicate by stop + line, keeping the most recent record.
# Ordering by fetch_time alone leaves row_number() free to pick any row when
# several rows share the same timestamp, so reruns could persist different
# rows. The extra sort keys make the choice reproducible.
# NOTE(review): `nome_parada` is taken from the columns shown in this cell's
# output — confirm it is always present in the bronze table.
w = Window.partitionBy("cod_parada", "cod_linha").orderBy(
    # Same ordering as plain desc(), with the null placement spelled out:
    # rows whose fetch_time is null are only kept when nothing else exists.
    col("fetch_time").desc_nulls_last(),
    col("nome_parada"),
    col("lat"),
    col("lng"),
)

silver_paradas = (
    cleaned
    .withColumn("rn", row_number().over(w))
    .where(col("rn") == 1)
    .drop("rn")
)

# Full refresh: replace both the data and the schema of the silver table
silver_paradas.write \
    .format("delta") \
    .option("overwriteSchema", "true") \
    .mode("overwrite") \
    .saveAsTable(SILVER_TABLE)

print("✅ silver_paradas criada com sucesso!")

# Preview what was actually persisted. Displaying `silver_paradas` directly
# would re-run the whole lazy plan (bronze scan + window) a second time.
display(spark.table(SILVER_TABLE).limit(10))

✅ silver_paradas criada com sucesso!


cod_linha,cod_parada,fetch_time,lat,lng,nome_parada,latitude,longitude
33200,614432,2025-11-09T23:48:38.869Z,-23.527608,-46.671511,BABY BARIONY C/B,-23.527608,-46.671511
33206,614432,2025-11-09T23:48:38.869Z,-23.527608,-46.671511,BABY BARIONY C/B,-23.527608,-46.671511
33211,614432,2025-11-09T23:48:38.869Z,-23.527608,-46.671511,BABY BARIONY C/B,-23.527608,-46.671511
33214,614432,2025-11-09T23:48:38.869Z,-23.527608,-46.671511,BABY BARIONY C/B,-23.527608,-46.671511
33348,614432,2025-11-09T23:48:38.869Z,-23.527608,-46.671511,BABY BARIONY C/B,-23.527608,-46.671511
33354,614432,2025-11-09T23:48:38.869Z,-23.527608,-46.671511,BABY BARIONY C/B,-23.527608,-46.671511
33359,614432,2025-11-09T23:48:38.869Z,-23.527608,-46.671511,BABY BARIONY C/B,-23.527608,-46.671511
34033,614432,2025-11-09T23:48:38.869Z,-23.527608,-46.671511,BABY BARIONY C/B,-23.527608,-46.671511
34041,614432,2025-11-09T23:48:38.869Z,-23.527608,-46.671511,BABY BARIONY C/B,-23.527608,-46.671511
34107,614432,2025-11-09T23:48:38.869Z,-23.527608,-46.671511,BABY BARIONY C/B,-23.527608,-46.671511


In [0]:
%sql
-- Sanity check: number of rows persisted in the silver stops table.
SELECT
  COUNT(*) AS total_registros
FROM
  workspace.sptrans.silver_paradas;