# 03 - Streaming Silver

Pipeline de transformation Bronze → Silver : Nettoyage et enrichissement des données de vol.

## Configuration

In [None]:
from pyspark.sql.functions import col, from_unixtime, to_timestamp, round
from config import get_s3_path, create_spark_session

# Storage locations for this pipeline, resolved through the project's config helper.
# NOTE(review): presumably get_s3_path(layer, name) returns an S3 URI — confirm in config.py.
BRONZE_PATH = get_s3_path("bronze", "flights")
SILVER_PATH = get_s3_path("silver", "flights")
# Checkpoint location passed to the streaming writer of the Silver sink.
CHECKPOINT_SILVER = get_s3_path("checkpoints", "silver_flights")

# Spark session used by the cells of this notebook.
# NOTE(review): "StreamingSilver" is presumably the application name — confirm in config.py.
spark = create_spark_session("StreamingSilver")

# Echo the resolved input/output locations so a misconfiguration is visible early.
print(f"‚úÖ Input:  {BRONZE_PATH}")
print(f"‚úÖ Output: {SILVER_PATH}")

## Bronze → Silver

In [None]:
# Read the Bronze Delta table as a streaming source.
df_bronze_stream = spark.readStream.format("delta").load(BRONZE_PATH)

# Clean and enrich the Bronze rows:
#   * keep only rows that have an aircraft identifier and a complete position;
#   * derive `event_timestamp` from the epoch-seconds `time` column;
#   * expose the speed as `velocity_kmh` (velocity * 3.6, rounded to 2 decimals;
#     presumably the source value is in m/s — TODO confirm);
#   * expose `baro_altitude` under the name `altitude_meters`.
df_silver = (
    df_bronze_stream
    .filter(col("icao24").isNotNull())
    .filter(col("latitude").isNotNull() & col("longitude").isNotNull())
    # Numeric cast instead of to_timestamp(from_unixtime(...)): the former
    # round-trips through a wall-clock string rendered in the session time
    # zone, which is ambiguous during a DST fall-back hour and can shift
    # events by one hour. Casting to long first keeps the whole-second
    # resolution that from_unixtime had.
    .withColumn("event_timestamp", col("time").cast("long").cast("timestamp"))
    # NOTE: `round` here is pyspark.sql.functions.round, not the builtin.
    .withColumn("velocity_kmh", round(col("velocity") * 3.6, 2))
    .withColumn("altitude_meters", col("baro_altitude"))
    .select(
        "event_timestamp", "icao24", "callsign", "origin_country",
        "longitude", "latitude", "velocity_kmh", "altitude_meters",
        "on_ground", "category",
    )
)

print("üöÄ Stream: Bronze ‚Üí Silver")

# Append the cleaned rows to the Silver Delta table. The checkpoint location
# is what lets the stream resume where it left off after a restart.
query_silver = (
    df_silver.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", CHECKPOINT_SILVER)
    .option("mergeSchema", "true")
    .start(SILVER_PATH)
)

## Monitoring du stream

In [None]:
import time

# Seconds to wait between two status reports.
MONITOR_INTERVAL_S = 30

print("üìä Monitoring du stream (Ctrl+C pour arr√™ter)")
print("="*60)

try:
    # Poll only while the query is alive: with an unconditional loop, a
    # failed or stopped stream would keep printing a stale status forever
    # and its error would never be reported.
    while query_silver.isActive:
        print(f"\n‚è±Ô∏è  {time.strftime('%H:%M:%S')}")
        print(f"  Silver: {query_silver.status}")
        time.sleep(MONITOR_INTERVAL_S)
except KeyboardInterrupt:
    # Manual interruption: leave the query running, the next cell stops it.
    print("\n‚èπÔ∏è  Arr√™t demand√©...")
else:
    # The loop ended on its own, so the query is no longer active.
    # Report the failure cause when there is one.
    stream_error = query_silver.exception()
    if stream_error is not None:
        print(f"\nStream en erreur: {stream_error}")
    else:
        print("\nStream inactif")

## Arrêt du stream

In [None]:
# Stop the Bronze -> Silver streaming query, then confirm it on stdout.
query_silver.stop()
print("‚úÖ Stream arr√™t√©")

## Vérification

In [None]:
# Sanity check: batch-read each Delta table and report its current row count.
print("üìä Statistiques :")

bronze_rows = spark.read.format("delta").load(BRONZE_PATH).count()
print(f"  Bronze: {bronze_rows:,} lignes")

silver_rows = spark.read.format("delta").load(SILVER_PATH).count()
print(f"  Silver: {silver_rows:,} lignes")