In [None]:
from pyspark.sql import functions as F
from pyspark.sql.types import *
from pyspark.sql.window import Window


In [None]:
# =============================================================================
# AIRPORT ANALYTICS
# =============================================================================

In [None]:
# Load the flight fact table from the silver schema.
# The progress message previously said "bronze" although the table read here
# lives in `02_silver`; the text now matches the actual source layer.
print("Reading fact_flights silver data...")

fact_flight_silver_df = spark.read.table("unikargo_dev.02_silver.unikargo_fact_flight_silver")

# count() is an action: it triggers a Spark job to report the row count.
print(f"Silver fact_flights count: {fact_flight_silver_df.count():,}")

In [None]:
# Load the three silver dimension tables (airline, airport, date).
print("Reading silver dimension tables...")

dim_airline_silver_df = spark.read.table("unikargo_dev.02_silver.unikargo_dim_airline_silver")
dim_airport_silver_df = spark.read.table("unikargo_dev.02_silver.unikargo_dim_airport_silver")
dim_date_silver_df = spark.read.table("unikargo_dev.02_silver.unikargo_dim_date_silver")

# Each count() below is a separate Spark action; the summary is a load sanity check.
print(
    f"Dimensions loaded - Airlines: {dim_airline_silver_df.count()}, "
    f"Airports: {dim_airport_silver_df.count()}, "
    f"Dates: {dim_date_silver_df.count()}"
)

In [None]:
# Airport performance as origin.
# Produces one row per `origin_airport_sk` with departure-side metrics:
#   departing_flights    - number of fact rows for the airport
#   avg_departure_delay  - mean of `departure_delay`
#   cancelled_departures - rows where `cancelled` equals 1
#   airlines_operating   - distinct `airline_sk` values
#   destinations_served  - distinct `destination_airport_sk` values
#   avg_taxi_out_time    - mean of `taxi_out`
origin_airport_stats = (
    fact_flight_silver_df
    .groupBy("origin_airport_sk")
    .agg(
        F.count("*").alias("departing_flights"),
        F.avg("departure_delay").alias("avg_departure_delay"),
        # Sum a 1/0 indicator to count the cancelled rows.
        F.sum(
            F.when(F.col("cancelled") == 1, F.lit(1)).otherwise(F.lit(0))
        ).alias("cancelled_departures"),
        F.countDistinct("airline_sk").alias("airlines_operating"),
        F.countDistinct("destination_airport_sk").alias("destinations_served"),
        F.avg("taxi_out").alias("avg_taxi_out_time"),
    )
    # Highest departure volume first.
    .sort(F.desc("departing_flights"))
)

In [None]:
# Preview five rows; the frame is sorted by departing_flights descending,
# so these are the origin keys with the most departing flights.
origin_airport_stats.show(n=5)

In [None]:
# Airport performance as destination.
# Produces one row per `destination_airport_sk` with arrival-side metrics:
#   arriving_flights  - number of fact rows for the airport
#   avg_arrival_delay - mean of `arrival_delay`
#   diverted_arrivals - rows where `diverted` equals 1
#   airlines_serving  - distinct `airline_sk` values
#   origins_served    - distinct `origin_airport_sk` values
#   avg_taxi_in_time  - mean of `taxi_in`
dest_airport_stats = (
    fact_flight_silver_df
    .groupBy("destination_airport_sk")
    .agg(
        F.count("*").alias("arriving_flights"),
        F.avg("arrival_delay").alias("avg_arrival_delay"),
        # Sum a 1/0 indicator to count the diverted rows.
        F.sum(
            F.when(F.col("diverted") == 1, F.lit(1)).otherwise(F.lit(0))
        ).alias("diverted_arrivals"),
        F.countDistinct("airline_sk").alias("airlines_serving"),
        F.countDistinct("origin_airport_sk").alias("origins_served"),
        F.avg("taxi_in").alias("avg_taxi_in_time"),
    )
    # Highest arrival volume first.
    .sort(F.desc("arriving_flights"))
)

In [None]:
# Preview five rows; the frame is sorted by arriving_flights descending,
# so these are the destination keys with the most arriving flights.
dest_airport_stats.show(n=5)

In [None]:
# Persist both aggregates as gold-layer tables.
# mode("overwrite") replaces any existing table contents on each run.
# NOTE(review): "overwriteSchema" is a Delta Lake writer option; this presumably
# targets Delta tables so a changed schema can replace the old one - confirm.
(
    origin_airport_stats.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("unikargo_dev.03_gold.origin_airport_performance")
)

(
    dest_airport_stats.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("unikargo_dev.03_gold.destination_airport_performance")
)

