In [None]:
from pyspark.sql import functions as F
from pyspark.sql.types import *
from pyspark.sql.window import Window


In [None]:
# =============================================================================
# OPERATIONAL EFFICIENCY METRICS
# =============================================================================

In [None]:
# Load the silver-layer flight fact table that the metrics in this notebook read from.
# The log message names the silver layer to match the table actually being read.
print("Reading fact_flights silver data...")

fact_flight_silver_df = spark.read.table("unikargo_dev.02_silver.unikargo_fact_flight_silver")

# count() is a Spark action, so this line runs a job; it serves as a load sanity check.
print(f"Silver fact_flights count: {fact_flight_silver_df.count():,}")

In [None]:
# Load the silver dimension tables (date, airline, airport) and report their sizes.
print("Reading silver dimension tables...")

dim_date_silver_df = spark.read.table("unikargo_dev.02_silver.unikargo_dim_date_silver")
dim_airline_silver_df = spark.read.table("unikargo_dev.02_silver.unikargo_dim_airline_silver")
dim_airport_silver_df = spark.read.table("unikargo_dev.02_silver.unikargo_dim_airport_silver")

# Each count() is a Spark action; they run in the order airlines, airports, dates.
print(
    "Dimensions loaded - Airlines: {}, Airports: {}, Dates: {}".format(
        dim_airline_silver_df.count(),
        dim_airport_silver_df.count(),
        dim_date_silver_df.count(),
    )
)

In [None]:
# Aircraft utilization (flights per aircraft per day), summarised per airline.
#
# Rows without a tail number are removed before aggregating: groupBy treats NULL as
# an ordinary key, so all such flights of an airline on a given day would otherwise
# be pooled into a single pseudo-aircraft and inflate the avg/max figures.
# Consequence: an airline whose rows all lack a tail number no longer appears here.
aircraft_utilization = (
    fact_flight_silver_df
    .filter(F.col("tail_number").isNotNull())
    .join(dim_date_silver_df, "date_sk")
    .join(dim_airline_silver_df, "airline_sk")
    # Step 1: one row per (day, airline, aircraft) holding that aircraft's flight count.
    .groupBy("full_date", "airline", "tail_number")
    .agg(F.count("*").alias("flights_per_day"))
    # Step 2: roll the per-aircraft daily counts up to one row per airline.
    .groupBy("airline")
    .agg(
        F.avg("flights_per_day").alias("avg_flights_per_aircraft_per_day"),
        F.max("flights_per_day").alias("max_flights_per_aircraft_per_day"),
        # Distinct tail numbers seen for the airline across all days in the data.
        F.countDistinct("tail_number").alias("fleet_size"),
    )
)

In [None]:
# Preview the first five airline-level utilization rows.
aircraft_utilization.show(n=5)

In [None]:
# On-time performance trends with rolling averages.
#
# The rolling frame is defined over calendar days, not physical rows: rows are
# ordered by a day number and the frame covers the current day plus the six days
# before it. A row-based frame (rowsBetween(-6, 0)) would silently stretch beyond
# seven days whenever an airline has no flights on some date.
# NOTE(review): assumes full_date is a date (or a value Spark can cast to one) - confirm.
window_spec = (
    Window.partitionBy("airline")
    .orderBy(F.datediff(F.col("full_date"), F.lit("1970-01-01").cast("date")))
    .rangeBetween(-6, 0)
)

otp_trends = (
    fact_flight_silver_df
    .join(dim_date_silver_df, "date_sk")
    .join(dim_airline_silver_df, "airline_sk")
    .groupBy("full_date", "airline")
    .agg(
        F.count("*").alias("daily_flights"),
        # Share of flights departing at most 15 minutes late, as a percentage.
        # A NULL departure_delay makes the condition NULL, so the row contributes 0
        # to the numerator while still being counted in the denominator.
        (
            F.sum(F.when(F.col("departure_delay") <= 15, 1).otherwise(0))
            / F.count("*")
            * 100
        ).alias("daily_otp"),
    )
    # Unweighted mean of the daily percentages inside the 7-day frame.
    .withColumn("rolling_7day_otp", F.avg("daily_otp").over(window_spec))
    .orderBy("airline", "full_date")
)

In [None]:
# Preview the first five daily on-time-performance rows.
otp_trends.show(n=5)

In [None]:
# Flight number performance: delay, cancellation, distance and aircraft statistics
# per (airline, flight_number), keeping only flight numbers with at least 50 rows.
flight_number_performance = (
    fact_flight_silver_df
    .join(dim_airline_silver_df, on="airline_sk")
    .groupBy("airline", "flight_number")
    .agg(
        F.count("*").alias("total_flights"),
        F.avg(F.col("departure_delay")).alias("avg_departure_delay"),
        F.avg(F.col("arrival_delay")).alias("avg_arrival_delay"),
        # Number of rows whose cancelled flag equals 1.
        F.sum(F.when(F.col("cancelled") == 1, 1).otherwise(0)).alias("cancelled_flights"),
        F.avg(F.col("distance")).alias("avg_distance"),
        F.countDistinct(F.col("tail_number")).alias("aircraft_used"),
    )
    .where(F.col("total_flights") >= 50)
    # Within each airline, list the flight numbers with the largest average departure delay first.
    .orderBy(F.col("airline"), F.desc("avg_departure_delay"))
)

In [None]:
# Preview the first five flight-number performance rows.
flight_number_performance.show(n=5)

In [None]:
# Distance vs delay: bucket flights by distance and compare delay/cancellation stats.
#
# Rows with a NULL distance get their own "Unknown" bucket. Without the explicit
# NULL test they fail both comparisons and fall through to otherwise(), which
# would mislabel them as long haul.
# Bucket bounds are inclusive (<= 500, <= 1500) even though the labels read
# "<500mi" / "500-1500mi"; the labels are kept unchanged because they are data values.
distance_delay_analysis = (
    fact_flight_silver_df
    .withColumn(
        "distance_category",
        F.when(F.col("distance").isNull(), "Unknown")
         .when(F.col("distance") <= 500, "Short Haul (<500mi)")
         .when(F.col("distance") <= 1500, "Medium Haul (500-1500mi)")
         .otherwise("Long Haul (>1500mi)"),
    )
    .groupBy("distance_category")
    .agg(
        F.count("*").alias("flight_count"),
        F.avg("distance").alias("avg_distance"),
        F.avg("departure_delay").alias("avg_departure_delay"),
        F.avg("arrival_delay").alias("avg_arrival_delay"),
        F.avg("air_time").alias("avg_air_time"),
        # Number of rows whose cancelled flag equals 1.
        F.sum(F.when(F.col("cancelled") == 1, 1).otherwise(0)).alias("cancelled_flights"),
    )
    # Short -> long; an "Unknown" bucket, if present, has a NULL avg_distance.
    .orderBy("avg_distance")
)

In [None]:
# Preview the distance-bucket summary (at most five rows).
distance_delay_analysis.show(n=5)

In [None]:
# =============================================================================
# SAVE TO GOLD LAYER TABLES
# =============================================================================

In [None]:
# Persist the airline-level utilization metrics to the gold layer, replacing any
# existing contents of the target table.
(
    aircraft_utilization.write
    .mode("overwrite")
    .saveAsTable("unikargo_dev.03_gold.aircraft_utilization")
)
