In [0]:
# Load KPI-5 base dataset prepared in previous step
from pyspark.sql.functions import col

df_base = spark.table("kpi5_airline_base")

df_base.show(5)


+----------+---------+---------+--------+-------+-----+-----------+--------------------+----------------+----------------+---------------+--------------+
|op_carrier|arr_delay|dep_delay|taxi_out|taxi_in|month|day_of_week|        airline_name|arr_delay_capped|dep_delay_capped|taxi_out_capped|taxi_in_capped|
+----------+---------+---------+--------+-------+-----+-----------+--------------------+----------------+----------------+---------------+--------------+
|        AA|      3.0|      4.0|    29.0|   15.0|    1|          3|   American Airlines|             3.0|             4.0|           29.0|          15.0|
|        AS|     -7.0|    -10.0|    12.0|   11.0|    1|          3|     Alaska Airlines|             0.0|             0.0|           12.0|          11.0|
|        DL|     14.0|      0.0|    56.0|    5.0|    1|          3|     Delta Air Lines|            14.0|             0.0|           56.0|           5.0|
|        EV|    -16.0|     -6.0|    11.0|    6.0|    1|          3| ExpressJ

In [0]:
# Create on-time arrival flag (industry standard: delay â‰¤ 15 minutes)
from pyspark.sql.functions import when

df_ontime = df_base.withColumn(
    "is_arr_ontime",
    when(col("arr_delay_capped") <= 15, 1).otherwise(0)
)

df_ontime.select("op_carrier", "arr_delay_capped", "is_arr_ontime").show(5)


+----------+----------------+-------------+
|op_carrier|arr_delay_capped|is_arr_ontime|
+----------+----------------+-------------+
|        AA|             3.0|            1|
|        AS|             0.0|            1|
|        DL|            14.0|            1|
|        EV|             0.0|            1|
|        MQ|             6.0|            1|
+----------+----------------+-------------+
only showing top 5 rows


In [0]:
# Aggregate arrival punctuality metrics at airline level
from pyspark.sql.functions import count, sum, avg

df_kpi5a = df_ontime.groupBy(
    "op_carrier",
    "airline_name"
).agg(
    count("*").alias("total_flights"),
    sum("is_arr_ontime").alias("on_time_arrivals"),
    avg("arr_delay_capped").alias("avg_arrival_delay")
)

df_kpi5a.show(5)


+----------+--------------------+-------------+----------------+-----------------+
|op_carrier|        airline_name|total_flights|on_time_arrivals|avg_arrival_delay|
+----------+--------------------+-------------+----------------+-----------------+
|        AA|   American Airlines|       534592|          477702|7.035722569735424|
|        AS|     Alaska Airlines|       132217|          117910|6.280016941845602|
|        DL|     Delta Air Lines|       551516|          504037|5.841536782251104|
|        EV| ExpressJet Airlines|        47981|           42874|6.969175298555678|
|        MQ|Envoy Air (Americ...|       197193|          172180|7.940119578281176|
+----------+--------------------+-------------+----------------+-----------------+
only showing top 5 rows


In [0]:
# Calculate on-time arrival percentage
df_kpi5a = df_kpi5a.withColumn(
    "arr_ontime_pct",
    (col("on_time_arrivals") / col("total_flights")) * 100
)

df_kpi5a.select(
    "airline_name",
    "total_flights",
    "arr_ontime_pct"
).show(5)


+--------------------+-------------+-----------------+
|        airline_name|total_flights|   arr_ontime_pct|
+--------------------+-------------+-----------------+
|   American Airlines|       534592|89.35823955465102|
|     Alaska Airlines|       132217|89.17915245392045|
|     Delta Air Lines|       551516|91.39118357400329|
| ExpressJet Airlines|        47981|89.35620349721765|
|Envoy Air (Americ...|       197193|87.31547265876578|
+--------------------+-------------+-----------------+
only showing top 5 rows


In [0]:
# Filter airlines with sufficient number of flights (data reliability)
df_kpi5a = df_kpi5a.filter(col("total_flights") >= 500)

df_kpi5a.show(5)


+----------+--------------------+-------------+----------------+-----------------+-----------------+
|op_carrier|        airline_name|total_flights|on_time_arrivals|avg_arrival_delay|   arr_ontime_pct|
+----------+--------------------+-------------+----------------+-----------------+-----------------+
|        AA|   American Airlines|       534592|          477702|7.035722569735424|89.35823955465102|
|        AS|     Alaska Airlines|       132217|          117910|6.280016941845602|89.17915245392045|
|        DL|     Delta Air Lines|       551516|          504037|5.841536782251104|91.39118357400329|
|        EV| ExpressJet Airlines|        47981|           42874|6.969175298555678|89.35620349721765|
|        MQ|Envoy Air (Americ...|       197193|          172180|7.940119578281176|87.31547265876578|
+----------+--------------------+-------------+----------------+-----------------+-----------------+
only showing top 5 rows


In [0]:
# Save KPI-5A airline on-time arrival performance table
df_kpi5a.write \
    .mode("overwrite") \
    .saveAsTable("kpi5a_airline_ontime_arrival")

# Verify saved table
spark.table("kpi5a_airline_ontime_arrival").show(5)


+----------+--------------------+-------------+----------------+-----------------+-----------------+
|op_carrier|        airline_name|total_flights|on_time_arrivals|avg_arrival_delay|   arr_ontime_pct|
+----------+--------------------+-------------+----------------+-----------------+-----------------+
|        B6|     JetBlue Airways|       135869|          119116|8.265652945116251|87.66974070612133|
|        AS|     Alaska Airlines|       132217|          117910|6.280016941845602|89.17915245392045|
|        MQ|Envoy Air (Americ...|       197193|          172180|7.940119578281176|87.31547265876578|
|        9E|Endeavor Air (Del...|       204215|          189041|5.462492960850084|92.56959576916485|
|        F9|   Frontier Airlines|        86208|           76653|7.256878711952487|88.91634187082406|
+----------+--------------------+-------------+----------------+-----------------+-----------------+
only showing top 5 rows


In [0]:
# Top 5 on time airline
df_kpi5a.orderBy(col("arr_ontime_pct").desc()).show(5)

+----------+--------------------+-------------+----------------+------------------+-----------------+
|op_carrier|        airline_name|total_flights|on_time_arrivals| avg_arrival_delay|   arr_ontime_pct|
+----------+--------------------+-------------+----------------+------------------+-----------------+
|        WN|  Southwest Airlines|       882411|          829322|3.3645149482497385|93.98364254298734|
|        HA|   Hawaiian Airlines|        38188|           35702| 3.875955797632764|93.49010160259768|
|        9E|Endeavor Air (Del...|       204215|          189041| 5.462492960850084|92.56959576916485|
|        YX|    Republic Airways|       204549|          187850| 5.486978670147495|91.83618595055464|
|        DL|     Delta Air Lines|       551516|          504037| 5.841536782251104|91.39118357400329|
+----------+--------------------+-------------+----------------+------------------+-----------------+
only showing top 5 rows
