## Setup

### Packages

In [75]:
import numpy as np
import pandas as pd
import os
from plotnine import *
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap, TwoSlopeNorm

### Paths, Colormaps

In [76]:
# Input folders that are scanned for per-airline "*_delay_data.csv" files.
# NOTE(review): these are absolute, machine-specific Windows paths; the notebook
# will not run elsewhere without editing them.
departures_path = r'D:\Summer Practicum\1_Data\heatmaps\departures'
arrivals_path = r'D:\Summer Practicum\1_Data\heatmaps\arrivals'

# Custom colormap: five colour stops, starting at white and ending at dark red,
# interpolated into 256 levels.
colors = ["#FFFFFF", "#DEEEF6", "#E19B8C", "#CB5017", "#9C2C19"]
custom_cmap = LinearSegmentedColormap.from_list("custom_red_blue", colors, N=256)
# Lower / upper limits of the colour scale handed to TwoSlopeNorm in the setup
# cells; the colourbar labels in the heatmap cells report delays in minutes.
vmin = 0        # Lower bound of the colour scale (delay value)
vmax = 180      # Upper bound of the colour scale (delay value)


### Helper Functions, Dicts

In [77]:
def simplify_timeblock(block):
    """Turn a 'HHMM-HHMM' time-block label into a short 12-hour label.

    Only the first two characters of each side (the hour) are used. The end
    hour is bumped by one so the label runs to the next full hour, e.g.
    '0600-0659' becomes '6–7 AM' and '1100-1159' becomes '11 AM–12 PM'.

    Parameters
    ----------
    block : str
        Label made of two parts separated by a single '-'.

    Returns
    -------
    str or object
        The simplified label, or ``block`` itself (unchanged) when it cannot
        be parsed -- for instance when it is not a string or has no '-'.
    """
    def to_12h(hour):
        # Hours 0-11 (and a literal 24) are AM; everything else is PM.
        meridiem = "AM" if (hour < 12 or hour == 24) else "PM"
        # 0 and 12 are both shown as 12 on a 12-hour clock.
        return (hour % 12 or 12), meridiem

    try:
        first, last = block.split('-')
        begin_hour = int(first[:2])
        # +1 makes the range inclusive of the final hour; wrap 24 back to 0.
        finish_hour = (int(last[:2]) + 1) % 24
    except Exception:
        # Best effort: leave unparseable labels exactly as they were given.
        return block

    begin_clock, begin_mer = to_12h(begin_hour)
    finish_clock, finish_mer = to_12h(finish_hour)

    # Show AM/PM only once when both ends share it.
    if begin_mer == finish_mer:
        return f"{begin_clock}–{finish_clock} {begin_mer}"
    return f"{begin_clock} {begin_mer}–{finish_clock} {finish_mer}"

In [78]:
# Maps the two-character carrier code that prefixes each
# "<code>_delay_data.csv" filename to the full airline name shown in the
# heatmap titles. Add an entry here whenever a new carrier file is introduced.
airline_names = {
    "AA": "American Airlines",
    "AS": "Alaska Airlines",
    "B6": "JetBlue Airways",
    "DL": "Delta Air Lines",
    "F9": "Frontier Airlines",
    "G4": "Allegiant Air",
    "HA": "Hawaiian Airlines",
    "NK": "Spirit Airlines",
    "UA": "United Airlines",
    "WN": "Southwest Airlines"
}

## Departures

### Setup

In [79]:
# Combine all departure timeblock delays into a single dataframe.
# sorted() makes the read order deterministic (os.listdir order is arbitrary).
departure_files = []
for filename in sorted(os.listdir(departures_path)):
    if filename.endswith('_delay_data.csv'):
        file_path = os.path.join(departures_path, filename)
        df = pd.read_csv(file_path, index_col=0)
        departure_files.append(df)

# Fail with an actionable message instead of pd.concat's generic
# "No objects to concatenate" when the folder holds no matching files.
if not departure_files:
    raise ValueError(f"No '*_delay_data.csv' files found in {departures_path}")

# Combine into a single dataframe and get the overall mean delay to compare later.
dep_df = pd.concat(departure_files)
# Non-numeric DelayAvg entries are coerced to NaN and ignored by mean().
# Rounded (not truncated) so the "All Avg" figure is computed the same way as
# the per-airline averages in the heatmap cell.
avg_dep_delay = int(round(pd.to_numeric(dep_df["DelayAvg"], errors='coerce').mean(), 0))

In [80]:
# Color grading: the overall average departure delay is mapped to the midpoint
# of the colormap, with vmin and vmax mapped to its two ends.
# TwoSlopeNorm needs vmin < vcenter < vmax, so this raises ValueError if the
# overall average is not strictly between the colour-scale limits.
# NOTE: `vcenter` and `divnorm` are reassigned in the Arrivals section; in a
# live kernel, re-run this cell before regenerating the departure heatmaps.
vcenter = avg_dep_delay
divnorm = TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax)

### Heatmap

In [81]:
# Generate one heatmap per airline.
# Each iteration reads a single "<code>_delay_data.csv" file, pivots it into a
# time-block x weekday grid of DelayAvg values, and saves the figure as a PNG.
for filename in sorted(os.listdir(departures_path)):
    # Skip anything that is not a per-airline delay file. The plotting code used
    # to sit outside this check, so a stray file either raised NameError (when
    # it came first) or silently re-plotted the previous airline's data.
    if not filename.endswith('_delay_data.csv'):
        continue

    airline = filename.split('_')[0]
    # Fall back to the raw carrier code so an unmapped airline still gets a plot.
    airline_label = airline_names.get(airline, airline)
    file_path = os.path.join(departures_path, filename)
    al_df = pd.read_csv(file_path, index_col=0)

    # Pivot for heatmaps and sort columns and rows.
    pivot_df = pd.pivot(data=al_df, index='DepTimeBlk', columns='DayOfWeek', values='DelayAvg')
    pivot_df = pivot_df.reindex(columns=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
    # Sort on the raw block labels first; the simplified labels would not sort
    # chronologically.
    pivot_df = pivot_df.sort_index()
    pivot_df.index = pivot_df.index.map(simplify_timeblock)

    # Mean delay for this airline, shown on the colourbar next to the overall mean.
    avg_airline_delay = int(round(pd.to_numeric(al_df["DelayAvg"], errors='coerce').mean(), 0))

    # Generate plot on an explicit figure/axes pair so the right figure is
    # saved and closed even if other figures are open in the kernel.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        pivot_df,
        cmap=custom_cmap,
        norm=divnorm,
        linewidths=0.3,
        annot=False,
        ax=ax,
    )
    ax.set_title(f"Average Departure Delays for {airline_label}")
    ax.set_xlabel("Day of Week")
    ax.set_ylabel("Departure Time Block")

    # Mark the overall average and this airline's average on the colourbar.
    # The label x-position (2.5) is in the colourbar's data coordinates, which
    # places the text to the right of the bar.
    cbar = ax.collections[0].colorbar
    for marker_value, marker_name in ((avg_dep_delay, 'All Avg'), (avg_airline_delay, 'Airline Avg')):
        cbar.ax.axhline(marker_value, color='black', linestyle='-', linewidth=2)
        cbar.ax.text(2.5, marker_value, f'{marker_name}: {marker_value} min',
                     color='black', va='center', ha='left', fontsize=9, transform=cbar.ax.transData)

    # Save plot. savefig overwrites an existing file, so no prior delete is
    # needed; create the export folder if it is missing.
    hmfilename = f"D:/Summer Practicum/3_Exports/heatmaps/departures/{airline}_dep_heatmap.png"
    os.makedirs(os.path.dirname(hmfilename), exist_ok=True)
    fig.savefig(hmfilename, dpi=300)
    plt.close(fig)

## Arrivals

### Setup

In [82]:
# Combine all arrival timeblock delays into a single dataframe.
# sorted() makes the read order deterministic (os.listdir order is arbitrary).
arrival_files = []
for filename in sorted(os.listdir(arrivals_path)):
    if filename.endswith('_delay_data.csv'):
        file_path = os.path.join(arrivals_path, filename)
        df = pd.read_csv(file_path, index_col=0)
        arrival_files.append(df)

# Fail with an actionable message instead of pd.concat's generic
# "No objects to concatenate" when the folder holds no matching files.
if not arrival_files:
    raise ValueError(f"No '*_delay_data.csv' files found in {arrivals_path}")

# Combine into a single dataframe and get the overall mean delay to compare later.
arr_df = pd.concat(arrival_files)
# Non-numeric DelayAvg entries are coerced to NaN and ignored by mean().
# Rounded (not truncated) so the "All Avg" figure is computed the same way as
# the per-airline averages in the heatmap cell.
avg_arr_delay = int(round(pd.to_numeric(arr_df["DelayAvg"], errors='coerce').mean(), 0))

In [83]:
# Color grading: the overall average arrival delay is mapped to the midpoint
# of the colormap, with vmin and vmax mapped to its two ends.
# TwoSlopeNorm needs vmin < vcenter < vmax, so this raises ValueError if the
# overall average is not strictly between the colour-scale limits.
# NOTE: this overwrites the `vcenter` and `divnorm` set in the Departures
# section; the arrival heatmaps below rely on the values assigned here.
vcenter = avg_arr_delay
divnorm = TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax)

### Heatmap

In [84]:
# Generate one heatmap per airline.
# Each iteration reads a single "<code>_delay_data.csv" file, pivots it into a
# time-block x weekday grid of DelayAvg values, and saves the figure as a PNG.
for filename in sorted(os.listdir(arrivals_path)):
    # Skip anything that is not a per-airline delay file. The plotting code used
    # to sit outside this check, so a stray file either raised NameError (when
    # it came first) or silently re-plotted the previous airline's data.
    if not filename.endswith('_delay_data.csv'):
        continue

    airline = filename.split('_')[0]
    # Fall back to the raw carrier code so an unmapped airline still gets a plot.
    airline_label = airline_names.get(airline, airline)
    file_path = os.path.join(arrivals_path, filename)
    al_df = pd.read_csv(file_path, index_col=0)

    # Pivot for heatmaps and sort columns and rows.
    pivot_df = pd.pivot(data=al_df, index='ArrTimeBlk', columns='DayOfWeek', values='DelayAvg')
    pivot_df = pivot_df.reindex(columns=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
    # Sort on the raw block labels first; the simplified labels would not sort
    # chronologically.
    pivot_df = pivot_df.sort_index()
    pivot_df.index = pivot_df.index.map(simplify_timeblock)

    # Mean delay for this airline, shown on the colourbar next to the overall mean.
    avg_airline_delay = int(round(pd.to_numeric(al_df["DelayAvg"], errors='coerce').mean(), 0))

    # Generate plot on an explicit figure/axes pair so the right figure is
    # saved and closed even if other figures are open in the kernel.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        pivot_df,
        cmap=custom_cmap,
        norm=divnorm,
        linewidths=0.3,
        annot=False,
        ax=ax,
    )
    ax.set_title(f"Average Arrival Delays for {airline_label}")
    ax.set_xlabel("Day of Week")
    ax.set_ylabel("Arrival Time Block")

    # Mark the overall average and this airline's average on the colourbar.
    # The label x-position (2.5) is in the colourbar's data coordinates, which
    # places the text to the right of the bar.
    cbar = ax.collections[0].colorbar
    for marker_value, marker_name in ((avg_arr_delay, 'All Avg'), (avg_airline_delay, 'Airline Avg')):
        cbar.ax.axhline(marker_value, color='black', linestyle='-', linewidth=2)
        cbar.ax.text(2.5, marker_value, f'{marker_name}: {marker_value} min',
                     color='black', va='center', ha='left', fontsize=9, transform=cbar.ax.transData)

    # Save plot. savefig overwrites an existing file, so no prior delete is
    # needed; create the export folder if it is missing.
    hmfilename = f"D:/Summer Practicum/3_Exports/heatmaps/arrivals/{airline}_arr_heatmap.png"
    os.makedirs(os.path.dirname(hmfilename), exist_ok=True)
    fig.savefig(hmfilename, dpi=300)
    plt.close(fig)
