# Airline / Aircraft Type Heatmap

This notebook generates a heatmap of sighting counts broken down by airline and aircraft type. Only airlines that meet a minimum number of sightings are included.

Individual models within a family of aircraft are grouped:

- e.g. Both A320-214 and A320-232 will appear under the "A320" family
- Aircraft families with fewer total sightings (across all airlines) than the configured threshold are grouped under the "Other" family

In [1]:
%run ../pathutils.ipynb
%run ../definitions.ipynb
%run ../database.ipynb
%run ../export.ipynb
%run utils.ipynb

In [2]:
# Minimum number of sightings, counted across all airlines, that an aircraft type
# prefix needs in order to keep its own heatmap column. Prefixes with fewer
# sightings than this are assigned to a generic "Other" group
prefix_count_threshold = 100

# Minimum total number of sightings an airline needs in order to appear in the
# results. Airlines with fewer sightings than this are excluded
sightings_threshold = 50

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns

def export_heatmap(export_folder_path, data):
    """
    Render the airline / aircraft type sightings heatmap and export it as a PNG.

    :param export_folder_path: Folder the chart is exported to (passed straight
        through to export_chart)
    :param data: Two-dimensional table of counts accepted by seaborn.heatmap.
        Rows are labelled as airlines and columns as aircraft types
    """
    # Use an explicit figure/axes pair rather than relying on implicit pyplot state
    fig, ax = plt.subplots(figsize=(12, 8))

    try:
        # Create the heatmap
        sns.heatmap(data, annot=False, fmt="d", cmap="YlOrRd", ax=ax)

        # Set chart properties
        ax.set_title("Aircraft Type Sightings by Airline")
        ax.set_ylabel("Airline")
        ax.set_xlabel("Type")
        fig.tight_layout()

        # Export the chart. NOTE(review): export_chart is defined outside this
        # notebook and presumably saves the current pyplot figure - the figure
        # created above is still the current one at this point
        export_chart(export_folder_path, "airline-aircraft-type-heatmap", "png")
    finally:
        # Always close the figure, even if plotting or exporting fails, so that
        # failed runs don't leave open figures behind in the kernel
        plt.close(fig)


In [4]:
# Construct the query
query = construct_query("aircraft", "sightings.sql", {})

try:
    # Run the query to load the data - this raises a ValueError if there's no data.
    # Only the query sits inside the try block so that a ValueError raised by the
    # processing steps below is not misreported as "no data"
    df = query_data("aircraft", query)
except ValueError:
    print("WARNING: No data found")
else:
    # Extract an aircraft type prefix for each sighting
    df["Type"] = df["Model"].apply(extract_prefix)

    # Count sightings per prefix (across all airlines) and use the counts to
    # identify the common prefixes
    prefix_counts = df["Type"].value_counts()
    common_prefixes = prefix_counts[prefix_counts >= prefix_count_threshold].index

    # Replace uncommon prefixes with "Other". This is a vectorised membership test
    # rather than a row-by-row apply; missing prefixes also end up as "Other"
    df["Type"] = df["Type"].where(df["Type"].isin(common_prefixes), "Other")

    # Pivot the data to create the results for the heatmap: one row per airline,
    # one column per type, each cell holding the number of sightings
    pivot_table = df.pivot_table(
        index="Airline",
        columns="Type",
        values="Model",
        aggfunc="count",
        fill_value=0
    )

    # Sort airlines with most aircraft sightings first
    pivot_table["Total"] = pivot_table.sum(axis=1)
    pivot_table = pivot_table.sort_values(by="Total", ascending=False)

    # Remove airlines that fall below the sightings threshold
    pivot_table = pivot_table[pivot_table["Total"] >= sightings_threshold]

    # Export the data to excel, including the per-airline totals.
    # NOTE(review): the sheet name "Monthly" looks carried over from another
    # report - confirm before renaming as downstream consumers may rely on it
    export_folder_path = get_export_folder_path()
    export_to_spreadsheet(export_folder_path, "airline-aircraft-type-heatmap.xlsx", { "Monthly": pivot_table })

    if pivot_table.empty:
        # There is nothing to plot if every airline was filtered out
        print(f"WARNING: No airlines have at least {sightings_threshold} sightings")
    else:
        # Export the heatmap. "Total" is a sorting/filtering helper, not an aircraft
        # type, and would otherwise dominate the colour scale, so it's left out
        export_heatmap(export_folder_path, pivot_table.drop(columns="Total"))