# Top Airlines Over Time

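This notebook calculates and exports stacked bar charts of sightings for the top "N" airlines, ranked by their total number of sightings across all the data:

- Top airlines by month (most recent months only)
- Top airlines by year

The underlying monthly and yearly counts are also exported to Excel spreadsheets.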

In [1]:
%run ../pathutils.ipynb
%run ../definitions.ipynb
%run ../database.ipynb
%run ../export.ipynb
%run utils.ipynb

In [2]:
# Number of airlines to include in the charts and spreadsheets. Airlines are
# ranked by their total number of sightings and only the largest "top_n" are kept
top_n = 10

# Number of months to represent in the monthly chart. The chart window ends at
# the current month and starts (number_of_months - 1) months before it
number_of_months = 12

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns

def export_monthly_chart(export_folder_path, data, number_of_months):
    """
    Export a stacked bar chart of sightings per month, one stack segment per airline.

    :param export_folder_path: Folder passed through to export_chart()
    :param data: DataFrame with one row per month and one column per airline. The
                 index values must support strftime() as they are used for the tick labels
    :param number_of_months: Number of months the chart represents (only used in the title)
    :return: None
    """
    # Nothing to draw, so warn and skip the export rather than attempting to plot an empty frame
    if data.empty:
        print("WARNING: No monthly data to chart")
        return

    # The figure size is specified once, here. Passing a figsize to data.plot() as well as
    # an existing axes makes pandas resize the figure, so the two values would compete
    fig, ax = plt.subplots(figsize=(12, 6))

    try:
        data.plot(
            kind="bar",
            stacked=True,
            colormap="tab20",
            ax=ax
        )

        # Replace tick labels with formatted dates, rotated so they don't overlap
        ax.set_xticklabels(
            [d.strftime("%Y-%m-%d") for d in data.index],
            rotation=90
        )

        # Add titles, labels and legends
        ax.set_title(f"Top Airlines by Sightings Per Month ({number_of_months} Months)")
        ax.set_ylabel("Number of Sightings")
        ax.set_xlabel("Month")
        ax.legend(title="Airline", bbox_to_anchor=(1.05, 1), loc='upper left')
        fig.tight_layout()

        # Export the chart
        export_chart(export_folder_path, "top-airlines-over-time-monthly", "png")
    finally:
        # Close the plot, even if plotting or exporting failed, so figures don't accumulate
        plt.close(fig)

In [4]:
import matplotlib.pyplot as plt
import seaborn as sns

def export_yearly_chart(export_folder_path, data):
    """
    Export a stacked bar chart of sightings per year, one stack segment per airline.

    :param export_folder_path: Folder passed through to export_chart()
    :param data: DataFrame with one row per year and one column per airline
    :return: None
    """
    # Nothing to draw, so warn and skip the export rather than attempting to plot an empty frame
    if data.empty:
        print("WARNING: No yearly data to chart")
        return

    # The figure size is specified once, here, rather than repeated in data.plot()
    fig, ax = plt.subplots(figsize=(12, 6))

    try:
        # rot=0 keeps the year labels horizontal
        data.plot(
            kind="bar",
            stacked=True,
            colormap="tab20",
            rot=0,
            ax=ax
        )

        # Add titles, labels and legends
        ax.set_title("Top Airlines by Sightings Per Year")
        ax.set_ylabel("Number of Sightings")
        ax.set_xlabel("Year")
        ax.legend(title="Airline", bbox_to_anchor=(1.05, 1), loc='upper left')
        fig.tight_layout()

        # Export the chart
        export_chart(export_folder_path, "top-airlines-over-time-yearly", "png")
    finally:
        # Close the plot, even if plotting or exporting failed, so figures don't accumulate
        plt.close(fig)

In [5]:
import pandas as pd
import warnings

# Construct the query
query = construct_query("aircraft", "sightings.sql", {})

try:
    # Run the query to load the data - this raises a ValueError if there's no data
    df = query_data("aircraft", query)
except ValueError:
    # Only the load is guarded, so a ValueError raised by the processing below
    # surfaces as an error rather than being misreported as "no data"
    print("WARNING: No data found")
else:
    # Create Year and Month columns
    df["Year"] = df["Date"].dt.year
    df["Month"] = df["Date"].dt.to_period("M")

    # Aggregate by year, determine the top 'N' airlines and filter so only they remain
    airlines_by_year = df.groupby(["Year", "Airline"]).size().reset_index(name="Sightings")
    top_n_airlines = airlines_by_year.groupby("Airline")["Sightings"].sum().nlargest(top_n).index
    top_yearly = airlines_by_year[airlines_by_year["Airline"].isin(top_n_airlines)]

    # Aggregate by month and filter to leave only the top 'N' airlines
    airlines_by_month = df.groupby(["Month", "Airline"]).size().reset_index(name="Sightings")

    # Convert the 'Month' Period to datetime for plotting. assign() returns a new frame
    # rather than writing into the filtered slice, so SettingWithCopyWarning doesn't
    # need to be silenced globally
    top_monthly = (
        airlines_by_month[airlines_by_month["Airline"].isin(top_n_airlines)]
        .assign(Month=lambda frame: frame["Month"].dt.to_timestamp())
    )

    # Pivot the yearly data to allow a stacked bar chart plot:
    # rows = year, columns = airline, values = sightings
    pivoted_yearly = top_yearly.pivot(index="Year", columns="Airline", values="Sightings").fillna(0)

    # Pivot the monthly data to allow a stacked bar chart plot:
    # rows = month, columns = airline, values = sightings
    # Ensure sorting by month after pivoting so the date-range slice below is well defined
    pivoted_monthly = top_monthly.pivot(index="Month", columns="Airline", values="Sightings").fillna(0)
    pivoted_monthly = pivoted_monthly.sort_index()

    # Export the data to excel
    export_folder_path = get_export_folder_path()
    export_to_spreadsheet(export_folder_path, "top-airlines-over-time-monthly.xlsx", {
        "Monthly": top_monthly
    })
    export_to_spreadsheet(export_folder_path, "top-airlines-over-time-yearly.xlsx", {
        "Yearly": top_yearly,
    })

    # Create a copy of the monthly counts that only includes the last "N" months, for charting.
    # The index already holds timestamps (converted above), so it can be sliced by date directly.
    # The window runs from the first day of the month (N - 1) months ago to the first day of
    # the current month, inclusive
    current_month = pd.Timestamp.today().to_period("M").to_timestamp()
    start_month = current_month - pd.DateOffset(months=number_of_months-1)
    pivoted_monthly_last_n_months = pivoted_monthly.loc[start_month:current_month].copy()

    # Export the charts
    export_monthly_chart(export_folder_path, pivoted_monthly_last_n_months, number_of_months)
    export_yearly_chart(export_folder_path, pivoted_yearly)