# Top Airlines Over Time

This notebook calculates and exports stacked bar charts of:

- Top airlines by month
- Top airlines by year

To use it, set the top 'N' value and export format for charts in the first code cell before running the notebook.

In [None]:
# Number of airlines to keep in the charts and the exported workbook:
# the top "N" ranked by total sightings across all of the data
top_n = 10

# Export format for the charts (matched case-insensitively):
# PNG     - export as PNG image
# PDF     - export as PDF file
# <blank> - do not export
export_format = "PNG"

In [None]:
from pathlib import Path
import sqlparse

# Read the query file with an explicit encoding so the result does not depend on
# the platform's default locale. The text is kept exactly as written: collapsing
# line breaks into spaces would let a "--" line comment in the SQL swallow the
# remainder of the statement
query_file_path = Path("sql") / "sightings.sql"
query = query_file_path.read_text(encoding="utf-8")

# Show a pretty-printed form of the query
print(sqlparse.format(query, reindent=True, keyword_case='upper'))

In [None]:
import pandas as pd
import sqlite3
import os

# Connect to the database, execute the query and read the results into a dataframe.
# The database location is taken from the FLIGHT_RECORDER_DB environment variable;
# a KeyError is raised if it is not set
database_path = os.environ["FLIGHT_RECORDER_DB"]
connection = sqlite3.connect(database_path)
try:
    df = pd.read_sql_query(query, connection, parse_dates=["Date"])
finally:
    # Always release the database handle, even if the query fails
    connection.close()

# Check there is some data: every later cell assumes at least one sighting
if df.empty:
    raise ValueError("No data found")

In [None]:
import warnings

# Create Year and Month columns
df["Year"] = df["Date"].dt.year
df["Month"] = df["Date"].dt.to_period("M")

# Aggregate by year, determine the top 'N' airlines (ranked by their total
# sightings over all years) and filter so only they remain
airlines_by_year = df.groupby(["Year", "Airline"]).size().reset_index(name="Sightings")
top_n_airlines = airlines_by_year.groupby("Airline")["Sightings"].sum().nlargest(top_n).index
top_yearly = airlines_by_year[airlines_by_year["Airline"].isin(top_n_airlines)]

# Aggregate by month, filter to leave only the top 'N' airlines and convert the
# 'Month' Period to a datetime for plotting. assign() returns a new frame, so
# nothing is written into a filtered slice and there is no SettingWithCopyWarning
# to suppress (that warning class does not exist in newer pandas releases, where
# referencing it would raise an AttributeError)
airlines_by_month = df.groupby(["Month", "Airline"]).size().reset_index(name="Sightings")
top_monthly = (
    airlines_by_month[airlines_by_month["Airline"].isin(top_n_airlines)]
    .assign(Month=lambda frame: frame["Month"].dt.to_timestamp())
)

# Pivot the yearly data to allow a stacked bar chart plot:
# rows = year, columns = airline, values = sightings
# Year/airline combinations with no sightings become 0
pivoted_yearly = top_yearly.pivot(index="Year", columns="Airline", values="Sightings").fillna(0)

# Pivot the monthly data to allow a stacked bar chart plot:
# rows = month, columns = airline, values = sightings
# Ensure sorting by month after pivoting
pivoted_monthly = top_monthly.pivot(index="Month", columns="Airline", values="Sightings").fillna(0)
pivoted_monthly = pivoted_monthly.sort_index()

In [None]:
import pandas as pd
from pathlib import Path

# Make sure the folder that receives exported reports exists
export_folder_path = Path("exported")
export_folder_path.mkdir(parents=True, exist_ok=True)

# Base name shared by the workbook and the exported charts
export_file_name = "Top-Airlines-Over-Time"
output_path = export_folder_path / f"{export_file_name}.xlsx"

# One worksheet per aggregation level, written in this order
sheets = {
    "Yearly": top_yearly,    # sightings per airline per year
    "Monthly": top_monthly,  # sightings per airline per month
}

# Write every frame to its own sheet of a single Excel workbook
with pd.ExcelWriter(output_path.absolute(), engine="openpyxl") as workbook:
    for sheet_name, frame in sheets.items():
        frame.to_excel(workbook, sheet_name=sheet_name, index=False)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Create the stacked bar chart. The figure and axes are created up front and the
# axes handed to pandas: calling plt.figure() and then DataFrame.plot() without
# "ax" leaves an empty extra figure behind, because pandas opens its own figure
fig, ax = plt.subplots(figsize=(16, 7))
pivoted_monthly.plot(
    kind="bar",
    stacked=True,
    width=0.8,
    colormap="tab20",
    ax=ax
)

# De-clutter the X-axis: label only every other bar, and show each month as
# "YYYY-MM" instead of the full timestamp text a bar chart would otherwise use
month_labels = pivoted_monthly.index.strftime("%Y-%m")
new_labels = [label if i % 2 == 0 else "" for i, label in enumerate(month_labels)]
ax.set_xticklabels(new_labels, rotation=90)

# Add titles, labels and legends
ax.set_title("Top Airlines by Sightings Per Month (Stacked Bar Chart)")
ax.set_ylabel("Number of Sightings")
ax.set_xlabel("Month")
# Anchor the legend outside the plot area so it does not cover the bars
ax.legend(title="Airline", bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()

# Export the chart in the configured format (case-insensitive); any other
# value, including blank, skips the export
if export_format.casefold() == "png":
    export_file_path = export_folder_path / f"{export_file_name}-Monthly.png"
    fig.savefig(export_file_path.absolute(), format="png", dpi=300, bbox_inches="tight")
elif export_format.casefold() == "pdf":
    export_file_path = export_folder_path / f"{export_file_name}-Monthly.pdf"
    fig.savefig(export_file_path.absolute(), format="pdf", bbox_inches="tight")

# Show the plot
plt.show()

In [None]:
# Create the stacked bar chart of sightings per year. The figure and axes are
# created up front and the axes handed to pandas: calling plt.figure() and then
# DataFrame.plot() without "ax" leaves an empty extra figure behind, because
# pandas opens its own figure
fig, ax = plt.subplots(figsize=(12, 6))
pivoted_yearly.plot(
    kind="bar",
    stacked=True,
    colormap="tab20",
    ax=ax
)

# Add titles, labels and legends
ax.set_title("Top Airlines by Sightings Per Year")
ax.set_ylabel("Number of Sightings")
ax.set_xlabel("Year")
# Keep the year labels horizontal
ax.tick_params(axis="x", labelrotation=0)
# Anchor the legend outside the plot area so it does not cover the bars
ax.legend(title="Airline", bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()

# Export the chart in the configured format (case-insensitive); any other
# value, including blank, skips the export
if export_format.casefold() == "png":
    export_file_path = export_folder_path / f"{export_file_name}-Yearly.png"
    fig.savefig(export_file_path.absolute(), format="png", dpi=300, bbox_inches="tight")
elif export_format.casefold() == "pdf":
    export_file_path = export_folder_path / f"{export_file_name}-Yearly.pdf"
    fig.savefig(export_file_path.absolute(), format="pdf", bbox_inches="tight")

# Show the plot
plt.show()
