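This notebook plots the flight paths stored in per-aircraft Parquet files, together with the share of flights that have both a departure and an arrival airport, so that the output of the unique flight ID assignment scripts can be checked visually.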

In [8]:
from __future__ import annotations

from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from mpl_toolkits.basemap import Basemap


In [9]:
# NOTE(review): machine-specific absolute path - point this at your local copy
# of the sample aircraft data before running the notebook.
location_of_flight_parquet_files = "/mnt/c/Users/as3091/Documents/sample_aircraft/sample_aircraft"

# Collect every parquet file in the directory. Path.iterdir() yields entries in
# arbitrary order, so sort them to keep the processing order (and the log output
# of the plotting loop) reproducible between runs.
parquet_files = sorted(
    f for f in Path(location_of_flight_parquet_files).iterdir() if f.suffix == '.parquet'
)
print(f"Found {len(parquet_files)} parquet files")

Found 50 parquet files


In [10]:
# Load the airport reference table used for plotting. plot_flight_paths reads its
# 'icao', 'lat' and 'lon' columns to draw a marker at each flight's departure and
# arrival airport.
# NOTE(review): the path is relative, so it resolves against the kernel's working
# directory - confirm the notebook is run from its own folder.
airport_data = pd.read_parquet("../airport_data/airports.parquet")

In [None]:
def calculate_departure_arrival_airports_completion_percentage(flight_data: pd.DataFrame, flight_ids: list[str]) -> float:
    """Calculate percentage of flights with complete departure and arrival airport information.

    A flight counts as complete when at least one of its rows has a non-null
    'departure_airport_icao' and at least one of its rows has a non-null
    'arrival_airport_icao'. IDs in ``flight_ids`` that do not occur in
    ``flight_data`` count as incomplete, and an ID listed more than once is
    counted once per occurrence.

    Args:
        flight_data (pd.DataFrame): DataFrame containing flight data. Must have the columns
            'flight_id', 'departure_airport_icao' and 'arrival_airport_icao'.
        flight_ids (list[str]): List of unique flight IDs (any sized array-like works).

    Returns:
        float: Percentage (0-100) of flights with complete departure and arrival airport
            information, or NaN when ``flight_ids`` is empty.
    """
    number_of_flights = len(flight_ids)
    if number_of_flights == 0:
        # NaN honours the declared float return type, so callers that format the
        # result with a float format spec (e.g. ":.2f") do not raise on an empty input.
        return float("nan")

    airport_columns = ['departure_airport_icao', 'arrival_airport_icao']

    # One pass over the frame: count() tallies the non-null values per flight and
    # column, instead of re-filtering the whole frame once per flight ID.
    non_null_counts = flight_data.groupby('flight_id', sort=False)[airport_columns].count()
    is_complete = (non_null_counts > 0).all(axis=1)

    # Align with the requested IDs; IDs without any rows are treated as incomplete.
    number_of_complete_departure_arrival_pairs = int(
        is_complete.reindex(flight_ids, fill_value=False).sum()
    )

    return (number_of_complete_departure_arrival_pairs / number_of_flights) * 100

def _first_non_null(flight: pd.DataFrame, column: str):
    """Return the first non-null value of ``column`` in ``flight``, or None if the column is missing or all-null."""
    if column not in flight.columns:
        return None
    values = flight[column].dropna()
    return values.iloc[0] if len(values) > 0 else None


def _plot_airport_marker(m, airport_coordinates: dict, icao, color) -> None:
    """Draw an 'x' marker at the airport ``icao`` on map ``m``; do nothing if the code is empty or unknown."""
    if not icao or icao not in airport_coordinates:
        return
    lon, lat = airport_coordinates[icao]
    ax, ay = m(lon, lat)
    m.plot(ax, ay, marker='x', color=color, markersize=15, markeredgewidth=2, alpha=0.8)


def plot_flight_paths(flight_data: pd.DataFrame, flight_ids: list[str], airport_data: pd.DataFrame,) -> None:
    """Plot flight paths from a parquet file on a map with airport markers.

    Creates a new matplotlib figure and leaves it as the current figure, so the
    caller can add a legend/title and save or close it. Each flight is drawn as
    a coloured line with a green triangle at its first row and a red square at
    its last row; the departure and arrival airports (when known and present in
    ``airport_data``) are marked with an 'x' in the flight's colour.

    Args:
        flight_data (pd.DataFrame): DataFrame containing flight data. Reads the columns
            'flight_id', 'latitude' and 'longitude', and, if present,
            'departure_airport_icao' and 'arrival_airport_icao'.
        flight_ids (list[str]): List of unique flight IDs. The last four characters of
            each ID are used as its legend label.
        airport_data (pd.DataFrame): DataFrame containing airport data with the columns
            'icao', 'lat' and 'lon'.
    """
    plt.figure(figsize=(14, 10))

    # Calculate map bounds from all flight positions, padded by one degree
    lat_min = flight_data['latitude'].min() - 1.0
    lat_max = flight_data['latitude'].max() + 1.0
    lon_min = flight_data['longitude'].min() - 1.0
    lon_max = flight_data['longitude'].max() + 1.0

    # Create basemap
    m = Basemap(projection='merc', llcrnrlat=lat_min, urcrnrlat=lat_max,
                llcrnrlon=lon_min, urcrnrlon=lon_max, resolution='i')
    m.drawcoastlines()
    m.drawcountries()
    m.drawmapboundary()
    m.fillcontinents()

    # Plot each flight with a different color
    colors = plt.colormaps['tab20'].resampled(len(flight_ids)).colors

    # Build the ICAO -> (lon, lat) lookup once instead of scanning the airport
    # table for every flight. drop_duplicates keeps the first row per code, which
    # is the row a filter followed by .iloc[0] would select.
    unique_airports = airport_data.drop_duplicates(subset='icao')
    airport_coordinates = dict(
        zip(unique_airports['icao'], zip(unique_airports['lon'], unique_airports['lat']))
    )

    for idx, flight_id in enumerate(flight_ids):
        flight = flight_data[flight_data["flight_id"] == flight_id]

        # Convert coordinates to map projection space
        x, y = m(flight['longitude'].to_numpy(), flight['latitude'].to_numpy())

        # Plot the flight path; str() keeps the label working for non-string IDs
        m.plot(x, y, marker=None, color=colors[idx], linewidth=2, label=f"Flight {str(flight_id)[-4:]}")

        # Mark first timestep (green triangle)
        m.plot(x[0], y[0], marker='^', color='green', markersize=5, markeredgecolor='black', markeredgewidth=1)

        # Mark last timestep (red square)
        m.plot(x[-1], y[-1], marker='s', color='red', markersize=5, markeredgecolor='black', markeredgewidth=1)

        # Mark origin and destination airports, using the first non-null ICAO code
        # of each column (skipped when the code is missing or not in airport_data)
        for icao_column in ('departure_airport_icao', 'arrival_airport_icao'):
            _plot_airport_marker(m, airport_coordinates, _first_non_null(flight, icao_column), colors[idx])


In [12]:
# Create output directory if it doesn't exist
output_dir = "flight_sample_plots"
Path(output_dir).mkdir(parents=True, exist_ok=True)

# Process and plot each aircraft file
for parquet_file in parquet_files:
    print(f"\nProcessing {parquet_file}...")

    # Read flight data
    flight_data = pd.read_parquet(parquet_file)
    flight_ids = flight_data['flight_id'].unique()
    aircraft_icao = parquet_file.stem.replace('aircraft_', '')

    # A file without flights has nothing to plot: the map bounds would be NaN and
    # the completion statistic is undefined, so skip it instead of failing mid-loop.
    if len(flight_ids) == 0:
        print("  No flights found, skipping")
        continue

    # Check arrival and departure airports for each flight
    percentage_complete = calculate_departure_arrival_airports_completion_percentage(flight_data, flight_ids)

    try:
        # Plot all flight paths on a single map with different colors
        plot_flight_paths(flight_data, flight_ids, airport_data)

        # Add legend with statistics
        plt.legend(loc='upper left', fontsize=10, framealpha=0.9)
        plt.title(f"Flight Paths for {aircraft_icao} with Start (▲), End (■), and Airports (x)")
        plt.suptitle(f"Complete Airport Data: {percentage_complete:.2f}%", y=0.92, fontsize=10)

        # Save the figure
        output_path = Path(output_dir) / f"{aircraft_icao}.png"
        plt.savefig(output_path, dpi=150, bbox_inches='tight')
        print(f"  Saved plot to {output_path}")
    finally:
        # Always release the figure, even if plotting or saving raised, so figures
        # do not accumulate in memory across the loop.
        plt.close()



Processing /mnt/c/Users/as3091/Documents/sample_aircraft/sample_aircraft/aircraft_0AC390.parquet...
  Saved plot to flight_sample_plots/0AC390.png

Processing /mnt/c/Users/as3091/Documents/sample_aircraft/sample_aircraft/aircraft_155C38.parquet...
  Saved plot to flight_sample_plots/155C38.png

Processing /mnt/c/Users/as3091/Documents/sample_aircraft/sample_aircraft/aircraft_34224F.parquet...
  Saved plot to flight_sample_plots/34224F.png

Processing /mnt/c/Users/as3091/Documents/sample_aircraft/sample_aircraft/aircraft_3455D8.parquet...
  Saved plot to flight_sample_plots/3455D8.png

Processing /mnt/c/Users/as3091/Documents/sample_aircraft/sample_aircraft/aircraft_346605.parquet...
  Saved plot to flight_sample_plots/346605.png

Processing /mnt/c/Users/as3091/Documents/sample_aircraft/sample_aircraft/aircraft_3C5EE4.parquet...
  Saved plot to flight_sample_plots/3C5EE4.png

Processing /mnt/c/Users/as3091/Documents/sample_aircraft/sample_aircraft/aircraft_4853D1.parquet...
  Saved plo