In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go


In [None]:
# Location of the pre-processed flight records.
# NOTE(review): '/content/' looks like a Colab working directory — adjust when running elsewhere.
DATA_PATH = "/content/processed_flight_records.csv"

df = pd.read_csv(DATA_PATH)

# Identify columns by case-insensitive substring match on the header names.
# Each list keeps the column order of the CSV; the first hit is used below.
date_cols = [c for c in df.columns if 'date' in c.lower() or 'time' in c.lower()]
status_cols = [c for c in df.columns if 'status' in c.lower()]
delay_cols = [c for c in df.columns if 'delay' in c.lower()]
origin_cols = [c for c in df.columns if 'origin' in c.lower()]
# 'destination' always contains 'dest', so a single test covers both spellings.
dest_cols = [c for c in df.columns if 'dest' in c.lower()]

# An 'icao' column only identifies the airline when it is not already an
# origin/destination column (e.g. 'origin_icao' is an airport code, not a carrier).
airport_cols = set(origin_cols) | set(dest_cols)
airline_cols = [
    c for c in df.columns
    if 'airline' in c.lower() or ('icao' in c.lower() and c not in airport_cols)
]

# Assign columns
# The date column is mandatory for every time-based view below, so fail with a
# readable message instead of a bare IndexError when none is found.
if not date_cols:
    raise ValueError(
        "No column containing 'date' or 'time' found in "
        f"{DATA_PATH}; available columns: {list(df.columns)}"
    )
date_col = date_cols[0]
# The remaining roles are optional; downstream cells skip their view when None.
status_col = status_cols[0] if status_cols else None
delay_col = delay_cols[0] if delay_cols else None
airline_col = airline_cols[0] if airline_cols else None
origin_col = origin_cols[0] if origin_cols else None
dest_col = dest_cols[0] if dest_cols else None

# Parse datetime (raises on unparseable values, so bad data is surfaced early).
df[date_col] = pd.to_datetime(df[date_col])

In [None]:
# Derive boolean delay / cancellation flags from the status column.
if status_col:
    # Lower-case once and reuse; astype(str) turns missing values into 'nan',
    # which matches neither keyword and therefore yields False.
    status_text = df[status_col].astype(str).str.lower()
    df['is_delayed'] = status_text.str.contains('delay')
    df['is_cancelled'] = status_text.str.contains('cancel')
else:
    # Without a status column no flight can be flagged; rates will read 0%.
    df['is_delayed'] = False
    df['is_cancelled'] = False

# Time features
# strftime keeps the same 'YYYY-MM' labels as to_period('M').astype(str) but
# leaves missing dates as NaN instead of the literal string 'NaT', so rows
# without a date are dropped by groupby('month') rather than forming a bogus month.
df['month'] = df[date_col].dt.strftime('%Y-%m')
df['year'] = df[date_col].dt.year

In [None]:
# Headline KPIs over the whole dataset: row count plus the share of flights
# flagged as delayed / cancelled, expressed as percentages with two decimals.
kpi_summary = {
    'Total Flights': len(df),
    **{
        kpi_label: round(df[flag_col].mean() * 100, 2)
        for kpi_label, flag_col in (
            ('Delay Rate (%)', 'is_delayed'),
            ('Cancellation Rate (%)', 'is_cancelled'),
        )
    },
}

# Bare expression so the notebook renders the dict as the cell output.
kpi_summary

In [None]:
# Per-month flight count with delay and cancellation rates (as percentages).
monthly_kpis = (
    df.groupby('month')
    .agg(
        total_flights=('is_delayed', 'count'),
        delay_rate=('is_delayed', 'mean'),
        cancellation_rate=('is_cancelled', 'mean'),
    )
    .reset_index()
    .assign(
        # Convert the 0-1 fractions produced by mean() into percentages.
        delay_rate=lambda kpis: kpis['delay_rate'] * 100,
        cancellation_rate=lambda kpis: kpis['cancellation_rate'] * 100,
    )
)

# One line per rate, months along the x axis.
fig = px.line(
    data_frame=monthly_kpis,
    x='month',
    y=['delay_rate', 'cancellation_rate'],
    title='Monthly Delay and Cancellation Rates',
)
fig.show()

In [None]:
# Per-airline KPIs; skipped entirely when no airline column was detected.
if airline_col:
    airline_kpis = (
        df.groupby(airline_col)
        .agg(
            total_flights=('is_delayed', 'count'),
            delay_rate=('is_delayed', 'mean'),
            cancellation_rate=('is_cancelled', 'mean'),
        )
        .reset_index()
        .assign(
            # Fractions -> percentages.
            delay_rate=lambda kpis: kpis['delay_rate'] * 100,
            cancellation_rate=lambda kpis: kpis['cancellation_rate'] * 100,
        )
    )

    # Bars ordered from the highest delay rate to the lowest.
    fig = px.bar(
        data_frame=airline_kpis.sort_values(by='delay_rate', ascending=False),
        x=airline_col,
        y='delay_rate',
        title='Delay Rate by Airline',
    )
    fig.show()

In [None]:
# Route-level view: one point per (origin, destination) pair.
# Needs both endpoint columns; otherwise the cell does nothing.
if origin_col and dest_col:
    route_keys = [origin_col, dest_col]
    route_kpis = (
        df.groupby(route_keys)
        .agg(
            flights=('is_delayed', 'count'),
            delay_rate=('is_delayed', 'mean'),
        )
        .reset_index()
        # Fraction -> percentage.
        .assign(delay_rate=lambda kpis: kpis['delay_rate'] * 100)
    )

    # Volume on x, delay rate on y; the endpoints are shown on hover.
    fig = px.scatter(
        data_frame=route_kpis,
        x='flights',
        y='delay_rate',
        hover_data=[origin_col, dest_col],
        title='Route Volume vs Delay Rate',
    )
    fig.show()


In [None]:
# Distribution of the detected delay column, bucketed into at most 50 bins.
# Skipped when no delay column was found.
if delay_col:
    fig = px.histogram(
        data_frame=df,
        x=delay_col,
        nbins=50,
        title='Delay Duration Distribution',
    )
    fig.show()

In [None]:
# Example: Airline-specific monthly trend
# Uses the first non-missing airline value (in order of appearance) as the sample carrier.
if airline_col:
    # Drop missing values first: a NaN "airline" never equals itself, so
    # filtering on it would produce an empty frame and a blank chart.
    known_airlines = df[airline_col].dropna().unique()

    if len(known_airlines) == 0:
        # Nothing to plot; say so instead of raising IndexError on [0].
        print("No airline values available; skipping airline-specific trend.")
    else:
        airline_example = known_airlines[0]
        filtered = df[df[airline_col] == airline_example]

        airline_monthly = filtered.groupby('month').agg(
            delay_rate=('is_delayed', 'mean')
        ).reset_index()
        # Fraction -> percentage.
        airline_monthly['delay_rate'] *= 100

        fig = px.line(
            airline_monthly,
            x='month',
            y='delay_rate',
            # \u2014 is an em dash; written as an escape so the title cannot be
            # corrupted again by a file re-encoding.
            title=f'Monthly Delay Rate \u2014 {airline_example}'
        )
        fig.show()


In [None]:
# Closing note for the reader once every chart cell has run.
print(
    "Operational dashboards generated. "
    "For production use, migrate visuals to Streamlit or Dash."
)