In [14]:
import pandas
import plotly.graph_objects as go

In [7]:
# CSV file to analyse (relative path).
filepath = "../datasets/SEA_DFW_2023.csv"

# Skip the first 8 lines and the last line of the file (presumably a
# non-tabular preamble and trailer -- TODO confirm against the raw file).
# `skipfooter` is not supported by pandas' C parser, hence engine="python".
df = pandas.read_csv(
    filepath,
    engine="python",
    skiprows=8,
    skipfooter=1,
)

In [26]:
def mark_invalid(df):
    """Return a copy of ``df`` with a boolean ``valid`` column.

    A row is flagged invalid when its "Airborne Time (Minutes)" equals 0;
    every other row is flagged valid. (Presumably a zero airborne time means
    the flight never took off -- TODO confirm against the data source.)

    The input frame is not modified: the flag is attached to a new frame, so
    the function can be re-run on the same input without side effects.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an "Airborne Time (Minutes)" column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns of ``df`` plus ``valid`` (an existing
        ``valid`` column is replaced).

    Raises
    ------
    KeyError
        If the "Airborne Time (Minutes)" column is missing.
    """
    is_flight_valid = df["Airborne Time (Minutes)"].ne(0)
    # assign() builds a new DataFrame instead of writing into the caller's one.
    return df.assign(valid=is_flight_valid)

# Attach the `valid` flag to every row before any aggregation is done.
df = df.pipe(mark_invalid)

In [34]:
# Mean airborne time per date, computed over valid flights only so that rows
# with a zero airborne time do not pull the daily mean down.
# NOTE(review): dates are grouped as the raw strings from the CSV; the order
# on the x axis is therefore the sorted order of those strings -- confirm
# that this matches chronological order for this data set.
valid_flights = df.loc[df["valid"]]
airborne_by_date = valid_flights.groupby("Date (MM/DD/YYYY)")["Airborne Time (Minutes)"]
mean_flight_times_mean_per_day = airborne_by_date.mean()

scatter = go.Scatter(
    name="Mean flight time per day",
    mode="lines+markers",
    marker={"symbol": "x"},
    x=mean_flight_times_mean_per_day.index,
    y=mean_flight_times_mean_per_day.values,
)

# update_layout() returns the figure itself, so it can be chained directly.
fig = go.Figure(data=[scatter]).update_layout(
    title="Mean flight times 2023 (SEA->DFW)",
    xaxis_title="Date",
    yaxis_title="Mean flight time (minutes)",
    hovermode="x unified",
    width=1200,
    height=500,
)
fig.show()

In [33]:
# Number of invalid flights for every date in the data set.
# The count is taken over ALL rows rather than over the invalid rows only:
# filtering first (df.loc[~df["valid"]].groupby(...).size()) would drop every
# date that has no invalid flight, which is most of them.
num_invalid_flights = (~df["valid"]).groupby(df["Date (MM/DD/YYYY)"]).sum()

# x and y both come from `num_invalid_flights` itself. Borrowing the index of
# the per-day mean series would be wrong: that series is built from valid
# flights only, so a date on which every flight is invalid is absent from it
# and the counts would be plotted against the wrong dates. Using the series'
# own index also keeps this cell independent of the plotting cell above.
scatter = go.Scatter(
    x=num_invalid_flights.index,
    y=num_invalid_flights.values,
    mode="lines+markers",
    name="Number of invalid flights per day",
    marker={"symbol": "x"},
)
fig = go.Figure(data=[scatter])
fig.update_layout(
    title="Number of invalid flights (SEA->DFW)",
    xaxis_title="Date",
    yaxis_title="Number of invalid flights",
    width=1200,
    height=500,
    hovermode="x unified",
)
fig.show()