In [1]:
import polars as pl


from excel_manager import (
    get_df,
    get_count_df,
    COL_NAME_TOTAL_COUNT,
    COL_NAME_WINDOW_TIME,
    COL_NAME_DEPARTURE_DATETIME,
)

Loading config from: C:\Users\tajdi\OneDrive\Documents\GitHub\Stage_Ram_PFA_Data_Visualisation\config.toml
Initializing server instance...
Loading config from: C:\Users\tajdi\AppData\Local\StagePFA\DashboardApp\config_data.toml


In [2]:
COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY = "flight_with_delay"
COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_GTE_15MIN = "flight_with_delay_gte_15min"
COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_41_42_GTE_15MIN = (
    "flight_with_delay_gte_15min_code_41_42"
)
COL_NAME_PER_DELAYED_FLIGHTS_15MIN_NOT_WITH_41_42 = (
    "per_delayed_flights_15min__not_with_41_42"
)
COL_NAME_PER_DELAYED_FLIGHTS_NOT_WITH_15MIN = "per_delayed_flights_not_with_15min"
COL_NAME_PER_FLIGHTS_NOT_DELAYED = "per_flights_not_delayed"

In [3]:
df = get_df()

In [4]:
window_str = "3w"

In [5]:
total_df = get_count_df(window_str)

In [6]:
total_df.sort("WINDOW_DATETIME_DEP").collect()

WINDOW_DATETIME_DEP,total_count
datetime[μs],u32
2025-03-17 00:00:00,1590
2025-04-07 00:00:00,4156
2025-04-28 00:00:00,4049
2025-05-19 00:00:00,4032
2025-06-09 00:00:00,3190


In [7]:
windowed_df = df.with_columns(
    pl.col(COL_NAME_DEPARTURE_DATETIME)
    .dt.truncate(window_str)
    .alias(COL_NAME_WINDOW_TIME)
)
delayed_flights_count_df = windowed_df.group_by(COL_NAME_WINDOW_TIME).agg(
    pl.len().alias(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY)
)

In [8]:
delayed_flights_count_df.sort("WINDOW_DATETIME_DEP").collect()

WINDOW_DATETIME_DEP,flight_with_delay
datetime[μs],u32
2025-03-17 00:00:00,30
2025-04-07 00:00:00,89
2025-04-28 00:00:00,79
2025-05-19 00:00:00,97
2025-06-09 00:00:00,70


In [9]:
delayed_15min_df = windowed_df.filter((pl.col("Retard en min") >= 15))
delayed_15min_count_df = delayed_15min_df.group_by(COL_NAME_WINDOW_TIME).agg(
    pl.len().alias(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_GTE_15MIN)
)

In [10]:
delayed_15min_count_df.sort("WINDOW_DATETIME_DEP").collect()

WINDOW_DATETIME_DEP,flight_with_delay_gte_15min
datetime[μs],u32
2025-03-17 00:00:00,22
2025-04-07 00:00:00,73
2025-04-28 00:00:00,69
2025-05-19 00:00:00,79
2025-06-09 00:00:00,51


In [11]:
delayed_flights_41_42_gte_15min_count_df = (
    delayed_15min_df.filter(pl.col("CODE_DR").is_in({41, 42}))
    .group_by(COL_NAME_WINDOW_TIME)
    .agg(pl.len().alias(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_41_42_GTE_15MIN))
)

In [12]:
delayed_flights_41_42_gte_15min_count_df.sort("WINDOW_DATETIME_DEP").collect()

WINDOW_DATETIME_DEP,flight_with_delay_gte_15min_code_41_42
datetime[μs],u32
2025-03-17 00:00:00,16
2025-04-07 00:00:00,50
2025-04-28 00:00:00,38
2025-05-19 00:00:00,55
2025-06-09 00:00:00,35


In [13]:
joined_df = (
    total_df.join(delayed_flights_count_df, COL_NAME_WINDOW_TIME, how="left")
    .join(delayed_15min_count_df, COL_NAME_WINDOW_TIME, how="left")
    .join(delayed_flights_41_42_gte_15min_count_df, COL_NAME_WINDOW_TIME, how="left")
)

In [14]:
joined_df.collect()

WINDOW_DATETIME_DEP,total_count,flight_with_delay,flight_with_delay_gte_15min,flight_with_delay_gte_15min_code_41_42
datetime[μs],u32,u32,u32,u32
2025-06-09 00:00:00,3190,70,51,35
2025-04-07 00:00:00,4156,89,73,50
2025-03-17 00:00:00,1590,30,22,16
2025-05-19 00:00:00,4032,97,79,55
2025-04-28 00:00:00,4049,79,69,38


In [15]:
joined_df_res = joined_df.with_columns(
    [
        ## delay
        pl.lit(1)
        .sub(
            pl.col(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY)
            / (pl.col(COL_NAME_TOTAL_COUNT))
        )
        .alias(COL_NAME_PER_FLIGHTS_NOT_DELAYED),
        ## delay > 15
        (
            pl.lit(1).sub(
                pl.col(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_GTE_15MIN)
                / (pl.col(COL_NAME_TOTAL_COUNT))
            )
        ).alias(COL_NAME_PER_DELAYED_FLIGHTS_NOT_WITH_15MIN),
        ## delay > 15 min for 41 42
        pl.lit(1)
        .sub(
            (
                pl.col(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_41_42_GTE_15MIN)
                / pl.col(COL_NAME_TOTAL_COUNT)
            )
        )
        .alias(COL_NAME_PER_DELAYED_FLIGHTS_15MIN_NOT_WITH_41_42),
    ]
)

In [16]:
joined_df_res.collect()

WINDOW_DATETIME_DEP,total_count,flight_with_delay,flight_with_delay_gte_15min,flight_with_delay_gte_15min_code_41_42,per_flights_not_delayed,per_delayed_flights_not_with_15min,per_delayed_flights_15min__not_with_41_42
datetime[μs],u32,u32,u32,u32,f64,f64,f64
2025-05-19 00:00:00,4032,97,79,55,0.975942,0.980407,0.986359
2025-06-09 00:00:00,3190,70,51,35,0.978056,0.984013,0.989028
2025-04-07 00:00:00,4156,89,73,50,0.978585,0.982435,0.987969
2025-04-28 00:00:00,4049,79,69,38,0.980489,0.982959,0.990615
2025-03-17 00:00:00,1590,30,22,16,0.981132,0.986164,0.989937


In [17]:
joined_df_res_2 = joined_df.with_columns(
    [
        ## delay
        (
            (
                pl.col(COL_NAME_TOTAL_COUNT)
                - pl.col(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY)
            ).cast(pl.Float64)
            / pl.col(COL_NAME_TOTAL_COUNT)
        ).alias(COL_NAME_PER_FLIGHTS_NOT_DELAYED),
        ## delay > 15
        (
            (
                pl.col(COL_NAME_TOTAL_COUNT)
                - pl.col(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_GTE_15MIN)
            )
            / pl.col(COL_NAME_TOTAL_COUNT)
        ).alias(COL_NAME_PER_DELAYED_FLIGHTS_NOT_WITH_15MIN),
        ## delay > 15 min for 41 42
        (
            (
                pl.col(COL_NAME_TOTAL_COUNT)
                - pl.col(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_41_42_GTE_15MIN)
            )
            / pl.col(COL_NAME_TOTAL_COUNT)
        ).alias(COL_NAME_PER_DELAYED_FLIGHTS_15MIN_NOT_WITH_41_42),
    ]
)

In [18]:
joined_df_res_2.collect()

WINDOW_DATETIME_DEP,total_count,flight_with_delay,flight_with_delay_gte_15min,flight_with_delay_gte_15min_code_41_42,per_flights_not_delayed,per_delayed_flights_not_with_15min,per_delayed_flights_15min__not_with_41_42
datetime[μs],u32,u32,u32,u32,f64,f64,f64
2025-04-28 00:00:00,4049,79,69,38,0.980489,0.982959,0.990615
2025-03-17 00:00:00,1590,30,22,16,0.981132,0.986164,0.989937
2025-04-07 00:00:00,4156,89,73,50,0.978585,0.982435,0.987969
2025-05-19 00:00:00,4032,97,79,55,0.975942,0.980407,0.986359
2025-06-09 00:00:00,3190,70,51,35,0.978056,0.984013,0.989028


In [24]:
joined_df_res_2.select(COL_NAME_WINDOW_TIME).collect().to_series().to_list()

[datetime.datetime(2025, 4, 28, 0, 0),
 datetime.datetime(2025, 5, 19, 0, 0),
 datetime.datetime(2025, 6, 9, 0, 0),
 datetime.datetime(2025, 3, 17, 0, 0),
 datetime.datetime(2025, 4, 7, 0, 0)]

In [20]:
joined_df.collect()

WINDOW_DATETIME_DEP,total_count,flight_with_delay,flight_with_delay_gte_15min,flight_with_delay_gte_15min_code_41_42
datetime[μs],u32,u32,u32,u32
2025-04-07 00:00:00,4156,89,73,50
2025-06-09 00:00:00,3190,70,51,35
2025-04-28 00:00:00,4049,79,69,38
2025-03-17 00:00:00,1590,30,22,16
2025-05-19 00:00:00,4032,97,79,55


# normal


In [21]:
delayed_flights_count_df = df.select(
    pl.len().alias(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY)
)

delayed_15min_df = df.filter((pl.col("Retard en min") >= 15))
delayed_15min_count_df = delayed_15min_df.select(
    pl.len().alias(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_GTE_15MIN)
)

In [22]:
delayed_flights_41_42_gte_15min_count_df = delayed_15min_df.filter(
    (pl.col("CODE_DR").is_in({41, 42}))
).select(pl.len().alias(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_41_42_GTE_15MIN))


joined_df.collect()

WINDOW_DATETIME_DEP,total_count,flight_with_delay,flight_with_delay_gte_15min,flight_with_delay_gte_15min_code_41_42
datetime[μs],u32,u32,u32,u32
2025-06-09 00:00:00,3190,70,51,35
2025-04-07 00:00:00,4156,89,73,50
2025-04-28 00:00:00,4049,79,69,38
2025-03-17 00:00:00,1590,30,22,16
2025-05-19 00:00:00,4032,97,79,55


In [23]:
count_flight = joined_df.select(pl.col(COL_NAME_TOTAL_COUNT)).item()
count_flight_with_delay = joined_df.select(
    pl.col(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY)
).item()
count_flight_with_delay_gte_15min = joined_df.select(
    pl.col(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_GTE_15MIN)
).item()
count_flight_with_delay_gte_15min_41_42 = joined_df.select(
    pl.col(COL_NAME_TOTAL_COUNT_FLIGHT_WITH_DELAY_41_42_GTE_15MIN)
).item()

AttributeError: 'LazyFrame' object has no attribute 'item'

In [None]:
# first graph

per_delayed_flights_15min__not_with_41_42 = 1 - (
    count_flight_with_delay_gte_15min_41_42 / count_flight
)

# second graph

## first op (15 min)
per_delayed_flights_not_with_15min = 1 - (
    count_flight_with_delay_gte_15min / count_flight
)


## second op

per_flights_not_delayed = 1 - (count_flight_with_delay / count_flight)


# third graph

In [None]:
import polars as pl
from datetime import timedelta


df_window = pl.col("DateTime").dt.truncate(timedelta(weeks=1))

df_windowed = df.group_by(df_window).agg(pl.len())