In [15]:
import polars as pl
import geopandas as gpd
from utils.loader_local import LoaderLocal


In [41]:
# Read data
def read_etat_des_ascenceurs():
    df = pl.read_csv('/home/onyxia/work/hackathon_mobilites_2025/data/raw/etat-des-ascenseurs.csv', 
    separator=";",
    truncate_ragged_lines=True)
    return df

In [53]:
df_ascensceurs = read_etat_des_ascenceurs()
print('df_ascensceurs number of rows : %i' %df_ascensceurs.shape[0])

df_asc_counts = (
    df_ascensceurs
        .group_by("zdcid")
        .agg(
            pl.col("liftid").n_unique().alias("n_lifts")
        )
)
print('df_ascensceurs_count_by_stations number of rows : %i' %df_asc_counts.shape[0])

df_ascensceurs number of rows : 944
df_ascensceurs_count_by_stations number of rows : 313


In [57]:
ref_table_finale_path = "/home/onyxia/work/hackathon_mobilites_2025/data/enrich/final_table.gpq"

stations = LoaderLocal.loader_geoparquet(ref_table_finale_path)
print('stations number of rows : %i' %stations.shape[0])

stations number of rows : 590


In [None]:
#Attention : format particulier de l'id dans stations
stations_pl.filter(pl.col('id_ref_zdc')=='71485.0')

geo_point_2d,id_ref_zdc,nom_zda,station_clean,res_com,mode,exploitant,ligne,station,facilite_acces_code,facilite_acces,nombre_facilite_acces_station,id_zdc,total_validation,total_validation_amethyste,pct_amethyste
str,str,str,str,str,str,str,str,str,str,str,i64,str,f64,f64,f64
"""48.88813847761218, 2.249792769…","""71485.0""","""Esplanade de La Défense""","""esplanadedeladéfense""","""METRO 1""","""METRO""","""RATP""",,"""Esplanade de La Défense""","""green""","""très facile d'accès (ascenseur…",1,"""71485.0""",137108230.0,19586890.0,14.2857


In [59]:
#Cast variables for join
stations_geometry = stations.geometry.copy()
stations_pl = pl.from_pandas(stations.drop(columns="geometry"))

df_asc_counts = df_asc_counts.with_columns(
    pl.col("zdcid").cast(pl.Utf8)
)

print('df_asc_counts')
print(df_asc_counts.select('zdcid').head())

stations_pl = stations_pl.with_columns(
    pl.col("id_ref_zdc")
    .cast(pl.Float64, strict=False)   # 1) cast to float
    .cast(pl.Int64,   strict=False)   # 2) cast to int
    .cast(pl.Utf8,    strict=False)   # 3) cast to string
)
print('stations')
print(stations_pl.select('id_ref_zdc').head())

df_asc_counts
shape: (5, 1)
┌───────┐
│ zdcid │
│ ---   │
│ str   │
╞═══════╡
│ 67625 │
│ 70133 │
│ 72287 │
│ 71828 │
│ 68129 │
└───────┘
stations
shape: (5, 1)
┌────────────┐
│ id_ref_zdc │
│ ---        │
│ str        │
╞════════════╡
│ 71432      │
│ 73669      │
│ 72491      │
│ 71030      │
│ 71632      │
└────────────┘


In [60]:
joined_pl = stations_pl.join(
    df_asc_counts,
    left_on="id_ref_zdc",
    right_on="zdcid",
    how="left"
)
joined_gdf = gpd.GeoDataFrame(joined_pl.to_pandas(), geometry=stations_geometry)

In [63]:
match_col = "n_lifts"   # typically the join key is good

matched_count = joined_pl.filter(
    pl.col(match_col).is_not_null()
).count()

not_matched_count = joined_pl.filter(
    pl.col(match_col).is_null()
).count()

total = joined_pl.count()

matched_count, not_matched_count, total

(shape: (1, 17)
 ┌────────────┬────────────┬─────────┬────────────┬───┬───────────┬───────────┬───────────┬─────────┐
 │ geo_point_ ┆ id_ref_zdc ┆ nom_zda ┆ station_cl ┆ … ┆ total_val ┆ total_val ┆ pct_ameth ┆ n_lifts │
 │ 2d         ┆ ---        ┆ ---     ┆ ean        ┆   ┆ idation   ┆ idation_a ┆ yste      ┆ ---     │
 │ ---        ┆ u32        ┆ u32     ┆ ---        ┆   ┆ ---       ┆ methyste  ┆ ---       ┆ u32     │
 │ u32        ┆            ┆         ┆ u32        ┆   ┆ u32       ┆ ---       ┆ u32       ┆         │
 │            ┆            ┆         ┆            ┆   ┆           ┆ u32       ┆           ┆         │
 ╞════════════╪════════════╪═════════╪════════════╪═══╪═══════════╪═══════════╪═══════════╪═════════╡
 │ 178        ┆ 178        ┆ 178     ┆ 178        ┆ … ┆ 172       ┆ 172       ┆ 172       ┆ 178     │
 └────────────┴────────────┴─────────┴────────────┴───┴───────────┴───────────┴───────────┴─────────┘,
 shape: (1, 17)
 ┌────────────┬────────────┬─────────┬───────────