# Libraries

In [1]:
import geopandas as gpd
import numpy as np

# Load data

In [2]:
# Read the raw road-network counts and surveys GeoJSON into a GeoDataFrame
RAW_COUNTS_FILE = 'raw/comptages-et-enquetes-sur-le-reseau-routier.geojson'
df = gpd.read_file(RAW_COUNTS_FILE)

In [3]:
# Inspect the column names available in the raw dataset
df.columns

Index(['type', 'type_details', 'gestionnaire', 'id_sig', 'annee',
       'commune_insee', 'commune', 'voie_rue', 'nb_sens', 'orientation',
       'tv_tmja', 'pl_pct_tmja', 'tv_tmja_pct_evo', 'tv_tmjo', 'pl_pct_tmjo',
       'tv_tmjo_pct_evo', 'tv_hpm', 'tv_hpm_h', 'tv_hps', 'tv_hps_h',
       'uvp_tmja', 'uvp_tmjo', 'uvp_hpm', 'uvp_hpm_h', 'uvp_hps', 'uvp_hps_h',
       'x_84', 'y_84', 'geometry'],
      dtype='object')

In [4]:
# Drop the columns that are not used further down in this notebook
# (the *_pct_evo, *_hpm*, *_hps*, uvp_* and x_84 / y_84 fields).
unused_columns = [
    'tv_tmja_pct_evo', 'tv_tmjo_pct_evo',
    'tv_hpm', 'tv_hpm_h', 'tv_hps', 'tv_hps_h',
    'uvp_tmja', 'uvp_tmjo', 'uvp_hpm', 'uvp_hpm_h', 'uvp_hps', 'uvp_hps_h',
    'x_84', 'y_84',
]
df.drop(columns=unused_columns, inplace=True)

In [5]:
# List the distinct kinds of counting campaigns present in the data
df['type'].unique()

array(['Comptage ponctuel en section',
       'Comptage ponctuel directionnel sur carrefour',
       'Comptage ponctuel de modes doux',
       'Autre type de campagne ponctuelle',
       'Comptage permanent en section', 'Comptage permanent de vélos'],
      dtype=object)

In [6]:
# Keep only the count types that relate to motorised traffic
motorised_count_types = [
    'Comptage ponctuel en section',
    'Comptage ponctuel directionnel sur carrefour',
    'Comptage permanent en section',
]
is_motorised = df['type'].isin(motorised_count_types)
df = df[is_motorised]

In [7]:
# Number of rows left for each remaining count type
df['type'].value_counts()

type
Comptage permanent en section                   3388
Comptage ponctuel en section                    2322
Comptage ponctuel directionnel sur carrefour     552
Name: count, dtype: int64

In [8]:
# Permanent counts are difficult to exploit: a station sometimes monitors
# individual lanes only, and there is no shared key to aggregate those values
# over the whole road section. The trailing characters of 'id_sig' seem to be
# related to this, but there are too many different cases to handle.
# We therefore continue with the temporary counts only.
is_permanent = df['type'] == 'Comptage permanent en section'
df = df[~is_permanent]

In [9]:
# Distribution of the recorded travel directions
df['orientation'].value_counts()

orientation
Sud-Nord     107
Nord-Sud     104
Ouest-Est     94
Est-Ouest     88
Name: count, dtype: int64

In [10]:
# Distribution of two-way versus one-way sections
df['nb_sens'].value_counts()

nb_sens
2 sens         1924
Sens unique     398
Name: count, dtype: int64

In [11]:
# Number of counts per year, in chronological order
df['annee'].value_counts().sort_index()

annee
2010      3
2011     22
2012     58
2013     38
2014    142
2015    118
2016    227
2017    263
2018    351
2019    379
2020    252
2021    368
2022    306
2023    277
2024     70
Name: count, dtype: int64

In [12]:
# Columns still present at this point of the notebook
df.columns

Index(['type', 'type_details', 'gestionnaire', 'id_sig', 'annee',
       'commune_insee', 'commune', 'voie_rue', 'nb_sens', 'orientation',
       'tv_tmja', 'pl_pct_tmja', 'tv_tmjo', 'pl_pct_tmjo', 'geometry'],
      dtype='object')

In [13]:
# Rename the source columns to the names used in the exported files.
# NOTE(review): 'tmja' / 'tmjo' presumably stand for annual average daily /
# weekday traffic and 'pl' for heavy vehicles — confirm against the data dictionary.
# The result is re-bound instead of renaming in place: `df` was produced by
# boolean filtering, and mutating such a frame in place is a classic source of
# chained-assignment warnings and hidden notebook state.
df = df.rename(columns={
    'tv_tmja': 'AADT',
    'tv_tmjo': 'AAWT',
    'pl_pct_tmja': 'TR_pct_AADT',
    'pl_pct_tmjo': 'TR_pct_AAWT',
    'voie_rue': 'raw_name',
    'nb_sens': 'raw_oneway',
    'orientation': 'raw_direction',
})

In [14]:
# Remove the rows where AADT or AAWT is missing (a row is dropped as soon as
# either value is NaN). Re-bind the result rather than mutating the filtered
# frame in place.
df = df.dropna(subset=['AADT', 'AAWT'])

In [15]:
# Turn the textual value into a boolean one-way flag: '2 sens' -> False,
# anything else (including a missing value) -> True, exactly as before.
# NOTE(review): confirm that a missing value should really count as one-way.
# The dtype guard keeps the cell safe to re-run: comparing an already-converted
# boolean column with '2 sens' would otherwise set every row to True.
if df['raw_oneway'].dtype != bool:
    df['raw_oneway'] = df['raw_oneway'] != '2 sens'

In [16]:
# Number of distinct geometries among the remaining rows
df.geometry.unique().size

1597

In [17]:
df.shape  # There is no common identifier to join on later; only the geometries can be used

(2322, 15)

# Map matching with OSM

In [18]:
import sys
from pathlib import Path

# Make the project helpers importable: add the ../../assets directory
# (resolved against the kernel's current working directory) to sys.path.
assets_dir = str(Path("../../assets").resolve())
if assets_dir not in sys.path:  # do not pile up duplicates when the cell is re-run
    sys.path.append(assets_dir)

from map_matching_OSM import points_matching

In [19]:
# Run the OSM map matching on the entire dataframe.
# NOTE(review): points_matching is a project helper not shown here; judging by the
# osm_* columns used later in this notebook it presumably adds them — confirm.
df = points_matching(df)

City downloaded


 10%|▉         | 230/2322 [00:01<00:14, 140.44it/s]

Value Error - No roads found nearby current index


 89%|████████▉ | 2068/2322 [00:15<00:01, 127.97it/s]

Value Error - No roads found nearby current index


 93%|█████████▎| 2160/2322 [00:16<00:01, 122.76it/s]

Value Error - No roads found nearby current index


 98%|█████████▊| 2267/2322 [00:16<00:00, 124.16it/s]

Value Error - No roads found nearby current index


100%|██████████| 2322/2322 [00:17<00:00, 133.18it/s]

We failed to match 4 sensors
...on a total of 2322 sensors





In [20]:
# Compare the source street names with the names of the matched OSM roads
name_pairs = df[['raw_name', 'osm_name']].dropna().drop_duplicates()
name_pairs.head(25)

Unnamed: 0,raw_name,osm_name
0,Rue de la Ronce,Rue de la Ronce
1,Rue du 19 Mars,Rue du 19 Mars 1962
2,Rue Gérard Marti,Rue Gérard Marti
3,Rue Victor Hugo,Rue Victor Hugo
4,D42 - Route de Duclair,Route de Duclair
5,D64 - Route de Saint-Wandrille,Route de Saint-Wandrille
7,Rue Pasteur,Rue Pasteur
8,D3 - Avenue Jean Jaurès,Avenue Jean Jaurès
9,D938 - Boulevard du 11 Novembre,Boulevard du 11 Novembre
23,D43 - Avenue du Val aux Dames,Avenue du Val aux Dames


# Final saving

In [22]:
# Validate the dataset before exporting it.
# NOTE(review): validate_dataset is a project helper not shown here; per the
# recorded output it reports NaN counts for AADT / geometry and AADT outliers.
# Whether it also removes rows cannot be told from this notebook — confirm.
from validate_dataset import validate_dataset

df = validate_dataset(df)

Number of NaN values for AADT: 0
Number of NaN values for geometry: 0
Number of low outliers for AADT: 0
Number of high outliers for AADT: 0


In [23]:
# Export one GeoJSON file per year.
# NOTE(review): only 2015-2024 are exported, although the yearly value counts
# earlier in the notebook also list 2010-2014 — confirm that this is intended.
FIRST_YEAR, LAST_YEAR = 2015, 2024

# Columns written to each output file (the same for every year).
EXPORT_COLUMNS = [
    'AADT', 'AAWT', 'TR_pct_AADT', 'TR_pct_AAWT', 'geometry',
    'raw_name', 'raw_oneway', 'raw_direction',
    'osm_name', 'osm_type', 'osm_lanes', 'osm_oneway',
    'osm_distance', 'osm_maxspeed', 'osmid',
]

# Make sure the output folder exists so that a fresh run does not fail on write.
Path('treated').mkdir(parents=True, exist_ok=True)

for year in range(FIRST_YEAR, LAST_YEAR + 1):
    # Extract the rows of the corresponding year
    sub = df[df.annee == year]
    print(year, sub.shape[0])
    sub[EXPORT_COLUMNS].to_file(f'treated/Rouen_AADT_AAWT_{year}.geojson', index=False)

2015 77
2016 151
2017 225
2018 307
2019 306
2020 226
2021 291
2022 221
2023 240
2024 45
