🚌 Projet MDM - Mobilité Durable en Montagne ⛰️

*Author : Nicolas Grosjean*

*Date : 13/09/2025*

**Description :**

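This Jupyter notebook explores the OpenStreetMap (OSM) bus stop and bus line data: it loads both datasets, expands their tag dictionaries into one column per tag, and lists the resulting column types.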

In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
# Move two directory levels up so that the `src` package can be imported.
# NOTE(review): `%cd ../..` is relative to the current working directory, so
# this cell is not idempotent: running it again without restarting the kernel
# climbs two more levels while still printing the message below.
%cd ../..
print("Working directory set to the root of the project")

D:\Documents\GitHub\mobilite_durable
Working directory set to the root of the project


In [3]:
from src.processors.osm import OSMBusLinesProcessor, OSMBusStopsProcessor

In [4]:
def get_markdown_dtype(df: pd.DataFrame) -> str:
    """Render the column names and dtypes of a DataFrame as a Markdown table.

    Args:
        df: The DataFrame to describe.

    Returns:
        A two-column Markdown table (``Column``, ``Dtype``) with one row per
        column of ``df``, in column order. Every line, including the last one,
        ends with a newline. An empty DataFrame yields the header only.
    """
    header = "| Column | Dtype |\n|--------|-------|\n"
    # Iterate over ``df.dtypes`` rather than ``df[col]``: it yields exactly one
    # (label, dtype) pair per column, so duplicated column labels work too
    # (``df[col]`` returns a DataFrame there, which has no ``.dtype``).
    rows = [f"| {col} | {dtype} |\n" for col, dtype in df.dtypes.items()]
    return header + "".join(rows)

In [5]:
# Fetch the OSM bus stops through the project's OSMBusStopsProcessor.
# NOTE(review): reload_pipeline=False presumably reuses already-fetched data
# instead of re-running the pipeline — confirm against the processor.
bus_data = OSMBusStopsProcessor.fetch(reload_pipeline=False)
# One row per entry of the "features" list; the tags of each stop stay nested
# in the "properties" column at this stage.
stops_df = pd.DataFrame(bus_data["features"])
stops_df.head()

Unnamed: 0,type,geometry,properties,id
0,Feature,"{'type': 'Point', 'coordinates': [5.7762176, 4...","{'bus': 'yes', 'highway': 'bus_stop', 'name': ...",135296
1,Feature,"{'type': 'Point', 'coordinates': [5.7423146, 4...","{'bus': 'yes', 'highway': 'bus_stop', 'name': ...",135930
2,Feature,"{'type': 'Point', 'coordinates': [5.6815922, 4...","{'bus': 'yes', 'description': 'Arrêt de régula...",136570
3,Feature,"{'type': 'Point', 'coordinates': [5.6627217, 4...","{'bus': 'yes', 'highway': 'bus_stop', 'name': ...",136597
4,Feature,"{'type': 'Point', 'coordinates': [5.604304, 45...","{'bus': 'yes', 'highway': 'bus_stop', 'name': ...",137073


In [6]:
# Expand the per-stop "properties" dicts into one column per OSM tag.
# The frame is built in a single pass from the list of dicts: the previous
# `.apply(pd.Series)` created one pd.Series per row, which is very slow on
# thousands of rows. The original index is passed explicitly so that the
# concat below still aligns the new columns row by row.
expanded = pd.DataFrame(stops_df["properties"].tolist(), index=stops_df.index)
expanded_stops_df = pd.concat([stops_df.drop(columns=["properties"]), expanded], axis=1)
# Preview only the first 10 columns: the expanded frame is very wide.
expanded_stops_df[expanded_stops_df.columns[:10]].head()

Unnamed: 0,type,geometry,id,bus,highway,name,network,public_transport,wheelchair,description
0,Feature,"{'type': 'Point', 'coordinates': [5.7762176, 4...",135296,yes,bus_stop,Université - IUT-STAPS,M réso,stop_position,yes,
1,Feature,"{'type': 'Point', 'coordinates': [5.7423146, 4...",135930,yes,bus_stop,Hôpital Couple Enfant,M réso,stop_position,yes,
2,Feature,"{'type': 'Point', 'coordinates': [5.6815922, 4...",136570,yes,bus_stop,Cap des H',M réso,stop_position,,"Arrêt de régulation, non commercial."
3,Feature,"{'type': 'Point', 'coordinates': [5.6627217, 4...",136597,yes,bus_stop,Place de la Libération,M réso,stop_position,yes,
4,Feature,"{'type': 'Point', 'coordinates': [5.604304, 45...",137073,yes,bus_stop,Centr'Alp 2,M réso,stop_position,,


In [7]:
# Summarise every expanded column: non-null count and dtype. The non-null
# counts show how sparsely each OSM tag is filled in.
expanded_stops_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6874 entries, 0 to 6873
Data columns (total 78 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   type                           6874 non-null   object
 1   geometry                       6874 non-null   object
 2   id                             6874 non-null   int64 
 3   bus                            5924 non-null   object
 4   highway                        6874 non-null   object
 5   name                           6460 non-null   object
 6   network                        4907 non-null   object
 7   public_transport               6840 non-null   object
 8   wheelchair                     1322 non-null   object
 9   description                    285 non-null    object
 10  disused                        83 non-null     object
 11  network:wikidata               416 non-null    object
 12  fixme                          68 non-null     object
 13  sou

In [8]:
# Print a Markdown table of the first 10 columns and their dtypes, relabelling
# pandas' "object" dtype as "string".
# NOTE(review): the replacement runs on the whole rendered table, so it would
# also rewrite a column name containing "object", and it labels every object
# column as string even when it holds dicts (e.g. `geometry`).
print(
    get_markdown_dtype(expanded_stops_df[expanded_stops_df.columns[:10]]).replace(
        "object", "string"
    )
)

| Column | Dtype |
|--------|-------|
| type | string |
| geometry | string |
| id | int64 |
| bus | string |
| highway | string |
| name | string |
| network | string |
| public_transport | string |
| wheelchair | string |
| description | string |



In [9]:
# Fetch the OSM bus lines through the project's OSMBusLinesProcessor.
# NOTE(review): reload_pipeline=False presumably reuses already-fetched data
# instead of re-running the pipeline — confirm against the processor.
line_data = OSMBusLinesProcessor.fetch(reload_pipeline=False)
# The fetched records are loaded as-is; the tags of each line stay nested in
# the "tags" column at this stage.
lines_df = pd.DataFrame(line_data)
lines_df.head()

Unnamed: 0,id,tags,stops
0,2067887,"{'colour': 'e53b1a', 'from': 'Gare de Saint-Cl...","[1659415935, 8874916309, 11146173165, 11146173..."
1,2073673,"{'disused:type': 'route', 'name': 'Tad 4', 'ne...",[]
2,2569190,"{'colour': '#ee0064', 'from': 'Grenoble - Gare...","[2617010911, 474827289, 6074566590]"
3,2569239,"{'colour': '#ee0064', 'from': 'Aéroport Lyon S...","[6074566590, 457759141, 2617010911]"
4,2920548,"{'colour': '#1f72b9', 'description': 'Circule ...","[2299463674, 513946287, 513946283, 513946279, ..."


In [10]:
# Expand the per-line "tags" dicts into one column per OSM tag.
# The frame is built in a single pass from the list of dicts: the previous
# `.apply(pd.Series)` created one pd.Series per row, which is very slow on
# thousands of rows. The original index is passed explicitly so that the
# concat below still aligns the new columns row by row.
expanded = pd.DataFrame(lines_df["tags"].tolist(), index=lines_df.index)
expanded_lines_df = pd.concat([lines_df.drop(columns=["tags"]), expanded], axis=1)
expanded_lines_df.head()

Unnamed: 0,id,stops,colour,from,name,network,operator,public_transport:version,ref,route,...,charge,url,comment,fixme,name:pt,bus,check_date,name:eu,duration,public_transport
0,2067887,"[1659415935, 8874916309, 11146173165, 11146173...",e53b1a,Gare de Saint-Clair-Les-Roches,Ligne A : Gare de Saint-Clair-Les-Roches ⇒ Ron...,TPR,Courriers Rhodaniens / Fayard,2,A,bus,...,,,,,,,,,,
1,2073673,[],,,Tad 4,TPR,Courriers Rhodaniens / Fayard,1,4,bus,...,,,,,,,,,,
2,2569190,"[2617010911, 474827289, 6074566590]",#ee0064,Grenoble - Gare Routière,Ouibus 70 : Grenoble Gare Routière -> Aéroport...,BlaBlaBus,Faure Vercors,2,70,bus,...,,,,,,,,,,
3,2569239,"[6074566590, 457759141, 2617010911]",#ee0064,Aéroport Lyon Saint-Exupéry - Terminal 1,Ouibus 70 : Aéroport Lyon Saint-Exupéry -> Pla...,BlaBlaBus,Faure Vercors,2,70,bus,...,,,,,,,,,,
4,2920548,"[2299463674, 513946287, 513946283, 513946279, ...",#1f72b9,Saint Ismier - Bois Français,15 : Bois Français => Grenoble (via Chenevières),M réso,VFD,2,15,bus,...,,,,,,,,,,


In [11]:
# Inspect a single line (row label 4) across the first 25 columns; selecting
# one row returns a Series, displayed vertically as column name / value pairs.
expanded_lines_df.loc[4, expanded_lines_df.columns[:25]]

id                                                                    2920548
stops                       [2299463674, 513946287, 513946283, 513946279, ...
colour                                                                #1f72b9
from                                             Saint Ismier - Bois Français
name                         15 : Bois Français => Grenoble (via Chenevières)
network                                                                M réso
operator                                                                  VFD
public_transport:version                                                    2
ref                                                                        15
route                                                                     bus
to                                               Grenoble - Verdun-Préfecture
type                                                                    route
via                                                      Domène 

In [12]:
# Summarise every expanded column: non-null count and dtype. The non-null
# counts show how sparsely each OSM tag is filled in.
expanded_lines_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1867 entries, 0 to 1866
Data columns (total 70 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   id                        1867 non-null   int64 
 1   stops                     1867 non-null   object
 2   colour                    1440 non-null   object
 3   from                      1862 non-null   object
 4   name                      1867 non-null   object
 5   network                   1861 non-null   object
 6   operator                  1595 non-null   object
 7   public_transport:version  1864 non-null   object
 8   ref                       1864 non-null   object
 9   route                     1867 non-null   object
 10  to                        1862 non-null   object
 11  type                      1867 non-null   object
 12  via                       150 non-null    object
 13  disused:type              1 non-null      object
 14  note                    

In [13]:
# Print a Markdown table of the first 25 columns and their dtypes, relabelling
# pandas' "object" dtype as "string".
# NOTE(review): the replacement runs on the whole rendered table, so it would
# also rewrite a column name containing "object", and it labels every object
# column as string even when it holds lists (e.g. `stops`).
print(
    get_markdown_dtype(expanded_lines_df[expanded_lines_df.columns[:25]]).replace(
        "object", "string"
    )
)

| Column | Dtype |
|--------|-------|
| id | int64 |
| stops | string |
| colour | string |
| from | string |
| name | string |
| network | string |
| operator | string |
| public_transport:version | string |
| ref | string |
| route | string |
| to | string |
| type | string |
| via | string |
| disused:type | string |
| note | string |
| network:wikidata | string |
| network:wikipedia | string |
| old_name | string |
| opening_hours | string |
| description | string |
| wheelchair | string |
| old_ref | string |
| not:network:wikidata | string |
| source | string |
| gtfs_id | string |



**TODO**
- Filter out disabled (disused) lines and stops, e.g. those carrying the OSM `disused` / `disused:type` tags listed above