In [1]:
import json
from pathlib import Path

# Filters zmg.geojson for features where properties.highway == "bus_stop"
# and writes the matching features to a new FeatureCollection file.

input_path = Path("osm/zmg.geojson")
output_path = Path("zmg_bus_stops.geojson")

data = json.loads(input_path.read_text(encoding="utf-8"))

# Treat a non-object root, a missing "features" member, and a non-array
# "features" value (e.g. null) all as "no features" instead of crashing.
raw_features = data.get("features") if isinstance(data, dict) else None
features = raw_features if isinstance(raw_features, list) else []


def _is_bus_stop(feature):
    """Return True when `feature` is a dict whose properties.highway is "bus_stop".

    Non-dict features and features whose "properties" is missing, null, or not
    a dict are reported as non-matching rather than raising.
    """
    if not isinstance(feature, dict):
        return False
    props = feature.get("properties")
    return isinstance(props, dict) and props.get("highway") == "bus_stop"


bus_stops = [f for f in features if _is_bus_stop(f)]

out = {"type": "FeatureCollection", "features": bus_stops}
# Preserve optional metadata if present. Only look it up when the root is a
# JSON object: `key in data` on a list/str root could succeed and then fail
# on `data[key]`.
if isinstance(data, dict):
    for key in ("bbox", "crs"):
        if key in data:
            out[key] = data[key]

output_path.write_text(json.dumps(out, ensure_ascii=False, indent=2), encoding="utf-8")
print(f"Wrote {len(bus_stops)} bus_stop features to {output_path}")

Wrote 6597 bus_stop features to zmg_bus_stops.geojson


In [2]:
import pandas as pd

# Flatten the nested feature dicts into a table; nested keys become
# dot-separated column names such as "geometry.type" and "properties.name".
bus_stops_df = pd.json_normalize(data=bus_stops)

In [3]:
# Summarize columns, non-null counts and dtypes to see which tags are populated.
bus_stops_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6597 entries, 0 to 6596
Data columns (total 33 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   type                         6597 non-null   object
 1   geometry.type                6597 non-null   object
 2   geometry.coordinates         6597 non-null   object
 3   properties.bus               6574 non-null   object
 4   properties.highway           6597 non-null   object
 5   properties.name              6569 non-null   object
 6   properties.public_transport  6576 non-null   object
 7   properties.tourism           2 non-null      object
 8   properties.zoo               1 non-null      object
 9   properties.name:en           4 non-null      object
 10  properties.network           6558 non-null   object
 11  properties.operator          6553 non-null   object
 12  properties.ref               6414 non-null   object
 13  properties.gtfs_id           6399

In [4]:
# Show the route_ref value only for stops where the tag is present.
has_route_ref = bus_stops_df["properties.route_ref"].notna()
bus_stops_df.loc[has_route_ref, "properties.route_ref"]

4                                           T09-Belisario
5                                           T09-Belisario
7                                                    C124
10            C40;C99;C102;C103;C132;C76-V1;C76-V2;C80-V1
19                                                    T10
                              ...                        
6411                                               T06-03
6412                                               T06-03
6416    C126-V2;MP-A05-2;C12-V1;C25;C14-V1;C101;C14-V2...
6433                                                   L3
6439                                              T03-C01
Name: properties.route_ref, Length: 6399, dtype: object

In [5]:
def _parse_route_refs(value):
    """Split a semicolon-separated route_ref value into a set of route codes.

    Missing values give an empty set. Each token is stripped of surrounding
    whitespace and empty tokens are dropped, so "C40; C99;" yields
    {"C40", "C99"} rather than {"C40", " C99", ""}.
    """
    if not pd.notna(value):
        return set()
    return {token.strip() for token in str(value).split(";") if token.strip()}


# One set of route codes per stop; stops without a route_ref get an empty set.
bus_stops_df["route_ref_set"] = bus_stops_df["properties.route_ref"].apply(_parse_route_refs)

In [6]:
# Union of every per-stop set: all distinct route codes referenced by any stop.
all_route_refs = set().union(*bus_stops_df["route_ref_set"])
print(len(all_route_refs))
# Print in sorted order. Iterating a set of strings follows hash order, which
# changes between kernel sessions because of string hash randomization, so the
# raw set repr would differ on every Restart & Run All.
print(sorted(all_route_refs))

170
{'T13B-V2', 'C47-V1', 'T13A-C03', 'T13B-V1', 'C76-V1', 'T11B', 'C99', 'MP-A05-1', 'C17', 'C28', 'C23', 'C80-V1', 'T03', 'T18-2-Lopez', 'T17-C01', 'C15', 'T13A-C01', 'C11', 'C49-V1', 'MP-C03', 'C115', 'MP-A03', 'C86', 'T16B-1', 'C112', 'C27', 'T14B', 'MC-A18', 'C133-V2', 'C100', 'C58-V1', 'C128A-V1', 'C98', 'T04B-1', 'T10', 'C25', 'C78-V1', 'C10', 'C41-V1', 'C80-V2', 'C14-V1', 'C123-V1', 'T04B-3', 'C114-V1', 'MC-A16', 'T13B-C01-1', 'C126-V1', 'T13A-C02', 'C127-V1', 'C124', 'T18-1-Lopez', 'C50-V2', 'MC-A20', 'C120', 'C111-V2', 'C13-V2', 'T04B-4', 'C105', 'C125-V2', 'T02-A02', 'C111-V3', 'MC-A17', 'C42', 'MP-C02', 'MP-A01', 'C41-V2', 'T13B-C01-2', 'MP-A02', 'C128', 'T02', 'L3', '635', 'MC-A09', 'T14A-C01', 'L2', 'C104', 'MP-A05-2', 'C111-V1', 'T09-Belisario', 'C70', 'T04A-1', 'C96-V2', 'T13-A1', 'MC-A03', 'T16B-C03', 'T17-1', 'T16B', 'C126-V2', 'C48', 'T07-C01', 'C12-V1', 'C47-V2', 'C03', 'T13C-2', 'MC-A21', 'T06-03', 'C67-V1', 'C113-V2', 'C131-V1', 'C125-V1', 'MP-A07', 'T08', 'C132',