# Libraries

In [1]:
import geopandas as gpd
import numpy as np
from tqdm import tqdm
import pandas as pd
import matplotlib.pylab as plt

# Load data

In [2]:
# Load the raw 2024 traffic-count export.
# The recorded run of this cell emitted a warning on this call — presumably
# pandas' DtypeWarning about mixed-type columns (TODO confirm). low_memory=False
# makes pandas parse each column in one pass so its dtype is inferred consistently.
df = pd.read_csv(
    'raw/sid_dav_verkehrszaehlung_miv_od2031_2024.csv',
    low_memory=False,
)

  df = pd.read_csv('raw/sid_dav_verkehrszaehlung_miv_od2031_2024.csv')


In [3]:
print(df.columns)

# German source header -> English column name used in the rest of the notebook.
# Renaming by name (instead of assigning a list positionally) keeps every label
# attached to the right data even if the column order of the CSV changes.
column_translations = {
    "MSID": "Station_ID",
    "MSName": "Measurement Station Name",
    "ZSID": "Counting Station ID",
    "ZSName": "Street_name",
    "Achse": "Axis",
    "HNr": "House Number",
    "Hoehe": "Height",
    "EKoord": "East Coordinate",
    "NKoord": "North Coordinate",
    "Richtung": "Direction",
    "Knummer": "Counting Number",
    "Kname": "Counting Name",
    "AnzDetektoren": "Number of Detectors",
    "D1ID": "Detector ID 1",
    "D2ID": "Detector ID 2",
    "D3ID": "Detector ID 3",
    "D4ID": "Detector ID 4",
    "MessungDatZeit": "Time",
    "LieferDat": "Delivery Date",
    "AnzFahrzeuge": "Number of Vehicles",
    "AnzFahrzeugeStatus": "Status of Vehicle Count",
}

# Kept for backward compatibility: the English names, in source-column order.
translations = list(column_translations.values())

# Skip the rename when the cell is re-run on an already translated frame.
if not set(translations).issubset(df.columns):
    missing_columns = [name for name in column_translations if name not in df.columns]
    if missing_columns:
        # Fail loudly rather than continue with a partially translated frame.
        raise KeyError(f"Columns missing from the raw data: {missing_columns}")
    df = df.rename(columns=column_translations)

Index(['MSID', 'MSName', 'ZSID', 'ZSName', 'Achse', 'HNr', 'Hoehe', 'EKoord',
       'NKoord', 'Richtung', 'Knummer', 'Kname', 'AnzDetektoren', 'D1ID',
       'D2ID', 'D3ID', 'D4ID', 'MessungDatZeit', 'LieferDat', 'AnzFahrzeuge',
       'AnzFahrzeugeStatus'],
      dtype='object')


In [4]:
# Size of a complete station x timestamp grid, to compare with the actual row count.
n_stations = len(df['Station_ID'].unique())
n_timestamps = len(df['Time'].unique())
n_stations * n_timestamps

1844430

In [5]:
# Number of rows actually present in the raw data.
len(df)

1799789

In [6]:
# Count the distinct station identifiers.
len(df['Station_ID'].unique())

210

In [7]:
# Distinct coordinate pairs; the coordinates are expressed in CH1903+/LV95.
coordinate_columns = ["East Coordinate", "North Coordinate"]  # EKoord, NKoord
df.loc[:, coordinate_columns].drop_duplicates()

Unnamed: 0,East Coordinate,North Coordinate
0,2683009.890,1243936.200
8784,2683013.180,1243937.640
17568,2682480.510,1243162.500
26352,2682484.910,1243162.410
35136,2681573.790,1242969.910
...,...,...
1786610,2682115.540,1247071.906
1789899,2682106.390,1247095.039
1793188,2685376.041,1251634.045
1796861,2684566.331,1252126.196


In [8]:
# Parse the measurement timestamps into pandas datetimes.
df['Time'] = pd.to_datetime(df['Time'])

In [9]:
# Keep only the vehicle counts, keyed by station, timestamp and direction.
index_keys = ['Station_ID', 'Time', 'Direction']
agg = df.set_index(index_keys)[['Number of Vehicles']]

In [10]:
# Some (station, time, direction) keys are duplicated; average their counts.
agg = agg.groupby(level=['Station_ID', 'Time', 'Direction']).mean()

In [11]:
# One detector corresponds to one direction: print any station where this does not hold.
for station_id in agg.index.levels[0]:
    # After selecting a station, the remaining index levels are (Time, Direction).
    station_directions = agg.loc[station_id].index.get_level_values('Direction')
    if len(station_directions.unique()) != 1:
        print(station_id)

In [12]:
import sys
from pathlib import Path

# Make the ../../assets directory importable (presumably where helper modules
# such as temporal_agg live — TODO confirm).
assets_dir = str(Path("../../assets").resolve())
# Guard so that re-running the cell does not append the same entry again.
if assets_dir not in sys.path:
    sys.path.append(assets_dir)

from temporal_agg import hourly_to_aadt

In [13]:
# After the groupby no index key should be duplicated any more: expect 0.
duplicated_keys = agg.index.duplicated()
duplicated_keys.sum()

0

In [14]:
# Move Direction from the index to a regular column; station and time stay in the index.
hourly_counts = agg.reset_index(level='Direction')

# hourly_to_aadt returns three values; only the first one is kept.
agg, _, _ = hourly_to_aadt(
    hourly_counts,
    sensor_id_name='Station_ID',
    time_name='Time',
    counts_name=['Number of Vehicles'],
    speeds_name={},
)

Hourly capture rate is 89.7 %
Daily capture rate is 91.9 %


In [15]:
# We have fewer sensors here, as some were removed due to missing values.
agg

Unnamed: 0_level_0,Number of Vehicles,Number of Vehicles_AAWT
Station_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
Z001M001,6525.781931,6904.746725
Z001M002,7983.306250,8540.035088
Z002M001,5347.211111,5747.753876
Z002M002,6437.848611,6862.313953
Z003M001,3765.486111,3980.346899
...,...,...
Z108M005,4076.890728,4431.916667
Z109M002,7258.766423,7147.701031
Z109M003,9108.518248,9269.886598
Z110M003,3478.362745,3643.697248


In [16]:
# Check whether station Z111M002 is still present in the aggregated result.
'Z111M002' in agg.index

False

In [17]:
# True when every 'Number of Vehicles' value of station Z111M002 is missing.
z111m002_counts = df.loc[df['Station_ID'] == 'Z111M002', 'Number of Vehicles']
z111m002_counts.isna().sum() == len(z111m002_counts)

True

# Referential

In [18]:
# One row per station (first occurrence), computed once instead of three times
# over the full raw frame.
stations = df.drop_duplicates(subset=['Station_ID'])

# Points are built from the East/North coordinates, declared as EPSG:2056,
# then reprojected to EPSG:4326.
gdf = gpd.GeoDataFrame(
    stations[["Station_ID", "Street_name", "Direction"]],
    geometry=gpd.points_from_xy(stations["East Coordinate"], stations["North Coordinate"]),
    crs='EPSG:2056',
).to_crs('epsg:4326')

In [19]:
# Number of distinct stations in the referential.
len(gdf['Station_ID'].unique())

210

In [20]:
# Display the station referential (one point geometry per station).
gdf

Unnamed: 0,Station_ID,Street_name,Direction,geometry
0,Z001M001,Seestrasse (Strandbad Wollishofen),auswärts,POINT (8.53707 47.34106)
8784,Z001M002,Seestrasse (Strandbad Wollishofen),einwärts,POINT (8.53711 47.34107)
17568,Z002M001,Albisstrasse (Widmerstrasse),auswärts,POINT (8.52992 47.33417)
26352,Z002M002,Albisstrasse (Widmerstrasse),einwärts,POINT (8.52998 47.33417)
35136,Z003M001,Sood-/Leimbachstrasse,auswärts,POINT (8.51789 47.33255)
...,...,...,...,...
1786610,Z109M002,Manessestrasse - Schimmelstrasse,Schimmelstrasse,POINT (8.52581 47.36937)
1789899,Z109M003,Manessestrasse - Schimmelstrasse,Schimmelstrasse,POINT (8.52569 47.36958)
1793188,Z110M003,Autobahn A1L - Ausfahrt Aubrugg,auswärts,POINT (8.56984 47.40999)
1796861,Z111M001,Hagenholzstrasse (Riedgrabenweg),auswärts,POINT (8.55921 47.41452)


# Map matching with OSM

In [21]:
from map_matching_OSM import points_matching

In [22]:
# Match each station to OSM with the project helper. The returned frame replaces gdf;
# a later cell reads an 'osm_name' column from it — presumably added here (TODO confirm).
gdf = points_matching(gdf)

City downloaded


100%|██████████| 210/210 [00:01<00:00, 110.53it/s]

We failed to match 0 sensors
...on a total of 210 sensors





In [23]:
# Compare the raw street names with the matched OSM names (distinct, complete pairs only).
name_pairs = gdf[['Street_name', 'osm_name']]
name_pairs.drop_duplicates().dropna()

Unnamed: 0,Street_name,osm_name
0,Seestrasse (Strandbad Wollishofen),Seestrasse
17568,Albisstrasse (Widmerstrasse),Albisstrasse
35136,Sood-/Leimbachstrasse,Soodstrasse
43920,Sood-/Leimbachstrasse,Leimbachstrasse
61488,Birmensdorferstrasse (Triemli),Birmensdorferstrasse
...,...,...
1777826,Tobelhofstrasse (Dreiwiesenstrasse),Tobelhofstrasse
1786610,Manessestrasse - Schimmelstrasse,Manessestrasse
1789899,Manessestrasse - Schimmelstrasse,Schimmelstrasse
1793188,Autobahn A1L - Ausfahrt Aubrugg,Ueberlandstrasse


In [24]:
# Attach the station attributes to the aggregated counts, then rename the columns.
station_attributes = gdf.set_index('Station_ID')
renamed_columns = {
    'Number of Vehicles': 'AADT',
    'Number of Vehicles_AAWT': 'AAWT',
    'Street_name': 'raw_name',
    'Direction': 'raw_direction',
}
df = agg.join(station_attributes).rename(columns=renamed_columns)

In [25]:
# Flag every record as one-way.
df = df.assign(raw_oneway=True)

# Final saving

In [26]:
# Wrap the joined table as a GeoDataFrame that uses its 'geometry' column.
df = gpd.GeoDataFrame(df, geometry='geometry', crs='epsg:4326')

In [28]:
from validate_dataset import validate_dataset

# Project helper. The recorded output reports NaN counts for AADT and geometry and
# low/high outlier counts for AADT; what else it changes in df is not visible here.
df = validate_dataset(df)

Number of NaN values for AADT: 0
Number of NaN values for geometry: 0
Number of low outliers for AADT: 0
Number of high outliers for AADT: 0


In [29]:
# NOTE(review): index=False leaves the index out of the file. If df is still indexed by
# Station_ID at this point (as it is right after the join), the station IDs are not
# exported — confirm this is intended.
output_path = 'treated/Zurich_AADT_AAWT_2024.geojson'
df.to_file(output_path, index=False)