In [1]:
import pandas as pd
import geopandas as gpd
import glob
import os
import numpy as np

## Traffic detector location data

In [2]:
# File holding the detector positions
geo_path = "data/teu_standorte.json"

# Read the file and expose the "teuID" column under the name "detector_id"
gdf_det = gpd.read_file(geo_path).rename(columns={"teuID": "detector_id"})

# Store the identifiers as strings
detector_ids = gdf_det["detector_id"].astype(str)
gdf_det["detector_id"] = detector_ids

n_detectors = len(gdf_det)
print("Detector locations loaded:", n_detectors)

Detector locations loaded: 538


In [3]:
# Preview the first five detector location records
gdf_det.head()

Unnamed: 0,detector_id,Position,Location,Direction,Start of Operation,Lane,geometry
0,TEU00002_Det0,A115,AS Spanische Allee – Brücke,Südwest,2003-02-18,Hauptfahrbahn rechte Spur,POINT (13.19258 52.43387)
1,TEU00002_Det1,A115,AS Spanische Allee – Brücke,Südwest,2003-02-18,"Hauptfahrbahn, 2. Spur von rechts",POINT (13.19258 52.43387)
2,TEU00002_Det2,A115,AS Spanische Allee – Brücke,Nordost,2003-02-18,Hauptfahrbahn rechte Spur,POINT (13.19275 52.43381)
3,TEU00002_Det3,A115,AS Spanische Allee – Brücke,Nordost,2003-02-18,"Hauptfahrbahn, 2. Spur von rechts",POINT (13.19275 52.43381)
4,TEU00004_Det0,Clayallee,zwischen Scharfestraße und Propst-Süssmilch-We...,Süd,2003-02-18,Hauptfahrbahn rechte Spur,POINT (13.2613 52.43664)


In [4]:
# Load every per-detector CSV from `traffic_path`, tag each row with the
# detector id taken from the file name, coerce the date / hour / measurement
# columns to proper dtypes, and stack everything into one frame (`traffic_raw`).
traffic_path = "data/detektoren_2024_01/"

# Sort the matches: glob returns them in arbitrary, filesystem-dependent order,
# which would make the row order of the combined frame differ between runs.
files = sorted(glob.glob(os.path.join(traffic_path, "*.csv")))

traffic_frames = []

for f in files:
    df = pd.read_csv(f, sep=";", low_memory=False)

    # Rows cannot be dated without this column, and the dropna() below would
    # raise a KeyError on the missing "date_local" column, so skip the file.
    if "Datum (Ortszeit)" not in df.columns:
        print("Skipping file without 'Datum (Ortszeit)' column:", f)
        continue

    # Extract detector_id from filename; splitext removes only the trailing
    # extension (str.replace would also strip ".csv" occurring mid-name).
    detector_id = os.path.splitext(os.path.basename(f))[0]
    df["detector_id"] = detector_id

    # Convert date column; unparseable values become NaT
    df["date_local"] = pd.to_datetime(df["Datum (Ortszeit)"], errors="coerce")

    # Convert hour column (should be numeric); unparseable values become NaN
    if "Stunde des Tages (Ortszeit)" in df.columns:
        df["hour_local"] = pd.to_numeric(df["Stunde des Tages (Ortszeit)"], errors="coerce")

    # Coerce the measurement columns to numeric; unparseable values become NaN.
    # NOTE(review): the q* columns look like counts and the v* columns like
    # speeds -- confirm against the data set documentation.
    for col in ["qkfz", "qpkw", "qlkw", "vkfz", "vpkw", "vlkw"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    # Drop rows where date is missing
    df = df.dropna(subset=["date_local"])

    traffic_frames.append(df)

# Fail with an actionable message instead of the generic
# "No objects to concatenate" that pd.concat raises for an empty list.
if not traffic_frames:
    raise ValueError(f"No usable detector CSV files found in {traffic_path!r}")

# Combine all detectors
traffic_raw = pd.concat(traffic_frames, ignore_index=True)

print("Traffic rows loaded:", len(traffic_raw))

Traffic rows loaded: 544025


In [5]:
# Preview the first five rows of the combined traffic frame
traffic_raw.head(5)

Unnamed: 0,Datum (Ortszeit),Stunde des Tages (Ortszeit),Vollständigkeit,ZScore_Det0,ZScore_Det1,ZScore_Det2,hist_cor,localTime,month,qkfz,qlkw,qpkw,utc,vkfz,vlkw,vpkw,detector_id,date_local,hour_local
0,2024-01-01,0,,,,,,,,,,,2023-12-31 23:00:00+00:00,,,,TEU00002_Det0,2024-01-01,0
1,2024-01-01,1,,,,,,,,,,,2024-01-01 00:00:00+00:00,,,,TEU00002_Det0,2024-01-01,1
2,2024-01-01,2,,,,,,,,,,,2024-01-01 01:00:00+00:00,,,,TEU00002_Det0,2024-01-01,2
3,2024-01-01,3,,,,,,,,,,,2024-01-01 02:00:00+00:00,,,,TEU00002_Det0,2024-01-01,3
4,2024-01-01,4,,,,,,,,,,,2024-01-01 03:00:00+00:00,,,,TEU00002_Det0,2024-01-01,4


In [6]:
pip install ydata-profiling

Defaulting to user installation because normal site-packages is not writeable
Collecting ydata-profiling
  Downloading ydata_profiling-4.17.0-py2.py3-none-any.whl.metadata (22 kB)
Collecting visions<0.8.2,>=0.7.5 (from visions[type_image_path]<0.8.2,>=0.7.5->ydata-profiling)
  Downloading visions-0.8.1-py3-none-any.whl.metadata (11 kB)
Collecting minify-html>=0.15.0 (from ydata-profiling)
  Downloading minify_html-0.18.1-cp312-cp312-win_amd64.whl.metadata (18 kB)
Collecting filetype>=1.0.0 (from ydata-profiling)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting phik<0.13,>=0.11.1 (from ydata-profiling)
  Downloading phik-0.12.5-cp312-cp312-win_amd64.whl.metadata (5.6 kB)
Collecting multimethod<2,>=1.4 (from ydata-profiling)
  Downloading multimethod-1.12-py3-none-any.whl.metadata (9.6 kB)
Collecting typeguard<5,>=3 (from ydata-profiling)
  Downloading typeguard-4.4.4-py3-none-any.whl.metadata (3.3 kB)
Collecting imagehash==4.3.1 (from ydata-profiling)
  Dow