In [9]:
import pandas as pd
import numpy as np
import os
import sys

In [6]:
# Repository root: the parent of the directory the notebook kernel was started in.
# os.getcwd() is already absolute, so normalising "<cwd>/.." yields the absolute parent path.
ROOT_DIR = os.path.normpath(os.path.join(os.getcwd(), os.pardir))

In [5]:
# Read the raw hourly detector export: a semicolon-separated, gzip-compressed CSV
# located under <ROOT_DIR>/data/raw.
raw_data_path = os.path.join(ROOT_DIR, "data", "raw", "det_val_hr_2024_12.csv.gz")
# low_memory=False lets pandas infer each column's dtype from the whole file
# instead of chunk by chunk.
df = pd.read_csv(raw_data_path, sep=";", compression="gzip", low_memory=False)

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() (doing so appends a stray "None" line).
df.info()

# Leave the preview as the cell's last expression so the notebook renders it as
# a table rather than a line-wrapped plain-text dump.
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 210144 entries, 0 to 210143
Data columns (total 10 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   detid_15      210144 non-null  int64  
 1   tag           210144 non-null  object 
 2   stunde        210144 non-null  int64  
 3   qualitaet     210144 non-null  float64
 4   q_kfz_det_hr  210144 non-null  int64  
 5   v_kfz_det_hr  210144 non-null  int64  
 6   q_pkw_det_hr  210144 non-null  int64  
 7   v_pkw_det_hr  210144 non-null  int64  
 8   q_lkw_det_hr  210144 non-null  int64  
 9   v_lkw_det_hr  210144 non-null  int64  
dtypes: float64(1), int64(8), object(1)
memory usage: 16.0+ MB
None
          detid_15         tag  stunde  qualitaet  q_kfz_det_hr  v_kfz_det_hr  \
0  100101010000167  30.12.2024       6        1.0           114            69   
1  100101010000167  30.12.2024       7        1.0           134            71   
2  100101010000167  30.12.2024       8        1.0 

In [None]:
# Frequency of each distinct value in the "qualitaet" column, most frequent first.
# NOTE(review): "qualitaet" presumably is a per-row data-quality score — confirm with the data provider.
df.loc[:, "qualitaet"].value_counts()

qualitaet
1.00    166479
0.92     25627
0.83     11101
0.75      6937
Name: count, dtype: int64

In [10]:
# Put ROOT_DIR at the front of the module search path so the `scripts` package
# imported below can be resolved (presumably it lives directly under ROOT_DIR).
# The membership guard keeps re-runs of this cell from stacking duplicate entries.
root_already_registered = any(entry == ROOT_DIR for entry in sys.path)
if not root_already_registered:
    sys.path.insert(0, ROOT_DIR)

# Project-local helpers; these imports have to stay below the sys.path tweak.
from scripts.processor.kpi_loader import TrafficKPILoader
from scripts.processor.enricher import TrafficDataEnricher

In [11]:
# Load KPI
kpi_path = os.path.join(ROOT_DIR, "data", "raw", "det_val_hr_2024_12.csv.gz")
kpi_loader = TrafficKPILoader(kpi_path)
df_kpi = kpi_loader.load()
df_kpi

Unnamed: 0,detid_15,tag,qualitaet,q_kfz_det_hr,v_kfz_det_hr,q_pkw_det_hr,v_pkw_det_hr,q_lkw_det_hr,v_lkw_det_hr,hour
0,100101010000167,2024-12-30,1.0,114,69,103,69,11,70,6
1,100101010000167,2024-12-30,1.0,134,71,126,71,8,71,7
2,100101010000167,2024-12-30,1.0,160,65,151,65,9,65,8
3,100101010000167,2024-12-30,1.0,304,60,298,60,6,58,9
4,100101010000167,2024-12-30,1.0,440,61,428,61,12,64,10
...,...,...,...,...,...,...,...,...,...,...
210139,100101010093329,2024-12-31,1.0,151,32,149,32,2,28,19
210140,100101010093329,2024-12-31,1.0,168,30,162,31,6,28,20
210141,100101010093329,2024-12-31,1.0,140,36,138,36,2,28,21
210142,100101010093329,2024-12-31,1.0,140,35,139,35,1,35,22


In [12]:
# Enrich the KPI rows with detector master data ("Stammdaten") read from the
# Excel workbook under <ROOT_DIR>/data/raw, using the project's TrafficDataEnricher.
meta_path = os.path.join(ROOT_DIR, "data", "raw", "Stammdaten_Verkehrsdetektion_2022_07_20.xlsx")
enricher = TrafficDataEnricher(df_kpi, meta_path)
df_enriched = enricher.enrich()

# Sanity check: report when enrichment changes the number of rows.
# NOTE(review): the previously saved outputs show the enriched index running
# past the KPI row count, which suggests the step adds rows (e.g. several
# metadata records per detector) — confirm whether that is intended.
# Deliberately a printed warning rather than an assert so the notebook still
# runs to completion while the question is open.
n_kpi_rows = len(df_kpi)
n_enriched_rows = len(df_enriched)
if n_enriched_rows != n_kpi_rows:
    print(
        f"WARNING: enrichment changed the row count: "
        f"{n_kpi_rows} -> {n_enriched_rows} ({n_enriched_rows - n_kpi_rows:+d})"
    )

# Last expression: show the enriched frame as the cell output.
df_enriched

Unnamed: 0,detid_15,tag,qualitaet,q_kfz_det_hr,v_kfz_det_hr,q_pkw_det_hr,v_pkw_det_hr,q_lkw_det_hr,v_lkw_det_hr,hour,...,SPUR,annotation,LÄNGE (WGS84),BREITE (WGS84),INBETRIEBNAHME,ABBAUDATUM,DEINSTALLIERT,KOMMENTAR,lon,lat
0,100101010000167,2024-12-30,1.0,114,69,103,69,11,70,6,...,HF_R,Hauptfahrbahn rechte Spur,13.192578,52.433868,2003-02-18,NaT,,,13.192578,52.433868
1,100101010000167,2024-12-30,1.0,134,71,126,71,8,71,7,...,HF_R,Hauptfahrbahn rechte Spur,13.192578,52.433868,2003-02-18,NaT,,,13.192578,52.433868
2,100101010000167,2024-12-30,1.0,160,65,151,65,9,65,8,...,HF_R,Hauptfahrbahn rechte Spur,13.192578,52.433868,2003-02-18,NaT,,,13.192578,52.433868
3,100101010000167,2024-12-30,1.0,304,60,298,60,6,58,9,...,HF_R,Hauptfahrbahn rechte Spur,13.192578,52.433868,2003-02-18,NaT,,,13.192578,52.433868
4,100101010000167,2024-12-30,1.0,440,61,428,61,12,64,10,...,HF_R,Hauptfahrbahn rechte Spur,13.192578,52.433868,2003-02-18,NaT,,,13.192578,52.433868
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223527,100101010093329,2024-12-31,1.0,151,32,149,32,2,28,19,...,HF_R,Hauptfahrbahn rechte Spur,13.387089,52.531362,2016-09-04,NaT,,,13.387089,52.531362
223528,100101010093329,2024-12-31,1.0,168,30,162,31,6,28,20,...,HF_R,Hauptfahrbahn rechte Spur,13.387089,52.531362,2016-09-04,NaT,,,13.387089,52.531362
223529,100101010093329,2024-12-31,1.0,140,36,138,36,2,28,21,...,HF_R,Hauptfahrbahn rechte Spur,13.387089,52.531362,2016-09-04,NaT,,,13.387089,52.531362
223530,100101010093329,2024-12-31,1.0,140,35,139,35,1,35,22,...,HF_R,Hauptfahrbahn rechte Spur,13.387089,52.531362,2016-09-04,NaT,,,13.387089,52.531362


In [None]:
# Optional export of the enriched frame to Parquet.
# Disabled by default so that "Run All" never overwrites an existing file;
# set SAVE_ENRICHED to True to persist the result.
SAVE_ENRICHED = False

if SAVE_ENRICHED:
    # Anchor the target at ROOT_DIR like every other path in this notebook; a
    # bare relative path would resolve against the kernel's working directory,
    # which is a sub-directory of ROOT_DIR.
    enriched_path = os.path.join(ROOT_DIR, "data", "processed", "kpi_enriched_dec_2024.parquet")
    # Create data/processed on first use instead of failing on a missing folder.
    os.makedirs(os.path.dirname(enriched_path), exist_ok=True)
    df_enriched.to_parquet(enriched_path, index=False)