In [6]:
import os
from datetime import datetime, timedelta
from typing import List

import pandas as pd


def file_paths(base_dir: str, start_date: datetime, end_date: datetime) -> List[str]:
    """Return the hourly delay CSV paths under *base_dir* that exist on disk.

    Candidate paths have the form ``<base_dir>/YYYY/MM/DD/delays-HH.csv`` and
    are generated in one-hour steps from *start_date* up to and including
    *end_date*. Candidates that do not exist are left out of the result.
    """
    one_hour = timedelta(hours=1)
    existing = []
    moment = start_date
    while moment <= end_date:
        candidate = base_dir + "/" + moment.strftime("%Y/%m/%d/delays-%H.csv")
        if os.path.exists(candidate):
            existing.append(candidate)
        moment += one_hour
    return existing


def normalize_brigade(brigade: str) -> str:
    """Return *brigade* as a canonical integer string when it is a whole number.

    Values such as ``"9"``, ``"009"``, ``"9.0"`` or ``"9.00"`` become ``"9"``.
    Anything else (for example ``"A12"``, ``"1.05"``, ``"1.0.0"``, ``"nan"``)
    is returned unchanged.
    """
    whole, dot, fraction = brigade.partition(".")
    # isdecimal() (unlike isdigit()) only accepts characters that int() can parse.
    if not whole.isdecimal():
        return brigade
    # A fractional part is accepted only when it consists solely of zeros, so
    # "1.05" or "1.0.0" are not mistaken for whole numbers.
    if dot and (not fraction or fraction.strip("0")):
        return brigade
    return str(int(whole))


def normalize_delay(delay: str) -> int:
    """Convert a textual delay into a signed integer.

    The first whitespace-separated token of *delay* is parsed as an integer.
    The result is negated when the text contains "przed czasem" (Polish for
    "ahead of time").
    """
    is_early = "przed czasem" in delay
    magnitude = int(delay.split()[0])
    if is_early:
        return -magnitude
    return magnitude


def normalize_outside(outside) -> bool:
    """Report whether *outside* holds a value, i.e. is not NA/NaN/None."""
    has_value = pd.notnull(outside)
    return has_value


# Collect the hourly delay dumps that exist on disk for the requested window.
paths = file_paths(
    "data/traffic",
    datetime(2024, 12, 8, 0, 0),
    datetime(2025, 1, 2, 23)
)
if not paths:
    # Fail with an explicit message instead of an IndexError on an empty list.
    raise FileNotFoundError(
        "no delays-HH.csv files found under data/traffic for the requested date range"
    )

# Read every file first and concatenate once; concatenating inside a loop
# re-copies the accumulated frame on every iteration.
df = pd.concat([pd.read_csv(path) for path in paths])
df.drop_duplicates(inplace=True)

# Normalize the raw columns with the helper functions defined above.
df["Brigade"] = df["Brigade"].astype(str).apply(normalize_brigade)
df["Delay"] = df["Delay"].apply(normalize_delay)
df["Outside"] = df["Outside"].apply(normalize_outside)

# Write the merged result without the (non-unique) concatenated index.
df.to_csv("data/traffic/delays-merged.csv", index=False)

Unnamed: 0,Type,Vehicle No,Brigade,Route,Trip Headsign,Delay,Stop Name,Outside,Timestamp
63,Tramwaj,3177,558,1,Zjazd do zajezdni Żoliborz,3,Powązkowska 03,False,2024-12-07 23:08:15.650217
103,Tramwaj,3177,9,18,Żerań FSO,2,Pl. Inwalidów 03,False,2024-12-08 04:06:54.808389
664,Tramwaj,3177,9,18,Żerań FSO,-1,Goworka 02,False,2024-12-08 06:08:35.075112
232,Tramwaj,3177,9,18,Żerań FSO,1,Muranów 06,False,2024-12-08 09:09:42.213091
365,Tramwaj,3177,9,18,Żerań FSO,0,Muranowska 08,False,2024-12-08 09:09:42.213091
...,...,...,...,...,...,...,...,...,...
914,Tramwaj,3177,3,24,Gocławek,-1,Rondo Daszyńskiego 05,False,2024-12-24 11:10:06.497997
28,Tramwaj,3177,3,24,Nowe Bemowo,15,Żółkiewskiego 03,False,2024-12-24 12:10:36.131286
803,Tramwaj,3177,3,24,Gocławek,0,Koło 01,False,2024-12-24 13:10:29.848179
843,Tramwaj,3177,3,24,Gocławek,-1,Koło 01,False,2024-12-24 13:10:29.848179
