In [None]:
import pandas as pd
import time
import pickle
import logging
import sys
from datetime import datetime
from datetime import timedelta


In [None]:
# Bike trips: one CSV per city, parsed on `datetime_hour` and ordered chronologically.
filename_DD = "../data/nextbike/hourly_demand_supply_Dresden 2025-03-19_10-47-56.csv"
filename_FB = "../data/nextbike/hourly_demand_supply_Freiburg_missing_interpolated_2025-03-19_10-47-56.csv"
df_DD = pd.read_csv(filename_DD, parse_dates=["datetime_hour"]).sort_values("datetime_hour")
df_FB = pd.read_csv(filename_FB, parse_dates=["datetime_hour"]).sort_values("datetime_hour")

# Events: start and end timestamps are parsed as datetimes for the later hourly bucketing.
filename_events = "../data/events/df_events_with_hex_id_2025-04-08_13-38-19.csv"
df_events = pd.read_csv(filename_events, parse_dates=["StartDateTime", "EndDateTime"])



In [None]:
# Baseline row counts, captured before any joins so later cells can verify
# that no rows were added or lost.
initial_len_df_DD, initial_len_df_FB = len(df_DD), len(df_FB)

In [None]:
# Weather: read the hourly weather table of each city and attach it to the trip table.
filename_weather_DD = "../data/weather/df_Dresden_weather_hourly 2025-03-28_20-51-37.csv"
filename_weather_FB = "../data/weather/df_Freiburg_weather_hourly 2025-03-28_20-51-37.csv"

df_weather_DD = pd.read_csv(filename_weather_DD, parse_dates=["datetime_hour"])
df_weather_FB = pd.read_csv(filename_weather_FB, parse_dates=["datetime_hour"])

# Left join on the hour: every trip row is kept even if no weather record matches.
df_DD = pd.merge(df_DD, df_weather_DD, how="left", on="datetime_hour")
df_FB = pd.merge(df_FB, df_weather_FB, how="left", on="datetime_hour")

In [None]:
## calendar effects
# Adds to each city frame (in place):
#   - "weekday": three-letter weekday name derived from `datetime_hour`
#   - "weekday_<name>": 0/1 indicator columns for every weekday except Monday
#   - "is_dayoff": 1 on Saturdays, Sundays and the listed public holidays, else 0
WEEKDAY_NAMES = {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"}

# Monday is the omitted reference category. The remaining columns are listed
# alphabetically, which is the order `pd.get_dummies` produces, so the column
# layout stays the same while no longer depending on which weekdays occur in the data.
weekday_dummy_columns = [
    f"weekday_{name}" for name in sorted(WEEKDAY_NAMES.values()) if name != "Mon"
]

# Public holidays per city as ISO date strings.
# NOTE(review): hand-maintained lists — confirm they cover the full date range of the data.
holidays_DD = ["2024-01-01", "2024-03-29", "2024-04-01", "2024-05-01", "2024-05-09", "2024-05-20", "2024-10-03", "2024-10-31"]
holidays_FB = ["2023-06-08", "2024-10-03"]

for df_tmp, holiday_strings in zip([df_DD, df_FB], [holidays_DD, holidays_FB]):
    df_tmp["weekday"] = df_tmp["datetime_hour"].dt.dayofweek.map(WEEKDAY_NAMES)

    # Reindexing to the fixed column list guarantees that every indicator exists
    # (filled with 0) even when a weekday never appears in the frame; otherwise
    # the Sat/Sun lookup below would raise a KeyError.
    weekday_df = pd.get_dummies(df_tmp["weekday"], prefix="weekday", dtype=int).reindex(
        columns=weekday_dummy_columns, fill_value=0
    )
    df_tmp[weekday_dummy_columns] = weekday_df

    # Weekends are days off; Sat and Sun indicators are mutually exclusive, so the sum is 0/1.
    df_tmp["is_dayoff"] = df_tmp["weekday_Sat"] + df_tmp["weekday_Sun"]

    # Public holidays are days off as well, regardless of the weekday they fall on.
    german_holidays = [pd.to_datetime(date).date() for date in holiday_strings]
    flt = df_tmp["datetime_hour"].dt.date.isin(german_holidays)
    df_tmp.loc[flt, "is_dayoff"] = 1

In [None]:
# Snap event start/end timestamps onto the full-hour grid so they can be joined
# against the hourly `datetime_hour` key of the trip tables.
event_start = df_events["StartDateTime"]

# Starts in the midnight hour (00:xx) are moved back by one hour into the last
# hour of the previous day; all other starts are kept. Missing timestamps stay NaT.
# NOTE(review): presumably date-only events are stored as 00:00 — confirm the intent.
shifted_start = event_start.where(event_start.dt.hour != 0, event_start - timedelta(hours=1))

# Floor *every* value to the hour (lowercase "h"; the "H" alias is deprecated).
# Without flooring the midnight-hour values, e.g. 00:30 could never match an
# hourly key and the event would silently drop out of the join.
df_events["StartDateTime_adj"] = shifted_start.dt.floor("h")
df_events["EndDateTime_adj"] = df_events["EndDateTime"].dt.floor("h")

# Number of events starting / ending per hexagon and hour.
df_events_grouped_start = (
    df_events
    .groupby(["hex_id", "StartDateTime_adj"])
    .size()
    .reset_index(name="event_count_start")
)
df_events_grouped_end = (
    df_events
    .dropna(subset=["EndDateTime_adj"])
    .groupby(["hex_id", "EndDateTime_adj"])
    .size()
    .reset_index(name="event_count_end")
)

In [None]:
# Attach the per-hexagon, per-hour event start/end counts to the Dresden table.
# Left joins keep every trip row; rows without a matching event get a count of 0.
# The fill is done by reassignment rather than `Series.fillna(..., inplace=True)`
# on an attribute-accessed column, which is chained assignment and does not
# update the frame under pandas Copy-on-Write.
df_DD = (
    df_DD
    .merge(
        df_events_grouped_start,
        how="left",
        left_on=["datetime_hour", "hex_id"],
        right_on=["StartDateTime_adj", "hex_id"],
    )
    .merge(
        df_events_grouped_end,
        how="left",
        left_on=["datetime_hour", "hex_id"],
        right_on=["EndDateTime_adj", "hex_id"],
    )
    # The right-hand join keys duplicate `datetime_hour` and are not needed afterwards.
    .drop(columns=["StartDateTime_adj", "EndDateTime_adj"], errors="ignore")
    .fillna({"event_count_start": 0, "event_count_end": 0})
)


In [None]:
# Attach the per-hexagon, per-hour event start/end counts to the Freiburg table.
# Left joins keep every trip row; rows without a matching event get a count of 0.
# The fill is done by reassignment rather than `Series.fillna(..., inplace=True)`
# on an attribute-accessed column, which is chained assignment and does not
# update the frame under pandas Copy-on-Write.
df_FB = (
    df_FB
    .merge(
        df_events_grouped_start,
        how="left",
        left_on=["datetime_hour", "hex_id"],
        right_on=["StartDateTime_adj", "hex_id"],
    )
    .merge(
        df_events_grouped_end,
        how="left",
        left_on=["datetime_hour", "hex_id"],
        right_on=["EndDateTime_adj", "hex_id"],
    )
    # The right-hand join keys duplicate `datetime_hour` and are not needed afterwards.
    .drop(columns=["StartDateTime_adj", "EndDateTime_adj"], errors="ignore")
    .fillna({"event_count_start": 0, "event_count_end": 0})
)


In [None]:
# Sanity check: the left joins must neither duplicate nor drop rows.
# A mismatch means a right-hand table had duplicate join keys.
assert len(df_DD) == initial_len_df_DD, (
    f"df_DD row count changed: expected {initial_len_df_DD}, got {len(df_DD)}"
)
assert len(df_FB) == initial_len_df_FB, (
    f"df_FB row count changed: expected {initial_len_df_FB}, got {len(df_FB)}"
)

In [None]:
# Preview the enriched Dresden table (first rows only, to keep the notebook output small).
df_DD.head()

In [None]:
# Put both city tables back into chronological order.
df_DD, df_FB = (frame.sort_values("datetime_hour") for frame in (df_DD, df_FB))

In [None]:
# Write both enriched tables to CSV. The creation timestamp in the file name
# keeps successive exports from overwriting each other.
# The variable is deliberately not called `time`: that name would shadow the
# `time` module imported at the top of the notebook.
timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
df_DD.to_csv(f"../data/df_DD_{timestamp}.csv", index=False)
df_FB.to_csv(f"../data/df_FB_{timestamp}.csv", index=False)