In [12]:
import pandas as pd
import numpy as np
import holidays

In [22]:
# Load the raw station snapshots. No `date_format` is passed to read_csv:
# that option takes a strftime pattern and only applies to columns listed in
# `parse_dates`, so the datetime conversion is done explicitly below instead.
data = pd.read_csv("data/podatki_20231215.csv")
# Parsed snapshot time; the feature builders read this column via `.dt`
data["timestamp"] = pd.to_datetime(data["datetime"])
# drop the first unnamed column
data = data.drop(columns=data.columns[0])
data.head(3)

Unnamed: 0,datetime,number,name,address,banking,bonus,bike_stands,available_bike_stands,available_bikes,status,last_update,time_to_last_update,timestamp
0,2023-12-15 19:01:58,59,LIDL BEŽIGRAD,Bežigrad 11,False,False,20,19,1,OPEN,2023-12-15 17:57:36,3862.0,2023-12-15 19:01:58
1,2023-12-15 19:01:58,54,ŠMARTINSKI PARK,Smartinska cesta 58,False,False,20,7,13,OPEN,2023-12-15 17:58:52,3786.0,2023-12-15 19:01:58
2,2023-12-15 19:01:58,45,SAVSKO NASELJE 1-ŠMARTINSKA CESTA,Savska cesta 1,False,False,20,20,0,OPEN,2023-12-15 17:59:17,3761.0,2023-12-15 19:01:58


In [23]:
def create_holiday_features(data):
    """Add Slovenian public-holiday and school-break indicator columns.

    The input frame is copied first, so the caller's DataFrame is left
    untouched (the same convention the lag-feature builders in this
    notebook follow).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a datetime-like ``timestamp`` column.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with three extra 0/1 integer columns:

        * ``praznik`` - 1 when the timestamp's calendar date is contained in
          ``holidays.SI()``; rows with a missing timestamp get 0.
        * ``poletne_pocitnice`` - 1 from 24 June through 31 August.
        * ``zimske_pocitnice`` - 1 from 25 through 31 December.
          NOTE(review): early-January days are not flagged - confirm this is
          the intended extent of the winter break.
    """
    data = data.copy()
    slo_holidays = holidays.SI()

    stamps = data["timestamp"]
    month = stamps.dt.month
    day_of_month = stamps.dt.day

    # Resolve every distinct calendar date once instead of once per row; the
    # snapshots contain many rows sharing the same date.
    dates = stamps.dt.date
    holiday_flag = {d: int(d in slo_holidays) for d in dates.dropna().unique()}
    data["praznik"] = dates.map(lambda d: holiday_flag.get(d, 0)).astype(int)

    in_july_or_august = month.isin([7, 8])
    in_late_june = (month == 6) & (day_of_month >= 24)
    data["poletne_pocitnice"] = (in_july_or_august | in_late_june).astype(int)

    data["zimske_pocitnice"] = ((month == 12) & (day_of_month > 24)).astype(int)

    return data

In [None]:
def create_time_lag_features(data, lag=1):
    """Return a copy of ``data`` with per-station lagged ``target`` columns.

    For every ``k`` from 1 to ``lag`` a column ``lag_k`` is added that holds
    ``target`` shifted by ``k`` rows within each ``station`` group. The shift
    follows the current row order of ``data``; no sorting is done here, so
    callers that want lags in time order need to sort beforehand.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``station`` and ``target`` columns.
    lag : int, default 1
        Number of lag columns to create; values below 1 add no columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with the ``lag_1`` .. ``lag_<lag>`` columns added.
    """
    result = data.copy()

    # Build all shifted series first, then attach them in ascending lag order.
    shifted_columns = {
        f"lag_{step}": result.groupby(["station"])["target"].shift(step)
        for step in range(1, lag + 1)
    }
    for column_name, shifted in shifted_columns.items():
        result[column_name] = shifted

    return result

In [None]:
def create_daily_lag_features(data, lag=1):
    """Return a sorted copy of ``data`` with slot-aligned lagged ``target`` columns.

    The copy gains four helper columns, which are left in the result:

    * ``yesterday`` - ``timestamp`` minus one day, floored to 10 minutes
    * ``timestamp_rounded`` - ``timestamp`` floored to 10 minutes
    * ``time_rounded`` - time-of-day part of ``timestamp_rounded``
    * ``day_of_week`` - ``timestamp.dt.dayofweek``

    It is then sorted by ``station`` and ``timestamp``, and for every ``k``
    from 1 to ``lag`` a column ``daily_lag_k`` is added holding ``target``
    shifted by ``k`` rows within each
    (``day_of_week``, ``station``, ``time_rounded``) group.

    NOTE(review): because ``day_of_week`` is part of the grouping key, the
    shifted value comes from an earlier row with the same weekday, and the
    ``yesterday`` column is not used for the lag at all - confirm that this
    is the intended meaning of a "daily" lag.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``timestamp`` (datetime-like), ``station`` and
        ``target`` columns.
    lag : int, default 1
        Number of lag columns to create; values below 1 add no lag columns.

    Returns
    -------
    pandas.DataFrame
        The sorted copy with helper and ``daily_lag_*`` columns added.
    """
    frame = data.copy()
    stamps = frame.timestamp
    ten_minute_slot = stamps.dt.floor("10min")

    # Helper columns, added in the same order as before.
    frame["yesterday"] = (stamps - pd.Timedelta(days=1)).dt.floor("10min")
    frame["timestamp_rounded"] = ten_minute_slot
    frame["time_rounded"] = ten_minute_slot.dt.time
    frame["day_of_week"] = stamps.dt.dayofweek

    # Order rows chronologically per station so the shift walks back in time.
    frame = frame.sort_values(["station", "timestamp"])

    group_keys = ["day_of_week", "station", "time_rounded"]
    step = 1
    while step <= lag:
        frame[f"daily_lag_{step}"] = frame.groupby(group_keys)["target"].shift(step)
        step += 1

    return frame

In [24]:
# Add the praznik / poletne_pocitnice / zimske_pocitnice indicator columns
# to the loaded frame and display the result.
data = create_holiday_features(data)
data

Unnamed: 0,datetime,number,name,address,banking,bonus,bike_stands,available_bike_stands,available_bikes,status,last_update,time_to_last_update,timestamp,praznik,poletne_pocitnice,zimske_pocitnice
0,2023-12-15 19:01:58,59,LIDL BEŽIGRAD,Bežigrad 11,False,False,20,19,1,OPEN,2023-12-15 17:57:36,3862.0,2023-12-15 19:01:58,0,0,0
1,2023-12-15 19:01:58,54,ŠMARTINSKI PARK,Smartinska cesta 58,False,False,20,7,13,OPEN,2023-12-15 17:58:52,3786.0,2023-12-15 19:01:58,0,0,0
2,2023-12-15 19:01:58,45,SAVSKO NASELJE 1-ŠMARTINSKA CESTA,Savska cesta 1,False,False,20,20,0,OPEN,2023-12-15 17:59:17,3761.0,2023-12-15 19:01:58,0,0,0
3,2023-12-15 19:01:58,68,ČRNUČE,Dunajska cesta,False,False,20,17,3,OPEN,2023-12-15 17:55:21,3997.0,2023-12-15 19:01:58,0,0,0
4,2023-12-15 19:01:58,11,VILHARJEVA CESTA,Vilharjeva cesta 23,False,False,20,2,18,OPEN,2023-12-15 17:57:50,3848.0,2023-12-15 19:01:58,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14947,2023-12-15 22:01:51,15,AMBROŽEV TRG,Poljanska cesta 20c,False,False,18,16,2,OPEN,2023-12-15 21:00:00,3711.0,2023-12-15 22:01:51,0,0,0
14948,2023-12-15 22:01:51,77,VOJKOVA - GASILSKA BRIGADA,Vojkova cesta,False,False,20,15,5,OPEN,2023-12-15 20:59:10,3761.0,2023-12-15 22:01:51,0,0,0
14949,2023-12-15 22:01:51,61,RAKOVNIK,Rakovnik,False,False,20,5,15,OPEN,2023-12-15 20:57:55,3836.0,2023-12-15 22:01:51,0,0,0
14950,2023-12-15 22:01:51,50,PREGLOV TRG,Nove Fuzine 33,False,False,20,9,11,OPEN,2023-12-15 20:54:40,4031.0,2023-12-15 22:01:51,0,0,0


In [25]:
# Save the frame with the holiday features to CSV in the working directory;
# index=False keeps the row index out of the file.
data.to_csv("podatki_better.csv", index=False)

In [26]:
# Display the frame again; it has not changed since the holiday features were added.
data

Unnamed: 0,datetime,number,name,address,banking,bonus,bike_stands,available_bike_stands,available_bikes,status,last_update,time_to_last_update,timestamp,praznik,poletne_pocitnice,zimske_pocitnice
0,2023-12-15 19:01:58,59,LIDL BEŽIGRAD,Bežigrad 11,False,False,20,19,1,OPEN,2023-12-15 17:57:36,3862.0,2023-12-15 19:01:58,0,0,0
1,2023-12-15 19:01:58,54,ŠMARTINSKI PARK,Smartinska cesta 58,False,False,20,7,13,OPEN,2023-12-15 17:58:52,3786.0,2023-12-15 19:01:58,0,0,0
2,2023-12-15 19:01:58,45,SAVSKO NASELJE 1-ŠMARTINSKA CESTA,Savska cesta 1,False,False,20,20,0,OPEN,2023-12-15 17:59:17,3761.0,2023-12-15 19:01:58,0,0,0
3,2023-12-15 19:01:58,68,ČRNUČE,Dunajska cesta,False,False,20,17,3,OPEN,2023-12-15 17:55:21,3997.0,2023-12-15 19:01:58,0,0,0
4,2023-12-15 19:01:58,11,VILHARJEVA CESTA,Vilharjeva cesta 23,False,False,20,2,18,OPEN,2023-12-15 17:57:50,3848.0,2023-12-15 19:01:58,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14947,2023-12-15 22:01:51,15,AMBROŽEV TRG,Poljanska cesta 20c,False,False,18,16,2,OPEN,2023-12-15 21:00:00,3711.0,2023-12-15 22:01:51,0,0,0
14948,2023-12-15 22:01:51,77,VOJKOVA - GASILSKA BRIGADA,Vojkova cesta,False,False,20,15,5,OPEN,2023-12-15 20:59:10,3761.0,2023-12-15 22:01:51,0,0,0
14949,2023-12-15 22:01:51,61,RAKOVNIK,Rakovnik,False,False,20,5,15,OPEN,2023-12-15 20:57:55,3836.0,2023-12-15 22:01:51,0,0,0
14950,2023-12-15 22:01:51,50,PREGLOV TRG,Nove Fuzine 33,False,False,20,9,11,OPEN,2023-12-15 20:54:40,4031.0,2023-12-15 22:01:51,0,0,0
