In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import numpy as np
import csv

In [2]:
df_hospital=pd.read_csv("processed_data/dublinbikes_james_hospital.csv")
df_portobello=pd.read_csv("processed_data/dublinbikes_portobello.csv")

Lets get some basic features like day of week, weekend, time

In [3]:
def get_basic_features(df:pd.DataFrame):
    df["TIME"]=pd.to_datetime(df["TIME"])
    df["DAY OF WEEK"]=df["TIME"].dt.dayofweek-1
    df["HOUR"]=df["TIME"].dt.strftime("%H").astype(int)
    df["MINUTE"]=df["TIME"].dt.strftime("%M").astype(int)

    
    return df

In [4]:
df_hospital = get_basic_features(df_hospital)
df_portobello = get_basic_features(df_portobello)

In [5]:
df_hospital[500:520]

Unnamed: 0,TIME,BIKE STANDS,AVAILABLE BIKES,DAY OF WEEK,HOUR,MINUTE
500,2020-01-29 17:40:02,40,12,1,17,40
501,2020-01-29 17:45:02,40,10,1,17,45
502,2020-01-29 17:50:02,40,12,1,17,50
503,2020-01-29 17:55:02,40,12,1,17,55
504,2020-01-29 18:00:02,40,13,1,18,0
505,2020-01-29 18:05:02,40,13,1,18,5
506,2020-01-29 18:10:02,40,15,1,18,10
507,2020-01-29 18:15:02,40,15,1,18,15
508,2020-01-29 18:20:02,40,14,1,18,20
509,2020-01-29 18:25:02,40,14,1,18,25


Lets add some more complex features using weekly, daily and short-term seasonality trends in the data

For short term trends lets use intervals of 15 min, and get the last three intervals

In [6]:
def get_trend_features(df:pd.DataFrame, prediction_time : int):
    
    prediction_int = np.trunc(prediction_time/5) - 1
    prediction_int=prediction_int.astype(int)
    
    df["5MIN"] = df["AVAILABLE BIKES"].shift(1+prediction_int)
    df["10MIN"] = df["AVAILABLE BIKES"].shift(2+prediction_int)
    df["15MIN"] = df["AVAILABLE BIKES"].shift(3+prediction_int)
    df["1DAY"] = df["AVAILABLE BIKES"].shift(12*24)
    df["2DAY"] = df["AVAILABLE BIKES"].shift(12*24*2)
    df["3DAY"] = df["AVAILABLE BIKES"].shift(12*24*3)
    df["1WEEK"] = df["AVAILABLE BIKES"].shift(12*24*7)
    df.dropna(inplace=True)
    return df

In [7]:
def normalize_features(df:pd.DataFrame):
    df["AVAILABLE BIKES"] = df["AVAILABLE BIKES"]/df["BIKE STANDS"]
    df["DAY OF WEEK"] = df["DAY OF WEEK"] / 6
    df["HOUR"] = df["HOUR"] / max(df["HOUR"])
    df["MINUTE"] = df["MINUTE"] / max(df["MINUTE"])
    return df

In [8]:
df_hospital=normalize_features(df_hospital)
df_portobello=normalize_features(df_portobello)


In [12]:
prediction_time = 60
df_hospital=get_trend_features(df_hospital, prediction_time)
df_portobello=get_trend_features(df_portobello, prediction_time)