In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.api import SimpleExpSmoothing
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tools.eval_measures import rmse
from sklearn.linear_model import LinearRegression as LinReg
from sklearn.ensemble import RandomForestRegressor as RFR
from sklearn.ensemble import GradientBoostingRegressor as GBR
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from prophet import Prophet
from prophet.plot import plot_plotly

In [14]:
df = pd.read_csv("data/bicing_lunes_2.csv")

In [15]:
df.isna().sum()

dateTime                             0
bikesInUsage                         0
electricalBikesInUsage               0
mechanicalBikesInUsage               0
is_weekend_or_bank_holiday           0
is_eve_of_weekend_or_bank_holiday    0
time_of_day                          0
temperature_2m                       0
rain                                 0
dtype: int64

In [16]:
df

Unnamed: 0,dateTime,bikesInUsage,electricalBikesInUsage,mechanicalBikesInUsage,is_weekend_or_bank_holiday,is_eve_of_weekend_or_bank_holiday,time_of_day,temperature_2m,rain
0,2018-08-01 17:00:00,662.500000,15.000000,647.500000,False,False,Afternoon,30.689999,0.0
1,2018-08-01 18:00:00,799.750000,12.250000,787.500000,False,False,Evening,30.240000,0.0
2,2018-08-01 19:00:00,754.750000,5.000000,749.750000,False,False,Evening,28.890000,0.0
3,2018-08-01 20:00:00,679.000000,7.000000,672.000000,False,False,Evening,27.390000,0.0
4,2018-08-01 21:00:00,577.500000,5.000000,572.500000,False,False,Evening,26.689999,0.0
...,...,...,...,...,...,...,...,...,...
3650,2018-12-31 19:00:00,266.250000,3.833333,262.416667,False,False,Evening,7.890000,0.0
3651,2018-12-31 20:00:00,214.083333,2.166667,211.916667,False,False,Evening,7.090000,0.0
3652,2018-12-31 21:00:00,135.166667,0.500000,134.666667,False,False,Evening,6.690000,0.0
3653,2018-12-31 22:00:00,75.333333,0.250000,75.083333,False,False,Evening,6.390000,0.0


In [17]:
dummy_columns = ['is_weekend_or_bank_holiday', 'is_eve_of_weekend_or_bank_holiday', 'time_of_day']
df = pd.get_dummies(df, columns=dummy_columns, dtype=int)

In [18]:
df.head(1)

Unnamed: 0,dateTime,bikesInUsage,electricalBikesInUsage,mechanicalBikesInUsage,temperature_2m,rain,is_weekend_or_bank_holiday_False,is_weekend_or_bank_holiday_True,is_eve_of_weekend_or_bank_holiday_False,is_eve_of_weekend_or_bank_holiday_True,time_of_day_Afternoon,time_of_day_Evening,time_of_day_Morning,time_of_day_Night
0,2018-08-01 17:00:00,662.5,15.0,647.5,30.689999,0.0,1,0,1,0,1,0,0,0


In [19]:
df = df[['dateTime', 'bikesInUsage', 'is_weekend_or_bank_holiday_True', 'is_eve_of_weekend_or_bank_holiday_True',
         'time_of_day_Morning', 'time_of_day_Afternoon', 'time_of_day_Evening', 'time_of_day_Night',
         'temperature_2m', 'rain']]
df.columns = ['ds', 'y', 'is_weekend', 'is_eve_of_weekend', 'morning', 'afternoon', 'evening', 'night', 'temperature', 'rain']
df['ds'] = pd.to_datetime(df['ds'])

# Split the data into training and test sets
train = df.iloc[:len(df) - 731]
test = df.iloc[len(df) - 731:]

# Initialize and fit the model
m = Prophet()

# Add additional regressors
m.add_regressor('is_weekend')
m.add_regressor('is_eve_of_weekend')
m.add_regressor('morning')
m.add_regressor('afternoon')
m.add_regressor('evening')
m.add_regressor('night')
m.add_regressor('temperature')
m.add_regressor('rain')

# Fit the model with training data
m.fit(train)

# Create a future dataframe with the same columns
future = pd.DataFrame({'ds': pd.date_range(start=train['ds'].min(), periods=len(test), freq='H')})

# Add regressor values to the future dataframe
future['is_weekend'] = df['is_weekend']
future['is_eve_of_weekend'] = df['is_eve_of_weekend']
future['morning'] = df['morning']
future['afternoon'] = df['afternoon']
future['evening'] = df['evening']
future['night'] = df['night']
future['temperature'] = df['temperature']
future['rain'] = df['rain']

# Make predictions
forecast = m.predict(future)

# Extract predictions for the test period
predictions = forecast.iloc[-731:]['yhat']

10:28:51 - cmdstanpy - INFO - Chain [1] start processing
10:28:54 - cmdstanpy - INFO - Chain [1] done processing


In [20]:
print("RMSE:", np.sqrt(mean_squared_error(predictions, test['y'])))
print("r2:", metrics.r2_score(predictions, test['y']))

RMSE: 237.02652696356822
r2: -0.8378020693123742


In [41]:
df = df[["dateTime", "bikesInUsage"]]