In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import optuna
from optuna import Trial
from fbprophet import Prophet
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings("ignore")

In [9]:
df = pd.read_csv(
    "../input/2s/15day_0201_0215.csv", usecols=["ds", "y"], parse_dates=["ds"]
)
print(df.shape)
df.head()

(648000, 2)


Unnamed: 0,ds,y
0,2021-02-01 19:00:00,12.0
1,2021-02-01 19:00:02,7.0
2,2021-02-01 19:00:04,1.0
3,2021-02-01 19:00:06,16.0
4,2021-02-01 19:00:08,19.0


In [10]:
df.tail()

Unnamed: 0,ds,y
647995,2021-02-16 18:59:50,19.0
647996,2021-02-16 18:59:52,19.0
647997,2021-02-16 18:59:54,19.0
647998,2021-02-16 18:59:56,19.0
647999,2021-02-16 18:59:58,19.0


In [11]:
train = df[df['ds'] < '2021-2-10']
valid = df[df['ds'] >= '2021-2-10']

In [12]:
cap = np.max(train.y)
floor = np.min(train.y)

In [13]:
def objective(trial: Trial) -> float:
    params = {
        "changepoint_range": trial.suggest_discrete_uniform(
            "changepoint_range", 0.8, 0.95, 0.001
        ),
        "n_changepoints": trial.suggest_int("n_changepoints", 20, 35),
        "changepoint_prior_scale": trial.suggest_discrete_uniform(
            "changepoint_prior_scale", 0.001, 0.5, 0.001
        ),
        "seasonality_prior_scale": trial.suggest_discrete_uniform(
            "seasonality_prior_scale", 1, 25, 0.5
        ),
#         "yearly_fourier": trial.suggest_int("yearly_fourier", 5, 15),
#         "monthly_fourier": trial.suggest_int("monthly_fourier", 3, 12),
#         "weekly_fourier": trial.suggest_int("weekly_fourier", 3, 7),
#         "quaterly_fourier": trial.suggest_int("quaterly_fourier", 3, 10),
#         "yearly_prior": trial.suggest_discrete_uniform("yearly_prior", 1, 25, 0.5),
#         "monthly_prior": trial.suggest_discrete_uniform("monthly_prior", 1, 25, 0.5),
#         "weekly_prior": trial.suggest_discrete_uniform("weekly_prior", 1, 25, 0.5),
#         "quaterly_prior": trial.suggest_discrete_uniform("quaterly_prior", 1, 25, 0.5),
        "growth": "logistic",
        "seasonality_mode": "additive",
        "yearly_seasonality": False,
        "weekly_seasonality": True,
        "daily_seasonality": True,
    }
    # fit_model
    m = Prophet(**params)
    train["cap"] = cap
    train["floor"] = floor
    m.fit(train)
    future = m.make_future_dataframe(periods=7)
    print(future)
    future["cap"] = cap
    future["floor"] = floor

    forecast = m.predict(future)
    print(forecast)
    valid_forecast = forecast.tail(7)
    val_rmse = mean_squared_error(valid.y, valid_forecast.yhat, squared=False)

    return val_rmse

In [14]:
study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=15)

[32m[I 2021-05-15 17:23:29,283][0m A new study created in memory with name: no-name-b696552b-4607-4dc9-bbff-e5ccf6d6822a[0m


                        ds
0      2021-02-01 19:00:00
1      2021-02-01 19:00:02
2      2021-02-01 19:00:04
3      2021-02-01 19:00:06
4      2021-02-01 19:00:08
...                    ...
354602 2021-02-12 23:59:58
354603 2021-02-13 23:59:58
354604 2021-02-14 23:59:58
354605 2021-02-15 23:59:58
354606 2021-02-16 23:59:58

[354607 rows x 1 columns]


[33m[W 2021-05-15 17:26:54,873][0m Trial 0 failed because of the following error: ValueError('Found input variables with inconsistent numbers of samples: [293400, 7]')
Traceback (most recent call last):
  File "/home/leewook/anaconda3/envs/DsProject/lib/python3.8/site-packages/optuna/_optimize.py", line 217, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-13-918ba38cd2cc>", line 40, in objective
    val_rmse = mean_squared_error(valid.y, valid_forecast.yhat, squared=False)
  File "/home/leewook/anaconda3/envs/DsProject/lib/python3.8/site-packages/sklearn/utils/validation.py", line 72, in inner_f
    return f(**kwargs)
  File "/home/leewook/anaconda3/envs/DsProject/lib/python3.8/site-packages/sklearn/metrics/_regression.py", line 255, in mean_squared_error
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
  File "/home/leewook/anaconda3/envs/DsProject/lib/python3.8/site-packages/sklearn/metrics/_regression.py", line 84, in _check_reg_targets
    chec

                        ds      trend   cap  floor  yhat_lower  yhat_upper  \
0      2021-02-01 19:00:00  11.118334  98.0    0.0   -8.156237   26.567408   
1      2021-02-01 19:00:02  11.118348  98.0    0.0   -7.809374   24.501779   
2      2021-02-01 19:00:04  11.118362  98.0    0.0   -6.906516   26.379446   
3      2021-02-01 19:00:06  11.118376  98.0    0.0   -8.575483   25.144945   
4      2021-02-01 19:00:08  11.118389  98.0    0.0   -7.821857   25.265530   
...                    ...        ...   ...    ...         ...         ...   
354602 2021-02-12 23:59:58   7.437503  98.0    0.0   -6.117745   51.928304   
354603 2021-02-13 23:59:58   6.203004  98.0    0.0  -14.908942   68.910137   
354604 2021-02-14 23:59:58   5.161731  98.0    0.0  -12.657050   83.448310   
354605 2021-02-15 23:59:58   4.287090  98.0    0.0  -10.093307   93.060751   
354606 2021-02-16 23:59:58   3.554979  98.0    0.0   -5.746662  101.281119   

        trend_lower  trend_upper  additive_terms  additive_term

ValueError: Found input variables with inconsistent numbers of samples: [293400, 7]

In [None]:
prophet_params = {}
prophet_params["growth"] = "logistic"
prophet_params["seasonality_mode"] = "additive"
prophet_params["weekly_seasonality"] = True
prophet_params["daily_seasonality"] = True
prophet_params["yearly_seasonality"] = False
m = Prophet(**prophet_params)

In [None]:
train['cap'] = cap
train['floor'] = floor

m.fit(train)

future = m.make_future_dataframe(periods=24, freq="m")
future['cap'] = cap
future['floor'] = floor

In [None]:
forecast = m.predict(future)
forecast_plot = m.plot(forecast)

In [None]:
cap = np.max(df.y)
floor = np.min(df.y)

In [None]:
df['cap'] = cap
df['floor'] = floor

m = Prophet(**prophet_params)
m.fit(df)

In [None]:
future = m.make_future_dataframe(periods=7, freq="d")
future['cap'] = cap
future['floor'] = floor
forecast1 = m.predict(future)
fig1 = m.plot(forecast1)

In [None]:
m.plot_components(forecast1)