<a href="https://colab.research.google.com/github/bongkyunSON/Project/blob/main/dacon_bike_221202.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab runtime; the data and submission
# folders used by later cells live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
from prophet.plot import plot_cross_validation_metric


import itertools
import numpy as np
import pandas as pd
import os
import holidays
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Input and output locations on the mounted drive
base_path = '/content/drive/MyDrive/Colab Notebooks/Project/data/bike/'
save_path = '/content/drive/MyDrive/Colab Notebooks/Project/dacon/bike/submission/'

# Training data and the submission template that the forecasts are written into
train = pd.read_csv(os.path.join(base_path, 'train.csv'))
submission = pd.read_csv(os.path.join(base_path, 'sample_submission.csv'))

In [None]:
# Convert the YYYYMMDD-formatted date column into pandas timestamps
parsed_dates = pd.to_datetime(train['일시'], format='%Y%m%d')
train['일시'] = parsed_dates

In [None]:
# Column names that exist in both `train` and `submission`;
# one Prophet model is tuned and forecast per entry.
regions = ['광진구', '동대문구', '성동구', '중랑구']

In [None]:
def predict2(train: pd.DataFrame, region: str, param_grid: dict, periods: int = 334):
    """
    Tune a Prophet model for one region and forecast past the training data.

    Every combination of the candidate values in ``param_grid`` is fitted and
    scored with Prophet's cross validation (365-day horizon). The combination
    with the lowest MAE over the whole horizon is refitted on the full series
    and used to forecast ``periods`` daily steps.

    Parameters
    ----------
    train : pd.DataFrame
        Must contain the datetime column '일시' and a column named ``region``.
    region : str
        Name of the column in ``train`` to model.
    param_grid : dict
        Maps Prophet keyword arguments to lists of candidate values.
    periods : int, default 334
        Number of daily steps to forecast after the last training date.

    Returns
    -------
    tuple
        ``(final_params, yhat)`` where ``final_params`` is the winning
        parameter dict and ``yhat`` is a NumPy array holding the ``periods``
        forecast values that follow the training history.

    Raises
    ------
    ValueError
        If ``param_grid`` yields no parameter combination (an empty candidate
        list), or if every combination produced a NaN score.
    """
    # Make time series dataframe for the region in the layout Prophet expects
    df = pd.DataFrame({'ds': train['일시'], 'y': train[region]})

    # Generate all combinations of parameters
    all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
    if not all_params:
        raise ValueError("param_grid produced no parameter combinations")

    maes = []  # Cross-validated MAE of each parameter combination

    # Use cross validation to evaluate all parameters
    for params in all_params:
        model = Prophet(**params)
        model.fit(df)  # Fit model with given params
        df_cv = cross_validation(model, horizon='365 days', parallel="processes")
        # rolling_window=1 collapses the metric into a single value averaged
        # over the full horizon; the default window would make row 0 describe
        # only the shortest-horizon slice of the forecast.
        df_p = performance_metrics(df_cv, metrics=['mae'], rolling_window=1)
        maes.append(df_p['mae'].values[0])

    # Pick the combination with the minimum MAE (NaN scores are ignored).
    # Indexing all_params directly returns the original values untouched,
    # including non-scalar ones such as a holidays DataFrame.
    best_index = int(np.nanargmin(maes))
    final_params = dict(all_params[best_index])

    # Train the final model with optimized params
    m = Prophet(**final_params).fit(df)
    future = m.make_future_dataframe(periods=periods, freq='D')
    forecast = m.predict(future)

    # The future rows are the last `periods` rows of the forecast, so slice
    # from the end instead of relying on a fixed training-set length.
    yhat = forecast['yhat'].to_numpy()
    return final_params, yhat[len(yhat) - periods:]

In [None]:
import holidays

# Build the Prophet holidays table (columns 'ds' and 'holiday') in one step.
# Growing a frame row by row with DataFrame.append is quadratic and the method
# no longer exists in pandas 2.x, so collect the dates first.
kr_holiday_dates = sorted(holidays.KR(years=[2018, 2019, 2020, 2021]).keys())
holiday = pd.DataFrame({
    'ds': pd.to_datetime(kr_holiday_dates),  # raises on unparsable dates instead of silently keeping them
    'holiday': "KR-Holidays",  # every date shares one label, so Prophet fits a single holiday effect
})
holiday

Unnamed: 0,ds,holiday
0,2018-01-01,KR-Holidays
1,2018-02-15,KR-Holidays
2,2018-02-16,KR-Holidays
3,2018-02-17,KR-Holidays
4,2018-03-01,KR-Holidays
...,...,...
67,2021-10-03,KR-Holidays
68,2021-10-04,KR-Holidays
69,2021-10-09,KR-Holidays
70,2021-10-11,KR-Holidays


In [None]:
# Candidate values for each tuned Prophet argument; the holidays table is
# passed as a single fixed candidate so every model receives it.
param_grid = dict(
    holidays=[holiday],
    changepoint_prior_scale=[0.001, 0.005, 0.01, 0.05, 0.1, 0.5],
    seasonality_prior_scale=[0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0],
    seasonality_mode=['additive', 'multiplicative'],
)

# Tune and forecast each region, keeping the winning parameters in region order
final_params = []
for region in regions:
    best_params, region_forecast = predict2(train, region, param_grid)
    submission[region] = region_forecast
    final_params.append(best_params)

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
INFO:cmdstanpy:Chain [1] start processing
10:14:39 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Making 1 forecasts with cutoffs between 2020-12-31 00:00:00 and 2020-12-31 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7f8c8e213c10>
DEBUG:cmdstanpy:input tempfile: /tmp/tmp2vt7p5zr/sk_u6uvd.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp2vt7p5zr/yelzazos.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.8/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=83374', 'data', 'file=/tmp/tmp2vt7p5zr/sk_u6uvd.json', 'init=/tmp/tmp2vt7p5zr/yelzazos.json', 'output', 'file=/tmp/tmp2vt7p5zr/prophet_modeltajh6uqv/prophet_model-20221202101439.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
10:14:39 - cmdstanpy - INFO - Chain [1] 

In [None]:
# Show the tuned hyper-parameters of every region
report_rows = (
    ('changepoint_prior_scale : ', 'changepoint_prior_scale'),
    ('seasonality_prior_scale : ', 'seasonality_prior_scale'),
    ('seasonality_mode        : ', 'seasonality_mode'),
)
for region, params in zip(regions, final_params):
    print(f"--------------- Hyper-parameters of {region} ---------------")
    for label, key in report_rows:
        print(label, params[key])

# Choose a file name that cannot collide with an earlier submission.
# The first export is 'submission_bike.csv'; later exports take the first
# unused numeric suffix (submission_bike2.csv, submission_bike3.csv, ...).
# Counting matching files instead can land on a name that already exists
# (e.g. after a numbered file was deleted) and silently overwrite it.
os.makedirs(save_path, exist_ok=True)  # a missing folder would otherwise fail at listdir
existing_names = set(os.listdir(save_path))
filename = 'submission_bike.csv'
suffix = 2
while filename in existing_names:
    filename = f"submission_bike{suffix}.csv"
    suffix += 1

# Export submission file
submission.to_csv(os.path.join(save_path, filename), index=False)

--------------- Hyper-parameters of 광진구 ---------------
changepoint_prior_scale :  0.1
seasonality_prior_scale :  5.0
seasonality_mode        :  multiplicative
--------------- Hyper-parameters of 동대문구 ---------------
changepoint_prior_scale :  0.05
seasonality_prior_scale :  1.0
seasonality_mode        :  multiplicative
--------------- Hyper-parameters of 성동구 ---------------
changepoint_prior_scale :  0.1
seasonality_prior_scale :  10.0
seasonality_mode        :  multiplicative
--------------- Hyper-parameters of 중랑구 ---------------
changepoint_prior_scale :  0.005
seasonality_prior_scale :  5.0
seasonality_mode        :  multiplicative
