In [48]:
#!/usr/bin/python           
""" 
In this notebook, the functions for training and predicting with Prophet are defined.
"""

import os
import pandas as pd
from fbprophet import Prophet
from cslib import fetch_ts
import re
from logger import update_predict_log, update_train_log
import time
import numpy as np
import random

from sklearn.metrics import mean_squared_error, mean_absolute_error

In [2]:
# Version number and free-text note identifying this model; model_train passes
# both to update_train_log so each training-log entry records them.
MODEL_VERSION = 0.1
MODEL_VERSION_NOTE = "Prophet POC"

### HyperParameter Tuning using ParameterGrid

In [69]:
# Candidate hyperparameters for the Prophet grid search. Approach adapted from:
# https://www.kaggle.com/manovirat/timeseries-using-prophet-hyperparameter-tuning
from sklearn.model_selection import ParameterGrid

# Only the seasonality mode is searched. Further candidates that are left
# disabled (add them to the dict to widen the search):
#   'changepoint_prior_scale': [0.1, 0.3, 0.5]
#   'n_changepoints': [100, 200]
params_grid = {'seasonality_mode': ('multiplicative', 'additive')}
grid = ParameterGrid(params_grid)

# ParameterGrid implements __len__, so the number of combinations is read
# directly instead of being counted with a loop.
cnt = len(grid)

print('Total Possible Models',cnt)

Total Possible Models 2


### Prophet Model Tuning

In [70]:
# Load the revenue time series of a single country and split it by position
# into a training part (first 70% of the rows) and a held-out part (the rest).
country = "spain"
filename = "./data/cs_train/data/ts-" + country + ".csv"
df = pd.read_csv(filename)

first_date, last_date = df.date.min(), df.date.max()
print(first_date, last_date)

# The models below are fitted on a frame with columns 'ds' (date) and 'y' (value).
df = df.rename(columns={'date': 'ds', 'revenue': 'y'})
X = df.loc[:, ['ds', 'y']].copy()

size = int(0.70 * len(X))
train = X.iloc[:size]
test = X.iloc[size:]

print(train.shape, test.shape)

2017-11-01 2019-06-30
(424, 2) (183, 2)


In [75]:
  
# Grid search: fit one Prophet model per parameter combination in `grid` on
# `train`, forecast `horizon` days past the end of the training data and score
# the forecast against the rows of `df` that directly follow the training rows.
horizon = 60  # forecast length in days; also the number of held-out rows scored

strt = df.ds.min()           # not used below; kept so the name stays defined
end = df.iloc[size]['ds']    # not used below; kept so the name stays defined
end_ = df.iloc[train.shape[0] + horizon]['ds']   # first date past the scored window

# Held-out observations strictly between the last training date and end_.
# The selection does not depend on the candidate, so it is done once.
Actual = df[(df['ds'] > train.ds.max()) & (df['ds'] < end_)]

# Collect one record per candidate and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copies the frame on each call.
results = []
for p in grid:
    print(p)
    # NOTE(review): this seeds only Python's `random` module; confirm that
    # Prophet actually draws from it, otherwise the call has no effect.
    random.seed(0)
    train_model = Prophet(
        seasonality_mode=p['seasonality_mode'],
        weekly_seasonality=True,
        daily_seasonality=True,
        yearly_seasonality=True,
        interval_width=0.95)
    train_model.fit(train)
    train_forecast = train_model.make_future_dataframe(periods=horizon, freq='D', include_history=False)
    train_forecast = train_model.predict(train_forecast)

    test_ = train_forecast[['ds', 'yhat']]

    # NOTE(review): abs() turns a negative forecast into a positive one before
    # scoring, which changes the error for those days — confirm this is intended
    # (clipping at zero may have been meant).
    MSE = mean_squared_error(Actual['y'], abs(test_['yhat']))
    print('Mean Square Error(MSE)------------------------------------',MSE)
    results.append({'MSE': MSE, 'Parameters': p})

model_parameters = pd.DataFrame(results, columns=['MSE', 'Parameters'])

{'seasonality_mode': 'multiplicative'}



Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



Mean Square Error(MSE)------------------------------------ 101058.98727235243
{'seasonality_mode': 'additive'}



Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



Mean Square Error(MSE)------------------------------------ 95859.11271378021


In [76]:
# Rank the evaluated candidates by ascending MSE and renumber the rows so that
# row 0 holds the candidate with the lowest error.
parameters = (
    model_parameters
    .sort_values(by=['MSE'])
    .reset_index(drop=True)
)
parameters.head()

Unnamed: 0,MSE,Parameters
0,95859.112714,{'seasonality_mode': 'additive'}
1,101058.987272,{'seasonality_mode': 'multiplicative'}


In [114]:
# Parameter set stored on row 0 of the ranking (the lowest MSE).
# Append ['seasonality_mode'] to read just the seasonality mode.
parameters.loc[0, 'Parameters']
          

{'seasonality_mode': 'additive'}

In [112]:
def model_train():
    """Fit one Prophet model per country and record the run in the training log.

    The time series are loaded with ``fetch_ts`` from ``data/cs_train/data``.
    Every model uses the seasonality mode stored on row 0 of the module-level
    ``parameters`` frame and is used to forecast 120 periods past the end of
    its history. The shape of the last forecast, the elapsed runtime and the
    model version constants are passed to ``update_train_log``.

    Returns:
        bool: always ``True``.

    Raises:
        ValueError: if ``fetch_ts`` yields no time series, so that there is
            nothing to train on or to log.
    """
    ## start timer for runtime
    time_start = time.time()
    data_dir = os.path.join("data","cs_train","data")
    ts_data = fetch_ts(data_dir)

    # Identical for every country, so look it up once.
    seasonality_mode = parameters['Parameters'][0]['seasonality_mode']

    forecast = None
    for country, df in ts_data.items():
        model = Prophet(
            seasonality_mode=seasonality_mode,
            weekly_seasonality=True,
            daily_seasonality=True,
            yearly_seasonality=True,
            interval_width=0.95)

        # The model is fitted on a frame with columns 'ds' and 'y'.
        history = df[["date","revenue"]].rename(columns={"date": "ds", "revenue": "y"})
        model.fit(history)
        future = model.make_future_dataframe(periods=120) # predict future, 120 days
        forecast = model.predict(future)
        forecast['country'] = country

        # NOTE(review): the forecast is not persisted — the export below is
        # disabled — while model_predict reads
        # ./data/forecasts/forecast_<country>.csv. Confirm where those files
        # are produced.
#         filename="data/forecasts/forecast_" + country+".csv"
#         forecast.to_csv(filename)

    if forecast is None:
        raise ValueError("no time series found in " + data_dir)

    ## update the log file
    # Distinct names for the time parts: the model variable used to be called
    # `m` and was silently rebound to the minutes value here.
    minutes, seconds = divmod(time.time()-time_start, 60)
    hours, minutes = divmod(minutes, 60)
    runtime = "%03d:%02d:%02d"%(hours, minutes, seconds)
    test=False
    update_train_log(forecast.shape, runtime, MODEL_VERSION, MODEL_VERSION_NOTE, test)

    return True


In [81]:
def model_predict(country, year, month, day):
    """Look up the stored forecast of one country for one calendar date.

    Args:
        country: name of the country; it must be one of the keys of the
            mapping returned by ``fetch_ts``.
        year: year of the requested date, as a string or an integer.
        month: month of the requested date, as a string or an integer;
            zero-padded to two digits.
        day: day of the requested date, as a string or an integer;
            zero-padded to two digits.

    Returns:
        The rows of ``./data/forecasts/forecast_<country>.csv`` whose ``ds``
        value equals ``<year>-<month>-<day>`` (an empty frame when no row
        matches), or a message string when the country is unknown.
    """
    # NOTE(review): update_predict_log is imported by this notebook but never
    # called; confirm whether predictions are meant to be logged here.
    data_dir = os.path.join("data","cs_train","data")
    ts_data = fetch_ts(data_dir)

    if country not in ts_data:
        return "Could not find country called "+ country+".csv"

    filename="./data/forecasts/forecast_"+country+".csv"
    forecasts = pd.read_csv(filename)
    # str() + zfill() accepts integers and unpadded strings ("1" -> "01") and
    # leaves already padded strings such as "01" unchanged.
    date_str = "-".join([str(year), str(month).zfill(2), str(day).zfill(2)])
    row=forecasts.loc[forecasts['ds'] == date_str]
    return row


In [82]:
# show forecast row for a country and date
model_predict(country="spain", year="2018", month="01", day="01")


... loading ts data from files


Unnamed: 0.1,Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat,country
61,61,2018-01-01,26.102484,-139.309748,196.317002,26.102484,26.102484,7.490659,7.490659,7.490659,7.490659,7.490659,7.490659,0.0,0.0,0.0,33.593143,spain


In [113]:
# Fit a model for every country returned by fetch_ts and write the training-log
# entry; the cell displays the function's return value.
model_train()

... loading ts data from files



Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



True

## End of notebook