In [1]:
import os
import json
import itertools
import random
import timeit

#import libraries for data wrangling
import pandas as pd
import numpy as np

#import custom libraries for adding lag-features and plotting
from little_helpers import *
import feature_functions as ff

#import facebook prophet modules
from fbprophet import Prophet
#import libraries to store and load prophet models
import pickle



In [2]:
#import train, validation and test data
train_df, validation_df, test_df = pd.read_csv('data/train_fil_3.csv',index_col=0),pd.read_csv('data/validation_fil_3.csv',index_col=0),pd.read_csv('data/test_fil_3.csv',index_col=0)
for df in [train_df,validation_df,test_df]:
    df.columns = [int(col) for col in df.columns]
    df.index = pd.to_datetime(df.index)

In [3]:
#function to get dataframe into prophet format. Assumes datetime index
def df_to_prophet(dataframe,y_column):
    df = dataframe.copy()
    df['ds'] = df.index
    df['y'] = df[y_column]
    df = df[['ds','y']]
    return df

In [4]:
#create search grit
param_grid = {'growth': ['linear'],
               'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
               'seasonality_prior_scale': [round(x,1) for x in np.linspace(0.1, 10, num = 5)],
               'holidays_prior_scale': [round(x,1) for x in np.linspace(0.1, 10, num = 5)]    
                }

all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]

#create tiny grid to test algorithm
tiny_grid = all_params[5:7]



In [5]:
def prophet_step_forward(history,params,data=1,regressors=[], holidays=None):
    hist = history
    predictions = []
    last_model = Prophet(**params)
    last_model.add_country_holidays(country_name='DE')
    
    #if data is inserted, predict for all observations. Else, predict only 1 day
    if type(data)==type(1):
        steps = 1
    else:
        steps = len(data)
    
    for i in range(1,steps):
        model = Prophet(**params)
        model.add_country_holidays(country_name='DE')
        
        for reg in regressors:
            model.add_regressor(reg)
        
        
        model.fit(history)
        future = model.make_future_dataframe(periods = 1)
        forecast = model.predict(future)
        predictions.append(forecast['yhat'])
        
        history = history.append(data[i-1:i])
    
    return predictions

In [6]:
def save_model(model,path):
    model.stan_backend.logger = None
    with open(path,'wb') as pkl:
        pickle.dump(model,pkl)
        
def load_model(path):
    with open(path,'rb') as pkl:
        return pickle.load(pkl)

In [7]:
def prophet_prediction(history,params,data,regressors=[], holidays=None, return_model=False):
        
    #initialize model
    model = Prophet(**params)
    
    #add public holidays
    model.add_country_holidays(country_name='DE')
        
    for reg in regressors:
        model.add_regressor(reg)
        
        
    model.fit(history)
    future = model.make_future_dataframe(periods = len(data),include_history=False)
    
    for reg in regressors:
        #print(f'Adding regressor: {reg}')
        future[reg] = np.array(data[reg])
        #print(future[reg])
    
    forecast = model.predict(future)
    
    if return_model == True:
        return model
    else:
        return np.array(forecast['yhat'])
        

In [8]:
#create random search function for prophet. When no time or iteration stop is given, it is a classical grid search with the exception, that combinations are chosen in random order
def random_search_prophet(history,param_grid,future_data,regressors=[],holidays=None,optimize='MAPE',iter_stop = 500):
    start = pd.datetime.now()    
    time = 0
    
    iterations = 0
    n_combinations = len(param_grid)
    
    grid = param_grid.copy()
    best_model = []
    best_error = []
    best_params = []
    
    #here is room to add other error functions in the future such as mse
    if optimize == 'MAPE':
        #use custom MAPE function
        errorfun = mape
    
    for combination in param_grid:
        #calculate current time in minutes
        time = (pd.datetime.now() - start).seconds / 60
        print(f'starting combi {combination}')
        if iterations <= iter_stop:
            #print('choosing random params')
            #randomly choose an entry from param grid, use it and delete it from remaining grid
            rand_index = random.choice(range(len(grid)))
            parameter = grid.pop(rand_index)

            y_predict = prophet_prediction(history=history,params=parameter,data=future_data,regressors=regressors)
            error = errorfun(future_data['y'],y_predict)
            iterations += 1

            if len(best_error) == 0: 
                best_error = [error]
                best_model = [prophet_prediction(history=history,params=parameter,data=future_data,regressors=regressors,return_model=True)]
                best_params = [parameter]   

            elif error < best_error[0]:
                #print('added new best model')
                best_error = [error]
                best_model = [prophet_prediction(history=history,params=parameter,data=future_data,regressors=regressors,return_model=True)]
                best_params = [parameter]
        
        else: return best_model[0],best_error[0],best_params[0]
    
    return best_model[0],best_error[0],best_params[0]

In [9]:
for col in test_df.columns[:3]:
    regressors = ['lag_7', 'lag_14', 'weekdaymean_2_weeks', 'weekdaymean_4_weeks']
    grid = all_params
    iter_stop = np.sqrt(len(grid))*3
    
    #get dataframe into prophet format
    history = df_to_prophet(train_df,col)
    future = df_to_prophet(validation_df,col)
    
    #add lag-features and moving averages
    history = ff.apply_time_featurefun(history,'y')
    future = ff.apply_time_featurefun(future,'y',history_df=history)
  
    #add column 'ds' that is lost in featurefun
    history['ds'] = history.index
    history.dropna(inplace=True)
    future['ds'] = future.index

 
    best_model, best_error, best_params = random_search_prophet(history=history,param_grid=grid,future_data=future,regressors=regressors,iter_stop=iter_stop)
    y_predict = prophet_prediction(history, best_params, future,regressors=regressors)
    
    
    save_model(model=best_model,path= f'models/3_{col}_SimpleProphet_{round(best_error)}.pkl')
    pd.DataFrame.from_dict({f'3_{col}_SimpleProphet_validation':y_predict}).to_csv(f'predictions/3_{col}_SimpleProphet_validation.csv')
    print(f'Saved prediction for article {col} validation set with MAPE {round(best_error)} \n###########################################################################')

starting combi {'growth': 'linear', 'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.1, 'holidays_prior_scale': 0.1}


INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



0        0.000000
1        0.000000
2      107.127150
3      123.547785
4       96.412388
5       79.960061
6        0.000000
7        0.000000
8      124.485251
9       76.555788
10     118.471408
11     115.201827
12      90.615958
13       0.000000
14      93.714252
15     124.367332
16     111.816976
17     124.396005
18      88.660113
19     100.265628
20       0.000000
21     104.440486
22     105.442185
23      98.061935
24     124.689385
25     110.142376
26     109.658172
27       0.000000
28     118.054553
29     110.473932
          ...    
335     96.729025
336    112.533806
337    109.356492
338    104.295688
339    111.262286
340    121.081876
341    106.972669
342     76.838798
343    133.348133
344    105.825789
345    102.247263
346    133.807019
347    116.627416
348    128.660684
349     90.939650
350    117.668696
351    109.436590
352    125.390943
353    122.576306
354    107.552603
355     90.863536
356      0.000000
357    121.884508
358     91.028676
359     95

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



0        0.000000
1        0.000000
2      107.127150
3      123.547785
4       96.412388
5       79.960061
6        0.000000
7        0.000000
8      124.485251
9       76.555788
10     118.471408
11     115.201827
12      90.615958
13       0.000000
14      93.714252
15     124.367332
16     111.816976
17     124.396005
18      88.660113
19     100.265628
20       0.000000
21     104.440486
22     105.442185
23      98.061935
24     124.689385
25     110.142376
26     109.658172
27       0.000000
28     118.054553
29     110.473932
          ...    
335     96.729025
336    112.533806
337    109.356492
338    104.295688
339    111.262286
340    121.081876
341    106.972669
342     76.838798
343    133.348133
344    105.825789
345    102.247263
346    133.807019
347    116.627416
348    128.660684
349     90.939650
350    117.668696
351    109.436590
352    125.390943
353    122.576306
354    107.552603
355     90.863536
356      0.000000
357    121.884508
358     91.028676
359     95

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


starting combi {'growth': 'linear', 'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.1, 'holidays_prior_scale': 2.6}



Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



0        0.000000
1        0.000000
2      107.127150
3      123.547785
4       96.412388
5       79.960061
6        0.000000
7        0.000000
8      124.485251
9       76.555788
10     118.471408
11     115.201827
12      90.615958
13       0.000000
14      93.714252
15     124.367332
16     111.816976
17     124.396005
18      88.660113
19     100.265628
20       0.000000
21     104.440486
22     105.442185
23      98.061935
24     124.689385
25     110.142376
26     109.658172
27       0.000000
28     118.054553
29     110.473932
          ...    
335     96.729025
336    112.533806
337    109.356492
338    104.295688
339    111.262286
340    121.081876
341    106.972669
342     76.838798
343    133.348133
344    105.825789
345    102.247263
346    133.807019
347    116.627416
348    128.660684
349     90.939650
350    117.668696
351    109.436590
352    125.390943
353    122.576306
354    107.552603
355     90.863536
356      0.000000
357    121.884508
358     91.028676
359     95

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



0        0.000000
1        0.000000
2      107.127150
3      123.547785
4       96.412388
5       79.960061
6        0.000000
7        0.000000
8      124.485251
9       76.555788
10     118.471408
11     115.201827
12      90.615958
13       0.000000
14      93.714252
15     124.367332
16     111.816976
17     124.396005
18      88.660113
19     100.265628
20       0.000000
21     104.440486
22     105.442185
23      98.061935
24     124.689385
25     110.142376
26     109.658172
27       0.000000
28     118.054553
29     110.473932
          ...    
335     96.729025
336    112.533806
337    109.356492
338    104.295688
339    111.262286
340    121.081876
341    106.972669
342     76.838798
343    133.348133
344    105.825789
345    102.247263
346    133.807019
347    116.627416
348    128.660684
349     90.939650
350    117.668696
351    109.436590
352    125.390943
353    122.576306
354    107.552603
355     90.863536
356      0.000000
357    121.884508
358     91.028676
359     95

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



starting combi {'growth': 'linear', 'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.1, 'holidays_prior_scale': 5.0}
0        0.000000
1        0.000000
2      107.127150
3      123.547785
4       96.412388
5       79.960061
6        0.000000
7        0.000000
8      124.485251
9       76.555788
10     118.471408
11     115.201827
12      90.615958
13       0.000000
14      93.714252
15     124.367332
16     111.816976
17     124.396005
18      88.660113
19     100.265628
20       0.000000
21     104.440486
22     105.442185
23      98.061935
24     124.689385
25     110.142376
26     109.658172
27       0.000000
28     118.054553
29     110.473932
          ...    
335     96.729025
336    112.533806
337    109.356492
338    104.295688
339    111.262286
340    121.081876
341    106.972669
342     76.838798
343    133.348133
344    105.825789
345    102.247263
346    133.807019
347    116.627416
348    128.660684
349     90.939650
350    117.668696
351    109.436590
352  

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



starting combi {'growth': 'linear', 'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.1, 'holidays_prior_scale': 7.5}
0        0.000000
1        0.000000
2      107.127150
3      123.547785
4       96.412388
5       79.960061
6        0.000000
7        0.000000
8      124.485251
9       76.555788
10     118.471408
11     115.201827
12      90.615958
13       0.000000
14      93.714252
15     124.367332
16     111.816976
17     124.396005
18      88.660113
19     100.265628
20       0.000000
21     104.440486
22     105.442185
23      98.061935
24     124.689385
25     110.142376
26     109.658172
27       0.000000
28     118.054553
29     110.473932
          ...    
335     96.729025
336    112.533806
337    109.356492
338    104.295688
339    111.262286
340    121.081876
341    106.972669
342     76.838798
343    133.348133
344    105.825789
345    102.247263
346    133.807019
347    116.627416
348    128.660684
349     90.939650
350    117.668696
351    109.436590
352  

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


starting combi {'growth': 'linear', 'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 0.1, 'holidays_prior_scale': 10.0}



Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



0        0.000000
1        0.000000
2      107.127150
3      123.547785
4       96.412388
5       79.960061
6        0.000000
7        0.000000
8      124.485251
9       76.555788
10     118.471408
11     115.201827
12      90.615958
13       0.000000
14      93.714252
15     124.367332
16     111.816976
17     124.396005
18      88.660113
19     100.265628
20       0.000000
21     104.440486
22     105.442185
23      98.061935
24     124.689385
25     110.142376
26     109.658172
27       0.000000
28     118.054553
29     110.473932
          ...    
335     96.729025
336    112.533806
337    109.356492
338    104.295688
339    111.262286
340    121.081876
341    106.972669
342     76.838798
343    133.348133
344    105.825789
345    102.247263
346    133.807019
347    116.627416
348    128.660684
349     90.939650
350    117.668696
351    109.436590
352    125.390943
353    122.576306
354    107.552603
355     90.863536
356      0.000000
357    121.884508
358     91.028676
359     95

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


starting combi {'growth': 'linear', 'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 2.6, 'holidays_prior_scale': 0.1}



Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



0        0.000000
1        0.000000
2      107.127150
3      123.547785
4       96.412388
5       79.960061
6        0.000000
7        0.000000
8      124.485251
9       76.555788
10     118.471408
11     115.201827
12      90.615958
13       0.000000
14      93.714252
15     124.367332
16     111.816976
17     124.396005
18      88.660113
19     100.265628
20       0.000000
21     104.440486
22     105.442185
23      98.061935
24     124.689385
25     110.142376
26     109.658172
27       0.000000
28     118.054553
29     110.473932
          ...    
335     96.729025
336    112.533806
337    109.356492
338    104.295688
339    111.262286
340    121.081876
341    106.972669
342     76.838798
343    133.348133
344    105.825789
345    102.247263
346    133.807019
347    116.627416
348    128.660684
349     90.939650
350    117.668696
351    109.436590
352    125.390943
353    122.576306
354    107.552603
355     90.863536
356      0.000000
357    121.884508
358     91.028676
359     95

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


starting combi {'growth': 'linear', 'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 2.6, 'holidays_prior_scale': 2.6}



Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



0        0.000000
1        0.000000
2      107.127150
3      123.547785
4       96.412388
5       79.960061
6        0.000000
7        0.000000
8      124.485251
9       76.555788
10     118.471408
11     115.201827
12      90.615958
13       0.000000
14      93.714252
15     124.367332
16     111.816976
17     124.396005
18      88.660113
19     100.265628
20       0.000000
21     104.440486
22     105.442185
23      98.061935
24     124.689385
25     110.142376
26     109.658172
27       0.000000
28     118.054553
29     110.473932
          ...    
335     96.729025
336    112.533806
337    109.356492
338    104.295688
339    111.262286
340    121.081876
341    106.972669
342     76.838798
343    133.348133
344    105.825789
345    102.247263
346    133.807019
347    116.627416
348    128.660684
349     90.939650
350    117.668696
351    109.436590
352    125.390943
353    122.576306
354    107.552603
355     90.863536
356      0.000000
357    121.884508
358     91.028676
359     95

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


starting combi {'growth': 'linear', 'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 2.6, 'holidays_prior_scale': 5.0}



Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



0        0.000000
1        0.000000
2      107.127150
3      123.547785
4       96.412388
5       79.960061
6        0.000000
7        0.000000
8      124.485251
9       76.555788
10     118.471408
11     115.201827
12      90.615958
13       0.000000
14      93.714252
15     124.367332
16     111.816976
17     124.396005
18      88.660113
19     100.265628
20       0.000000
21     104.440486
22     105.442185
23      98.061935
24     124.689385
25     110.142376
26     109.658172
27       0.000000
28     118.054553
29     110.473932
          ...    
335     96.729025
336    112.533806
337    109.356492
338    104.295688
339    111.262286
340    121.081876
341    106.972669
342     76.838798
343    133.348133
344    105.825789
345    102.247263
346    133.807019
347    116.627416
348    128.660684
349     90.939650
350    117.668696
351    109.436590
352    125.390943
353    122.576306
354    107.552603
355     90.863536
356      0.000000
357    121.884508
358     91.028676
359     95

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


starting combi {'growth': 'linear', 'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 2.6, 'holidays_prior_scale': 7.5}



Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



SystemError: <class 'stanfit4anon_model_888a84912910fa0a45b9d614b75bb8a8_1204144898456148612.PyStanHolder'> returned a result with an error set

In [None]:
jo = pd.read_csv('predictions/3_6_SimpleProphet_validation.csv',index_col=0)

In [None]:
y_predict= jo['3_6_SimpleProphet_validation']
y_true = validation_df[6]
mape(y_true,y_predict)

In [None]:
plot_prediction(y_true,y_predict)