### Test for constrained model with HMC/NUTS sampling

In [1]:
import os
import numpy as np
import pandas as pd

from fbprophet import Prophet

Importing plotly failed. Interactive plots will not work.


In [30]:
df = pd.read_csv(os.path.join('fbprophet','tests' ,'cp_20200613.txt'))

In [31]:
df.columns

Index(['y', 'ds', 'store_count', 'home_rec', 'discount_money', 'discount_gift',
       'attend_restrict_redeem', 'is_global_redeem', 'is_redeem',
       'is_flashsale', 'is_plan_market', 'weighted_redeem_level',
       'global_redeem_benefit', 'restrict_redeem_benefit',
       'weighted_discount_level', 'auto_clear_rate', 'in_stock_rate'],
      dtype='object')

In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 396 entries, 0 to 395
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   y                        396 non-null    float64
 1   ds                       396 non-null    object 
 2   store_count              396 non-null    int64  
 3   home_rec                 396 non-null    int64  
 4   discount_money           396 non-null    int64  
 5   discount_gift            396 non-null    int64  
 6   attend_restrict_redeem   396 non-null    int64  
 7   is_global_redeem         396 non-null    float64
 8   is_redeem                396 non-null    float64
 9   is_flashsale             396 non-null    int64  
 10  is_plan_market           396 non-null    int64  
 11  weighted_redeem_level    396 non-null    int64  
 12  global_redeem_benefit    396 non-null    float64
 13  restrict_redeem_benefit  396 non-null    float64
 14  weighted_discount_level  3

In [33]:
df.head()

Unnamed: 0,y,ds,store_count,home_rec,discount_money,discount_gift,attend_restrict_redeem,is_global_redeem,is_redeem,is_flashsale,is_plan_market,weighted_redeem_level,global_redeem_benefit,restrict_redeem_benefit,weighted_discount_level,auto_clear_rate,in_stock_rate
0,27925.0,2020-05-17,42,0,0,0,0,0.0,0.0,3,16,0,0.0,0.0,42.039286,0.0,0.854349
1,1738.0,2020-05-11,42,0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.916997
2,3156.0,2019-10-07,40,0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.864427
3,3204.0,2020-03-15,42,0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.698065
4,2354.0,2020-04-17,42,0,0,0,0,0.0,0.0,0,16,0,0.0,0.0,0.0,0.0,0.854861


In [34]:
df['weighted_discount_level'] = df['weighted_discount_level'].clip(0)
additive_regressor = list(set(df.columns) - set(['ds', 'y']))

In [35]:
additive_regressor

['weighted_redeem_level',
 'restrict_redeem_benefit',
 'is_global_redeem',
 'auto_clear_rate',
 'store_count',
 'attend_restrict_redeem',
 'is_plan_market',
 'is_redeem',
 'is_flashsale',
 'discount_money',
 'global_redeem_benefit',
 'weighted_discount_level',
 'discount_gift',
 'in_stock_rate',
 'home_rec']

In [36]:
model = Prophet(daily_seasonality=False, 
                weekly_seasonality=True, 
                yearly_seasonality=True,
                uncertainty_samples=False,
                stan_backend='PYSTAN',
                mcmc_samples=50,
               )

for i in additive_regressor:
    if i !='in_stock_rate':
        model.add_regressor(i, mode='additive', standardize='auto', constraints=[0, 1e5])
    else:
        model.add_regressor(i, mode='additive', standardize='auto', constraints=[0, 1e5])

In [37]:
model.fit(df, **{'init_r': 0.01})

/home/alexander/Documents/py_projects/git/prophet/fbprophet/stan_model/contrib/prophet_normal_truncated.pkl
Stan backend: PYSTAN




<fbprophet.forecaster.Prophet at 0x7f2fe07b01d0>

### Making quick diagnostics from stansummary
More: 
* https://betanalpha.github.io/assets/case_studies/rstan_workflow.html

* https://arxiv.org/pdf/1903.08008.pdf

In [38]:
print(model.fitted.stansummary())

Inference for Stan model: anon_model_2e615812b02877d4b6be58eb42aec10e.
4 chains, each with iter=50; warmup=25; thin=1; 
post-warmup draws per chain=25, total post-warmup draws=100.

                          mean se_mean     sd    2.5%     25%     50%     75%   97.5%  n_eff   Rhat
k                        -0.39    0.02    0.2   -0.83   -0.52   -0.42   -0.23    0.03    155   0.99
m                         0.16  5.9e-3   0.04    0.08    0.14    0.16    0.18    0.24     40   1.06
delta[1]                  0.01  8.4e-3   0.08   -0.13   -0.03  3.0e-3    0.05    0.21     96   0.99
delta[2]                  0.02  7.2e-3   0.08   -0.15   -0.03    0.02    0.07    0.18    137    1.0
delta[3]                  0.02  7.6e-3   0.07   -0.13   -0.02    0.02    0.06     0.2     90   1.01
delta[4]                  0.04  9.4e-3   0.08   -0.11 -6.7e-3    0.02    0.06    0.29     71   1.05
delta[5]                  0.01  6.2e-3   0.06   -0.11   -0.03    0.01    0.05    0.13     96   1.01
delta[6]          

In [39]:
def get_stansummary(model):
    """
    Make a bit better stan summary
    """
    df = model.train_component_cols.copy(deep=True)
    df.drop(columns='multiplicative_terms', inplace=True)
    real_regname_map = dict()
    for i in df.columns:
        number_of_beta = df.iloc[np.flatnonzero(df[i])].index.values[0]
        real_regname_map.update({'beta[{}]'.format(number_of_beta+1) : i})
    strings_array = model.fitted.stansummary().split(sep='\n')[4:-5]
    values_array = [string.split() for string in strings_array]
    values_array[0].insert(0, 'var')
    dict_stansummary = dict(zip(values_array[0], zip(*values_array[1:])))
    help_list = list(dict_stansummary['var'])
    rev_subs = {beta: name for beta, name in real_regname_map.items()}
    help_list = [rev_subs.get(item, item) for item in help_list]
    dict_stansummary['var'] = tuple(help_list)
    df_stan_results = pd.DataFrame.from_dict(dict_stansummary, orient='columns')
    df_stan_results['Rhat'] = df_stan_results['Rhat'].astype(float)
    return df_stan_results

In [40]:
df_stan_summary = get_stansummary(model)
df_stan_summary

Unnamed: 0,var,mean,se_mean,sd,2.5%,25%,50%,75%,97.5%,n_eff,Rhat
0,k,-0.39,0.02,0.2,-0.83,-0.52,-0.42,-0.23,0.03,155,0.99
1,m,0.16,5.9e-3,0.04,0.08,0.14,0.16,0.18,0.24,40,1.06
2,delta[1],0.01,8.4e-3,0.08,-0.13,-0.03,3.0e-3,0.05,0.21,96,0.99
3,delta[2],0.02,7.2e-3,0.08,-0.15,-0.03,0.02,0.07,0.18,137,1.00
4,delta[3],0.02,7.6e-3,0.07,-0.13,-0.02,0.02,0.06,0.2,90,1.01
...,...,...,...,...,...,...,...,...,...,...,...
106,weighted_discount_level,6.7e-4,1.1e-4,6.4e-4,1.9e-5,1.9e-4,4.4e-4,1.1e-3,2.1e-3,32,1.13
107,discount_gift,0.51,0.08,0.25,0.05,0.28,0.56,0.74,0.87,10,1.19
108,in_stock_rate,6.5e-3,4.4e-4,3.8e-3,6.9e-4,3.5e-3,6.2e-3,9.1e-3,0.02,74,0.98
109,home_rec,8.6e-3,4.0e-4,3.4e-3,3.1e-3,5.3e-3,8.6e-3,0.01,0.02,75,0.98


In [41]:
# Regressors with high difference in beetween-chain and within-chain variance
df_stan_summary[df_stan_summary['Rhat']>1.1]

Unnamed: 0,var,mean,se_mean,sd,2.5%,25%,50%,75%,97.5%,n_eff,Rhat
28,beta_constrained[1],0.53,0.13,0.33,0.02,0.21,0.51,0.86,0.99,6,1.74
29,beta_constrained[2],0.42,0.08,0.28,0.04,0.18,0.33,0.69,0.93,12,1.14
30,beta_constrained[3],0.3,0.07,0.23,0.06,0.12,0.22,0.42,0.83,12,1.17
31,beta_constrained[4],0.36,0.06,0.27,0.02,0.13,0.31,0.56,0.89,18,1.48
33,beta_constrained[6],0.57,0.06,0.27,0.03,0.34,0.6,0.83,0.95,18,1.18
35,beta_constrained[8],0.56,0.13,0.27,0.07,0.3,0.57,0.8,0.99,4,1.68
37,beta_constrained[10],0.45,0.06,0.29,0.05,0.19,0.41,0.7,0.93,20,1.27
38,beta_constrained[11],0.53,0.1,0.27,0.05,0.3,0.55,0.71,0.95,8,1.22
39,beta_constrained[12],0.00067,0.00011,0.00064,1.9e-05,0.00019,0.00044,0.0011,0.0021,32,1.13
40,beta_constrained[13],0.51,0.08,0.25,0.05,0.28,0.56,0.74,0.87,10,1.19


Lets add more samples for Markov chains

In [43]:
model2 = Prophet(daily_seasonality=False, 
                weekly_seasonality=True, 
                yearly_seasonality=True,
                uncertainty_samples=False,
                stan_backend='PYSTAN',
                mcmc_samples=300,
               )

for i in additive_regressor:
    if i !='in_stock_rate':
        model2.add_regressor(i, mode='additive', standardize='auto', constraints=[0, 1e5])
    else:
        model2.add_regressor(i, mode='additive', standardize='auto', constraints=[0, 1e5])

In [44]:
model2.fit(df, **{'init_r': 0.01})

/home/alexander/Documents/py_projects/git/prophet/fbprophet/stan_model/contrib/prophet_normal_truncated.pkl
Stan backend: PYSTAN




<fbprophet.forecaster.Prophet at 0x7f2fe0426710>

In [48]:
df_stan_summary2 = get_stansummary(model2)
df_stan_summary2

Unnamed: 0,var,mean,se_mean,sd,2.5%,25%,50%,75%,97.5%,n_eff,Rhat
0,k,-0.36,0.01,0.22,-0.81,-0.51,-0.36,-0.23,0.1,318,1.00
1,m,0.15,2.0e-3,0.04,0.08,0.13,0.15,0.18,0.22,320,1.00
2,delta[1],2.2e-3,3.0e-3,0.07,-0.15,-0.03,3.9e-3,0.04,0.16,549,1.00
3,delta[2],0.02,3.0e-3,0.08,-0.13,-0.03,9.2e-3,0.06,0.19,698,1.00
4,delta[3],0.02,2.4e-3,0.08,-0.13,-0.02,0.01,0.06,0.2,1069,1.00
...,...,...,...,...,...,...,...,...,...,...,...
106,weighted_discount_level,7.0e-4,2.1e-5,6.7e-4,2.7e-5,2.1e-4,5.1e-4,1.0e-3,2.4e-3,966,1.00
107,discount_gift,0.45,0.01,0.29,0.02,0.19,0.43,0.7,0.96,513,1.00
108,in_stock_rate,6.5e-3,1.5e-4,3.9e-3,5.8e-4,3.6e-3,6.0e-3,8.7e-3,0.02,710,1.00
109,home_rec,8.4e-3,1.7e-4,3.7e-3,1.2e-3,5.8e-3,8.1e-3,0.01,0.02,496,1.00


In [49]:
# Regressors with high difference in beetween-chain and within-chain variance
df_stan_summary2[df_stan_summary2['Rhat']>1.1]

Unnamed: 0,var,mean,se_mean,sd,2.5%,25%,50%,75%,97.5%,n_eff,Rhat


It seems that model2 converges. So the model specification fits the process in the data.

In [50]:
# Check statistics over posterior distributions for 'home_rec', 'is_plan_market', 'is_flashsale', 'store_count'
df_stan_summary2[df_stan_summary2['var'].isin(['home_rec', 'is_plan_market', 'is_flashsale', 'store_count'])]

Unnamed: 0,var,mean,se_mean,sd,2.5%,25%,50%,75%,97.5%,n_eff,Rhat
99,store_count,0.0099,0.00029,0.0084,0.00023,0.0034,0.0074,0.01,0.03,822,1.0
101,is_plan_market,0.03,0.00014,0.0042,0.02,0.02,0.03,0.03,0.04,866,1.0
103,is_flashsale,0.06,0.00012,0.0041,0.05,0.06,0.06,0.07,0.07,1076,1.0
109,home_rec,0.0084,0.00017,0.0037,0.0012,0.0058,0.0081,0.01,0.02,496,1.0


In [51]:
future = model2.make_future_dataframe(periods=0)
for reg in additive_regressor:
    future[reg] = df[reg]
df_forecast = model2.predict(future)

In [52]:
df_forecast.head()

Unnamed: 0,ds,trend,additive_terms,attend_restrict_redeem,auto_clear_rate,discount_gift,discount_money,extra_regressors_additive,global_redeem_benefit,home_rec,...,is_plan_market,is_redeem,restrict_redeem_benefit,store_count,weekly,weighted_discount_level,weighted_redeem_level,yearly,multiplicative_terms,yhat
0,2019-05-08,9428.06085,8798.454134,0.0,0.0,0.0,0.0,13350.205591,0.0,-150.308513,...,1842.016608,0.0,0.0,1011.203414,709.686992,24.660704,0.0,-5261.43845,0.0,18226.514984
1,2019-05-09,9371.692273,-7347.191189,0.0,0.0,0.0,0.0,-1290.449177,0.0,-150.308513,...,-1076.46732,0.0,0.0,1011.203414,-712.878189,-10.553742,0.0,-5343.863822,0.0,2024.501084
2,2019-05-10,9315.323696,-8923.364816,0.0,0.0,0.0,0.0,-2784.149161,0.0,-150.308513,...,-1076.46732,0.0,0.0,-369.612282,-752.650188,-10.553742,0.0,-5386.565467,0.0,391.958881
3,2019-05-11,9258.95512,-6752.061205,0.0,0.0,0.0,0.0,-1760.562199,0.0,-150.308513,...,-1076.46732,0.0,0.0,1011.203414,398.478933,-10.553742,0.0,-5389.977939,0.0,2506.893915
4,2019-05-12,9202.586543,-2750.850711,0.0,0.0,0.0,0.0,1494.609425,0.0,-150.308513,...,1842.016608,0.0,0.0,1011.203414,1109.82362,-10.553742,0.0,-5355.283756,0.0,6451.735832
