In [1]:
import os

#import libraries for data wrangling
import pandas as pd
import numpy as np

#import libraries for plotting data
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

#import libraries for time series analysis
from fbprophet import Prophet

#import libraries to evaluate model performance
import sklearn.metrics as metrics

In [2]:
# Load the three data splits; the first CSV column becomes the row index.
train_df = pd.read_csv('data/train_fil_3.csv', index_col=0)
validation_df = pd.read_csv('data/validation_fil_3.csv', index_col=0)
test_df = pd.read_csv('data/test_fil_3.csv', index_col=0)

# Cast the column labels to int so columns can be selected with integer keys.
for df in (train_df, validation_df, test_df):
    df.columns = df.columns.map(int)

In [3]:
#function to calculate MAPE; observations where y_true is 0 are counted with a percentage error of 0
def mape(y_true, y_predict):
    '''Return the mean absolute percentage error (MAPE), in percent.

    Observations where y_true is 0 cannot be expressed as a percentage; they
    are counted with an error of 0 and still enter the mean.

    Parameters
    ----------
    y_true : array-like
        Observed values (list, ndarray or pandas Series).
    y_predict : array-like
        Predicted values, same length as y_true.

    Values are paired by position; a pandas index on either input is ignored.

    Returns
    -------
    float
        mean(|y_true - y_predict| / |y_true| * 100); nan for empty input.

    Raises
    ------
    ValueError
        If y_true and y_predict differ in shape.
    '''
    # Convert to plain arrays so that pairing is purely positional; indexing a
    # Series with integers would otherwise mix label and position lookups.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_predict, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError(
            f'y_true and y_predict must have the same shape, got {actual.shape} and {predicted.shape}'
        )
    if actual.size == 0:
        return float('nan')

    pct_error = np.zeros(actual.shape)
    nonzero = actual != 0
    # Divide by |y_true| so negative actuals still yield a non-negative error.
    pct_error[nonzero] = (
        np.abs(actual[nonzero] - predicted[nonzero]) / np.abs(actual[nonzero]) * 100
    )
    return np.mean(pct_error)

def median_pe(y_true, y_predict):
    '''Return the median absolute percentage error, in percent.

    Observations where y_true is 0 cannot be expressed as a percentage; they
    are counted with an error of 0 and still enter the median.

    Parameters
    ----------
    y_true : array-like
        Observed values (list, ndarray or pandas Series).
    y_predict : array-like
        Predicted values, same length as y_true.

    Values are paired by position; a pandas index on either input is ignored.

    Returns
    -------
    float
        median(|y_true - y_predict| / |y_true| * 100); nan for empty input.

    Raises
    ------
    ValueError
        If y_true and y_predict differ in shape.
    '''
    # Convert to plain arrays so that pairing is purely positional; indexing a
    # Series with integers would otherwise mix label and position lookups.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_predict, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError(
            f'y_true and y_predict must have the same shape, got {actual.shape} and {predicted.shape}'
        )
    if actual.size == 0:
        return float('nan')

    pct_error = np.zeros(actual.shape)
    nonzero = actual != 0
    # Divide by |y_true| so negative actuals still yield a non-negative error.
    pct_error[nonzero] = (
        np.abs(actual[nonzero] - predicted[nonzero]) / np.abs(actual[nonzero]) * 100
    )
    return np.median(pct_error)

In [4]:
#function to call fb prophet with only y-column and date index
def low_prophet(ds, y, periods=365, store=None, article=None):
    '''Fit a default Prophet model on one series and return its future forecast.

    Parameters
    ----------
    ds : array-like
        Dates of the observations (becomes Prophet's "ds" column).
    y : array-like
        Observed values (becomes Prophet's "y" column).
    periods : int, optional
        Number of future periods to forecast. Defaults to 365, the value that
        was previously hard-coded.
    store, article : optional
        Labels shown in the printed MAPE line. They used to be read from
        global names that this function never received, which raised a
        NameError unless such globals happened to exist in the kernel.

    Returns
    -------
    pandas.Series
        The "yhat" values of the rows after the history, indexed by the
        corresponding forecast dates.

    Side effects
    ------------
    Prints the in-sample MAPE and draws Prophet's forecast plot.
    '''
    data = pd.DataFrame.from_dict({'ds': ds, 'y': y})
    model = Prophet()
    model.fit(data)
    future = model.make_future_dataframe(periods=periods)
    prediction = model.predict(future)

    n_history = len(data)
    # Hand plain arrays to mape so the comparison is positional and does not
    # depend on the (date) index labels carried by the data frame.
    y_true = data['y'].values
    y_fitted = prediction['yhat'].values[:n_history]
    store_label = 'n/a' if store is None else store
    article_label = 'n/a' if article is None else article
    print(f'Filiale {store_label}, Article {article_label}, MAPE:{round(mape(y_true, y_fitted), 1)}')

    # Everything after the history rows is the out-of-sample forecast.
    out = pd.Series(prediction['yhat'][n_history:])
    out.index = pd.to_datetime(prediction['ds'][n_history:])
    model.plot(prediction)
    return out

In [5]:
def simple_prophet_forecast(history,stepsize = 1):
    '''Fit a default Prophet model on `history` and predict `stepsize` periods ahead.

    `history` is handed straight to Prophet.fit, so it must be a dataframe with
    at least the date column "ds" and the value column "y". The return value is
    the dataframe produced by Prophet.predict; its "yhat" column holds the
    point predictions.
    '''
    fitted = Prophet().fit(history)
    horizon = fitted.make_future_dataframe(periods=stepsize)
    return fitted.predict(horizon)
    

In [8]:
# Forecast column 6 over a horizon exactly as long as the validation split, so
# the rows after the training history line up one-to-one with validation_df.
history_6 = pd.DataFrame.from_dict({'ds': train_df[6].index, 'y': train_df[6]})
test = simple_prophet_forecast(history_6, stepsize=len(validation_df))

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



In [9]:
# Preview only the first rows instead of rendering the whole forecast frame.
test.head(10)

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2015-01-01,518.410546,229.666337,531.105552,518.410546,518.410546,-142.163116,-142.163116,-142.163116,-89.351454,-89.351454,-89.351454,-52.811662,-52.811662,-52.811662,0.0,0.0,0.0,376.247430
1,2015-01-02,518.368621,166.009822,455.734170,518.368621,518.368621,-197.034126,-197.034126,-197.034126,-142.617218,-142.617218,-142.617218,-54.416908,-54.416908,-54.416908,0.0,0.0,0.0,321.334495
2,2015-01-03,518.326695,480.179187,762.714745,518.326695,518.326695,107.805446,107.805446,107.805446,163.478308,163.478308,163.478308,-55.672862,-55.672862,-55.672862,0.0,0.0,0.0,626.132141
3,2015-01-04,518.284769,884.326915,1179.783683,518.284769,518.284769,512.500450,512.500450,512.500450,569.074100,569.074100,569.074100,-56.573651,-56.573651,-56.573651,0.0,0.0,0.0,1030.785219
4,2015-01-05,518.242843,152.713278,432.180732,518.242843,518.242843,-216.108034,-216.108034,-216.108034,-158.990041,-158.990041,-158.990041,-57.117993,-57.117993,-57.117993,0.0,0.0,0.0,302.134809
5,2015-01-06,518.200917,134.701472,425.303443,518.200917,518.200917,-236.307710,-236.307710,-236.307710,-178.998655,-178.998655,-178.998655,-57.309055,-57.309055,-57.309055,0.0,0.0,0.0,281.893207
6,2015-01-07,518.158991,145.514497,452.169767,518.158991,518.158991,-219.749247,-219.749247,-219.749247,-162.595039,-162.595039,-162.595039,-57.154208,-57.154208,-57.154208,0.0,0.0,0.0,298.409744
7,2015-01-08,518.117066,221.367281,520.990527,518.117066,518.117066,-146.016149,-146.016149,-146.016149,-89.351454,-89.351454,-89.351454,-56.664694,-56.664694,-56.664694,0.0,0.0,0.0,372.100917
8,2015-01-09,518.075140,177.895291,456.780804,518.075140,518.075140,-198.472426,-198.472426,-198.472426,-142.617218,-142.617218,-142.617218,-55.855208,-55.855208,-55.855208,0.0,0.0,0.0,319.602714
9,2015-01-10,518.033214,489.066928,773.762589,518.033214,518.033214,108.734903,108.734903,108.734903,163.478308,163.478308,163.478308,-54.743404,-54.743404,-54.743404,0.0,0.0,0.0,626.768117


In [17]:
# Out-of-sample MAPE for column 6: rows after the training history are the forecast.
# Both sides are passed as plain arrays so they are paired by position rather
# than by the date labels of validation_df.
forecast_yhat = test['yhat'][len(train_df):].values
mape(validation_df[6].values, forecast_yhat)

10.874769742804403

In [16]:
# Sanity check: the forecast horizon and the validation series should be equally long (expect 0).
n_forecast = len(test['yhat'][len(train_df):])
n_validation = len(validation_df[6])
n_forecast - n_validation

0

In [21]:
# Naive baseline: predict the mean of the series for every observation.
# NOTE(review): the mean is taken from the validation data itself, so this
# baseline is optimistic — consider using the training mean instead.
validation_values = validation_df[6].values
# Compute the mean once instead of once per element.
baseline_prediction = np.full(len(validation_df), np.mean(validation_values))
mape(validation_values, baseline_prediction)

40.03500857919871