In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Make the helper modules that this notebook writes to ../scripts importable.
# Guarded so that re-running the cell does not pile up duplicate path entries.
if "../scripts" not in sys.path:
    sys.path.append("../scripts")

# Utils

In [3]:
%%writefile "../scripts/utils.py"

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import progressbar
import time

class ProgressBar:
    """Counter-based wrapper around ``progressbar.ProgressBar``.

    Every call to :meth:`update` advances the bar by exactly one step, so the
    caller never has to keep track of the current position.
    """

    def __init__(self, max_value):
        """Create a bar that runs from 0 to ``max_value`` and draw it at 0."""
        # NOTE(review): presumably this pause lets pending output flush before
        # the bar is first drawn - confirm before removing.
        time.sleep(0.5)
        display_widgets = [
            progressbar.SimpleProgress(),
            progressbar.Bar(),
            progressbar.Percentage(),
        ]
        self.bar = progressbar.ProgressBar(
            min_value=0,
            max_value=max_value,
            initial_value=0,
            widgets=display_widgets,
        )
        self.bar.update(0)
        self.counter = 0

    def update(self):
        """Advance the bar by one step."""
        next_value = self.counter + 1
        self.bar.update(next_value)
        # Only remember the new position once the bar accepted it.
        self.counter = next_value

    def finish(self):
        """Mark the underlying bar as finished."""
        self.bar.finish()

def flatten(x):
    """Concatenate the items of every sub-iterable of ``x`` into one list.

    Only one level of nesting is removed.
    """
    flat = []
    for inner in x:
        flat.extend(inner)
    return flat

def plot_grid(df, n_cols, figsize):
    """Plot each column of ``df`` in its own subplot on a grid ``n_cols`` wide.

    ``figsize`` is passed straight through to ``df.plot``.
    """
    n_series = len(df.columns)
    # Round up so a partially filled last row is still allocated.
    n_rows = int(np.ceil(n_series / n_cols))
    grid_shape = (n_rows, n_cols)
    df.plot(subplots=True, layout=grid_shape, figsize=figsize)

def plot_fcst(fcst, train=None, test=None, ax=None):
    """Draw a forecast with its uncertainty band, optionally over observed data.

    Parameters
    ----------
    fcst : object exposing ``yhat``, ``yhat_lower``, ``yhat_upper`` and ``index``
        Forecast to draw; the two bounds are interpolated before shading.
    train, test : optional
        Observed series, drawn as black and red dots respectively when given.
    ax : optional
        Axes to draw on; a new single-axes figure is created when omitted.
    """
    band_low = fcst.yhat_lower.interpolate()
    band_high = fcst.yhat_upper.interpolate()
    if ax is None:
        _, ax = plt.subplots(1, 1)
    # Observed data first, so the forecast line is drawn on top of it.
    for observed, marker in ((train, "k."), (test, "r.")):
        if observed is not None:
            observed.plot(style=marker, ax=ax)
    fcst.yhat.plot(ax=ax)
    ax.fill_between(fcst.index, y1=band_low, y2=band_high, alpha=0.3)

def get_amount_info(df):
    """Return the fraction of non-missing rows per column, sorted ascending.

    The result is a Series indexed by column name; the columns with the most
    data come last.
    """
    amount_info = df.notna().sum().sort_values() / len(df)
    return amount_info

def get_forecastables(df, T=0.5, N=None):
    """Select the columns of ``df`` that hold enough data to be forecast.

    Parameters
    ----------
    df : pandas.DataFrame
        One series per column; missing observations are NaN.
    T : float
        Coverage threshold. Only columns whose fraction of non-missing rows is
        strictly greater than ``T`` are kept.
    N : int, optional
        When given, keep at most the ``N`` best-covered columns among those
        that passed the threshold.

    Returns
    -------
    pandas.DataFrame
        A copy of the selected columns, ordered by ascending coverage.
    """
    amount_info = get_amount_info(df)
    # Index with the boolean mask so that only the passing columns survive;
    # the mask's own index still lists every column, whatever its coverage.
    forecastable = amount_info[amount_info > T]
    if N is not None:
        # amount_info is sorted ascending, so the tail holds the best columns.
        forecastable = forecastable.tail(N)
    return df.loc[:, forecastable.index].copy()


Overwriting ../scripts/utils.py


# Forecaster

In [58]:
%%writefile "../scripts/forecaster.py"

import numpy as np
import pandas as pd
from scipy.stats import boxcox
from scipy.special import inv_boxcox
from fbprophet import Prophet
pd.plotting.register_matplotlib_converters()
import pmdarima as pm
from utils import ProgressBar

class ProphetForecaster:
    """Fit one Prophet model per column of a DataFrame and forecast them all.

    Parameters
    ----------
    use_boxcox : bool
        Box-Cox transform every series before fitting and invert the transform
        on the forecasts. ``scipy.stats.boxcox`` requires strictly positive data.
    yearly_seasonality, weekly_seasonality, daily_seasonality
        Forwarded unchanged to ``Prophet``.
    confidence_interval : float
        Width of the uncertainty interval, forwarded to ``Prophet`` as
        ``interval_width``.
    holidays : optional
        Forwarded to ``Prophet`` as ``holidays`` when not ``None``.
    country_holidays : str, optional
        Country name passed to ``Prophet.add_country_holidays``.
    **kwargs
        Any other ``Prophet`` constructor argument. An ``interval_width`` or
        ``holidays`` entry given here takes precedence over the dedicated
        parameters above.
    """

    def __init__(
        self,
        use_boxcox=True,
        yearly_seasonality="auto",
        weekly_seasonality=False,
        daily_seasonality=False,
        confidence_interval=0.8,
        holidays=None,
        country_holidays=None,
        **kwargs):

        self.use_boxcox = use_boxcox
        self.yearly_seasonality = yearly_seasonality
        self.weekly_seasonality = weekly_seasonality
        self.daily_seasonality = daily_seasonality
        self.confidence_interval = confidence_interval
        self.holidays = holidays
        self.country_holidays = country_holidays
        self.prophet_config = kwargs
        self.models = dict()        # column name -> fitted Prophet model
        self.fcst = dict()          # column name -> forecast DataFrame
        self.lmbda_boxcox = dict()  # column name -> fitted Box-Cox lambda

    def _prophet_kwargs(self):
        """Build the extra keyword arguments handed to every ``Prophet`` model."""
        config = dict(self.prophet_config)
        # setdefault keeps explicit **kwargs authoritative, as they were before
        # the dedicated parameters were wired through.
        config.setdefault("interval_width", self.confidence_interval)
        if self.holidays is not None:
            config.setdefault("holidays", self.holidays)
        return config

    def fit(self, train_df, regressors=None):
        """Fit one model per column of ``train_df``.

        Parameters
        ----------
        train_df : pandas.DataFrame
            One series per column. Missing values are dropped per column. The
            index becomes Prophet's ``ds`` column, so it is presumably a
            DatetimeIndex (TODO confirm against callers).
        regressors : pandas.DataFrame, optional
            Extra regressors, left-joined on the index; every column is
            registered with ``add_regressor``.

        Returns
        -------
        dict
            Mapping of column name to fitted model (also kept in ``self.models``).
        """
        print("Fitting...")
        progress_bar = ProgressBar(len(train_df.columns))
        prophet_kwargs = self._prophet_kwargs()
        for item in train_df.columns:
            self.models[item] = Prophet(
                yearly_seasonality=self.yearly_seasonality,
                weekly_seasonality=self.weekly_seasonality,
                daily_seasonality=self.daily_seasonality,
                **prophet_kwargs)
            target = train_df[item].dropna()
            if self.use_boxcox:
                # boxcox returns a bare array, so the index is restored by hand.
                idx = target.index
                target, self.lmbda_boxcox[item] = boxcox(target)
                target = pd.Series(target, index=idx)
            # Prophet expects the columns to be called "ds" and "y".
            target.index.name = "ds"
            target.name = "y"
            if self.country_holidays is not None:
                self.models[item].add_country_holidays(country_name=self.country_holidays)
            if regressors is not None:
                target = pd.merge(target, regressors, left_index=True, right_index=True, how="left")
                for reg in regressors.columns:
                    self.models[item].add_regressor(reg)
            target = target.reset_index()
            self.models[item].fit(target)
            progress_bar.update()
        progress_bar.finish()
        return self.models

    def predict(self, steps, freq="D", regressors=None):
        """Forecast ``steps`` periods past the end of the training data.

        Parameters
        ----------
        steps : int
            Number of future periods to append to the history.
        freq : str
            Frequency of the future periods, passed to
            ``make_future_dataframe``.
        regressors : pandas.DataFrame, optional
            Regressor values, left-joined on the future frame's ``ds`` index.

        Returns
        -------
        pandas.DataFrame
            ``yhat``, ``yhat_lower`` and ``yhat_upper`` for every fitted
            column, concatenated along the columns and sorted by column label.

        Raises
        ------
        ValueError
            If no model has been fitted yet.
        """
        if not self.models:
            raise ValueError("fit() must be called before predict()")
        print("Forecasting...")
        progress_bar = ProgressBar(len(self.models))
        for item, model in self.models.items():
            future = model.make_future_dataframe(steps, freq=freq).set_index("ds")
            if regressors is not None:
                future = pd.merge(future, regressors, left_index=True, right_index=True, how="left")
            pred = model.predict(future.reset_index()).set_index("ds")
            pred = pred[["yhat", "yhat_lower", "yhat_upper"]]
            self.fcst[item] = pred
            if self.use_boxcox:
                # Bring the forecast back to the scale of the original data.
                self.fcst[item] = inv_boxcox(
                    self.fcst[item],
                    self.lmbda_boxcox[item])
            progress_bar.update()
        progress_bar.finish()
        fcst_df = pd.concat(self.fcst, axis=1).sort_index(axis=1)
        return fcst_df

class ARIMAForecaster:
    """Fit one auto-ARIMA model per column, with Fourier terms as regressors.

    Seasonality is modelled through the exogenous Fourier terms produced by
    ``fourier`` rather than through a seasonal ARIMA component.

    Parameters
    ----------
    use_boxcox : bool
        Box-Cox transform every series before fitting and invert the transform
        on the forecasts. ``scipy.stats.boxcox`` requires strictly positive data.
    n_fourier_terms : int
        Number of sine/cosine pairs generated per seasonal period.
    seasonality : iterable of float, optional
        Seasonal periods measured in time steps. Defaults to ``[365.25]``.
    confidence_interval : float
        Coverage of the forecast interval; ``1 - confidence_interval`` is
        passed to the model as ``alpha``.
    **kwargs
        Forwarded to ``pm.auto_arima``.
    """

    def __init__(
        self,
        use_boxcox=True,
        n_fourier_terms=10,
        seasonality=None,
        confidence_interval=0.8,
        **kwargs):

        self.use_boxcox = use_boxcox
        self.n_fourier_terms = n_fourier_terms
        # Own a private list so that instances never share (or mutate) a
        # default object or the caller's list.
        self.seasonality = [365.25] if seasonality is None else list(seasonality)
        self.confidence_interval = confidence_interval
        self.arima_config = kwargs
        self.train_ds = None        # index of the training data, set by fit()
        self.models = dict()        # column name -> fitted ARIMA model
        self.fcst = dict()          # column name -> forecast DataFrame
        self.lmbda_boxcox = dict()  # column name -> fitted Box-Cox lambda

    def _forecast_index(self, steps, freq="D"):
        """Return the ``steps`` timestamps that directly follow the training index."""
        # Start at the last training timestamp and drop it, so the result has
        # exactly `steps` entries for any horizon (including 0).
        return pd.date_range(
            start=self.train_ds.max(),
            periods=steps + 1,
            freq=freq)[1:]

    def fit(self, train_df):
        """Fit one model per column of ``train_df``.

        Gaps are filled by interpolation, then back-filled, before fitting.
        The index of ``train_df`` is remembered to date the forecasts;
        ``predict`` treats it as a datetime index.

        Returns
        -------
        dict
            Mapping of column name to fitted model (also kept in ``self.models``).
        """
        self.train_ds = train_df.index
        print("Fitting...")
        progress_bar = ProgressBar(len(train_df.columns))
        for item in train_df.columns:
            target = train_df[item].interpolate().bfill()
            if self.use_boxcox:
                # boxcox returns a bare array, so the index is restored by hand.
                idx = target.index
                target, self.lmbda_boxcox[item] = boxcox(target)
                target = pd.Series(target, index=idx)
            self.models[item] = pm.auto_arima(
                target,
                seasonal=False,
                exogenous=fourier(
                    len(target),
                    seasonality=self.seasonality,
                    n_terms=self.n_fourier_terms),
                method="bfgs",
                suppress_warnings=True,
                **self.arima_config)
            progress_bar.update()
        progress_bar.finish()
        return self.models

    def predict(self, steps, freq="D"):
        """Forecast ``steps`` periods past the end of the training data.

        Parameters
        ----------
        steps : int
            Forecast horizon, in periods.
        freq : str
            Frequency used to date the forecast; it should match the spacing
            of the training index.

        Returns
        -------
        pandas.DataFrame
            ``yhat``, ``yhat_lower`` and ``yhat_upper`` for every fitted
            column, concatenated along the columns.

        Raises
        ------
        ValueError
            If no model has been fitted yet.
        """
        if not self.models or self.train_ds is None:
            raise ValueError("fit() must be called before predict()")
        print("Forecasting...")
        progress_bar = ProgressBar(len(self.models))
        self.fcst_ds = self._forecast_index(steps, freq)
        # The forecast regressors must continue the time axis used in fit()
        # (t = n_train, n_train + 1, ...). Restarting at t = 0 would shift the
        # seasonal phase whenever n_train is not a multiple of the period.
        n_train = len(self.train_ds)
        future_terms = fourier(
            n_train + steps,
            seasonality=self.seasonality,
            n_terms=self.n_fourier_terms)[n_train:]
        for item, model in self.models.items():
            pred = model.predict(
                exogenous=future_terms,
                n_periods=steps,
                return_conf_int=True,
                alpha=(1.0 - self.confidence_interval))
            # Coerce to plain arrays so the values are placed positionally
            # under the forecast index instead of being aligned on labels.
            point = np.asarray(pred[0])
            bounds = np.asarray(pred[1])
            self.fcst[item] = pd.DataFrame(
                {"yhat": point,
                 "yhat_lower": bounds[:, 0],
                 "yhat_upper": bounds[:, 1]},
                index=self.fcst_ds)
            if self.use_boxcox:
                # Bring the forecast back to the scale of the original data.
                self.fcst[item] = inv_boxcox(
                    self.fcst[item],
                    self.lmbda_boxcox[item])
            progress_bar.update()
        progress_bar.finish()
        return pd.concat(self.fcst, axis=1)
    
def fourier(steps, seasonality, n_terms=10, start=0):
    """Build a matrix of Fourier (sine/cosine) regressors.

    Parameters
    ----------
    steps : int
        Number of consecutive time steps (rows) to generate.
    seasonality : iterable of float
        Seasonal periods, measured in time steps.
    n_terms : int
        Number of harmonics per period.
    start : int
        Time index of the first row. Use the length of an earlier window to
        generate terms that continue it without a phase jump.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(steps, 2 * n_terms * len(seasonality))``. For each
        period in turn the columns are sin, cos of harmonic 1, then sin, cos
        of harmonic 2, and so on.

    Raises
    ------
    ValueError
        If ``seasonality`` is empty.
    """
    periods = list(seasonality)
    if not periods:
        raise ValueError("seasonality must contain at least one period")
    coeff_list = []
    t = np.arange(start, start + steps)
    for period in periods:
        coeff_M = np.zeros((steps, 2*n_terms))
        for k in range(n_terms):
            coeff_M[:, 2*k] = np.sin(2*np.pi*(k+1)*t/period)
            coeff_M[:, 2*k+1] = np.cos(2*np.pi*(k+1)*t/period)
        coeff_list.append(coeff_M)
    coeff = np.concatenate(coeff_list, axis=1)
    return coeff

Overwriting ../scripts/forecaster.py


# Misc

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import utils

In [6]:
# Default figure size (width, height) for every plot drawn in this notebook.
plt.rc("figure", figsize=(14, 4))

In [7]:
# Load ../data/base_limpia.csv, parsing its three date columns as datetimes.
data = pd.read_csv(
    "../data/base_limpia.csv",
    parse_dates=["tiempo", "fecha pedido", "fecha liq"],
)

In [9]:
# Sum "u pedidas" per "tiempo" value and "producto", one column per product,
# then conform the index to a daily frequency.
u_producto = (
    pd.pivot_table(
        data,
        index="tiempo",
        columns="producto",
        values="u pedidas",
        aggfunc="sum",
    )
    .asfreq("D")
)

In [11]:
# Pin the index to every day from 2017-01-01 to 2019-12-31; days that are not
# in the data become all-NaN rows and days outside the range are dropped.
u_producto = u_producto.reindex(
    index=pd.date_range("2017-01-01", "2019-12-31", freq="D")
)

In [13]:
# Select the product columns to forecast (coverage threshold T=0.5, at most N=6 columns).
u_producto_forecastable = utils.get_forecastables(df=u_producto, T=0.5, N=6)

In [14]:
# Chronological split: everything up to the end of 2018 is used for training,
# 2019 onwards is held out for testing.
train = u_producto_forecastable[:"2018-12-31"].copy()
test = u_producto_forecastable["2019-01-01":].copy()