In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
from itertools import cycle
color_cycle = cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])

In [2]:
INPUT_DIR = 'C:/ZhangLI/Codes/DataSet/m5-forecasting-accuracy'
calendar = pd.read_csv(f'{INPUT_DIR}/calendar.csv')
selling_prices = pd.read_csv(f'{INPUT_DIR}/sell_prices.csv')
sample_submission = pd.read_csv(f'{INPUT_DIR}/sample_submission.csv')
sales_train_val = pd.read_csv(f'{INPUT_DIR}/sales_train_validation.csv')

In [5]:
# train / val
ids = sorted(list(set(sales_train_val['id'])))  # all items
d_cols = [c for c in sales_train_val.columns if 'd_' in c]  # how many sale in every day
train_dataset = sales_train_val[d_cols[-100:-30]]
val_dataset = sales_train_val[d_cols[-30:]]

In [12]:
val_dataset
predictions = []
predictions.append(train_dataset[train_dataset.columns[-1]].values)
predictions

[array([1, 1, 1, ..., 2, 1, 2], dtype=int64)]

In [32]:
# moving average  
# 正儿八经的滑动平均
predictions = []
for i in range(len(val_dataset.columns)):
    if i == 0:
        predictions.append(np.mean(train_dataset[train_dataset.columns[-30:]].values, axis=1))
    if i < 31 and i > 0:
        predictions.append(0.5 * (np.mean(train_dataset[train_dataset.columns[-30+i:]].values, axis=1) + \
                                  np.mean(predictions[:i], axis=0)))  # This is imp code
    if i > 31:
        predictions.append(np.mean([predictions[:i]], axis=1))
np.transpose(np.array([row.tolist() for row in predictions]))

array([[1.1       , 1.11896552, 1.12616995, ..., 1.08170268, 0.8302437 ,
        1.07454984],
       [0.26666667, 0.25402299, 0.25517241, ..., 0.46434573, 0.63462527,
        0.64092484],
       [0.46666667, 0.47471264, 0.48534483, ..., 0.37516289, 0.45774927,
        0.70847773],
       ...,
       [1.2       , 1.22068966, 1.24802956, ..., 0.8926793 , 1.05531002,
        1.5543564 ],
       [0.96666667, 0.98333333, 1.00535714, ..., 1.17753957, 1.01365493,
        1.0134195 ],
       [1.83333333, 1.86494253, 1.88885468, ..., 2.66855553, 2.2637599 ,
        2.017833  ]])

In [34]:
from tqdm.notebook import tqdm as tqdm
from statsmodels.robust import mad
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
import statsmodels
from scipy import signal
import statsmodels.api as sm

predictions = []
for row in tqdm(train_dataset[train_dataset.columns[-30:]].values[:3]):
    fit = Holt(row).fit(smoothing_level = 0.3, smoothing_slope = 0.01)
    predictions.append(fit.forecast(30))
predictions = np.array(predictions).reshape((-1, 30))
error_holt = np.linalg.norm(predictions - val_dataset.values[:len(predictions)])/len(predictions[0])
error_holt

  0%|          | 0/3 [00:00<?, ?it/s]

  fit = Holt(row).fit(smoothing_level = 0.3, smoothing_slope = 0.01)


0.25864250829361135

In [36]:
predictions = []
for row in tqdm(train_dataset[train_dataset.columns[-30:]].values[:3]):
    fit = ExponentialSmoothing(row, seasonal_periods=3).fit()
    predictions.append(fit.forecast(30))
predictions = np.array(predictions).reshape((-1, 30))
error_exponential = np.linalg.norm(predictions[:3] - val_dataset.values[:3])/len(predictions[0])
error_exponential

  0%|          | 0/3 [00:00<?, ?it/s]

0.2430515872991969

In [37]:
predictions = []
for row in tqdm(train_dataset[train_dataset.columns[-30:]].values[:3]):
    fit = sm.tsa.statespace.SARIMAX(row, seasonal_order=(0, 1, 1, 7)).fit()
    predictions.append(fit.forecast(30))
predictions = np.array(predictions).reshape((-1, 30))
error_arima = np.linalg.norm(predictions[:3] - val_dataset.values[:3])/len(predictions[0])

  0%|          | 0/3 [00:00<?, ?it/s]

  warn('Non-invertible starting seasonal moving average'


In [39]:
from fbprophet import Prophet
dates = ["2007-12-" + str(i) for i in range(1, 31)]
predictions = []
for row in tqdm(train_dataset[train_dataset.columns[-30:]].values[:3]):
    df = pd.DataFrame(np.transpose([dates, row]))
    df.columns = ["ds", "y"]
    model = Prophet(daily_seasonality=True)
    model.fit(df)
    future = model.make_future_dataframe(periods=30)
    forecast = model.predict(future)["yhat"].loc[30:].values
    predictions.append(forecast)
predictions = np.array(predictions).reshape((-1, 30))
error_prophet = np.linalg.norm(predictions[:3] - val_dataset.values[:3])/len(predictions[0])

ModuleNotFoundError: No module named 'fbprophet'