In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from datetime import timedelta
from scipy import stats
from statsmodels.tsa import stattools
from copy import deepcopy

# don't forget to upload utils file to the colab session
# from utils import qualityWAPE, qualityRMSE
import warnings, pylab

warnings.filterwarnings('ignore')

# setting of the plotting style, registers pandas date converters for matplotlib and the default figure size
import seaborn as sns
sns.set_style("darkgrid")
pd.plotting.register_matplotlib_converters()
# Default figure size
sns.mpl.rc("figure", figsize=(25, 5))
sns.mpl.rc("font", size=14)

  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,


In [2]:
from itertools import product
from tqdm import tqdm
from ipywidgets import IntProgress
from IPython.display import display, clear_output


def percentile(n):
    '''Calculate n - percentile of data'''
    def percentile_(x):
        return np.nanpercentile(x, n)
    percentile_.__name__ = 'pctl%s' % n
    return percentile_

# train[['SKU_id', 'Store_id', 'Demand']].groupby(['SKU_id', 'Store_id']).agg({'mean', 'std', percentile(50)})

# добавляем лаги
def lagged_features(df
                    , target_var = 'demand'
                    , by_store = False
                    , lags = [7, 14, 21, 28]
                    , windows = [7, 14]
                    , aggregation_methods = {'mean', 'median', percentile(10),  percentile(90)}
                    , filters = None
                    ):
    '''Calculate lagged features '''
    ''' df - data frame
        target_var - column name which is used to calculate lagged features
        by_store - whether calculate stats by stores or not
        lags - 
        windows - list of windows, calculation is performed within time range length of window
        aggregation_methods - method of aggregation, e.g. 'mean', 'median', 'std', etc.
        filter = dict of dict: {<column_name>:{'postfix':'condition of the filter'}}
    '''
    out_df = deepcopy(df)

    if filters is None:
        filters = {'':{''}}
        
    total = 1
    for k, v in flts.items():
         total *= len(flts[k])
    
    progress = IntProgress(min=0, max=total) # instantiate the bar
    display(progress) # display the bar
    
    keys, values = zip(*filters.items())
    for bundle in product(*values):
        # print(bundle)
        # define dates for each SKU and Store pairs        
        condition = ' & '.join([keys[i] + filters[keys[i]][bundle[i]] for i in range(len(keys))])
        name =  '_'.join([bundle[i] for i in range(len(keys))])
        if len(condition) > 0:
            _idx = df.eval(condition)
        else:
            _idx = df.index >= 0

        if len(df[_idx].index) > 0:
            for w in windows:
                for i, method in enumerate(aggregation_methods):
                    if by_store:
                        lf_df = data.set_index('Date').sort_index().groupby(['Store_id', 'Date']).\
                        agg(method).rolling(window=w, min_periods=1).agg(method)
                    else:
                        lf_df = data.set_index('Date').sort_index().groupby(['SKU_id', 'Date']).\
                        agg(method).rolling(window=w, min_periods=1).agg(method)
                    for l in lags:
                        if i > 1:
                            method_name = method.__name__
                        else:
                            method_name = method
                        new_names = {x: "lag{0}_wdw{1}_{2}_{3}_{4}".
                                     format(l, w, x, name, method_name) for x in lf_df.columns}

                        out_df = pd.merge(out_df, lf_df['Demand'].shift(l).reset_index().rename(columns = new_names),
                                          how='left', on=['SKU_id', 'Date'])
        progress.value += 1
    return out_df

In [3]:
data = pd.read_csv('https://raw.githubusercontent.com/aromanenko/ATSF/main/data/retail_2products.csv', sep=';')
data['Date'] = pd.to_datetime(data['Date'])
data["weekday"] = data.Date.dt.weekday
data["monthday"] = data.Date.dt.day
data['is_weekend'] = data.weekday.isin([5,6])*1
data = data.sort_values('Date')
data.head()

Unnamed: 0,Store_id,SKU_id,Date,Promo,Demand,Regular_Price,Promo_Price,weekday,monthday,is_weekend
0,1,1,2015-01-01,,22.0,163.78,,3,1,0
9144,19,1,2015-01-01,,25.0,163.78,,3,1,0
63037,39,2,2015-01-01,,3.0,135.78,,3,1,0
63545,40,2,2015-01-01,,0.0,135.78,,3,1,0
8636,18,1,2015-01-01,,13.0,163.78,,3,1,0


In [103]:
flts = {'Promo': {'oprm':'>0', 'npromo':'==0', 'aprm':'>-1'}, 'weekday' : {'md':'==0', 'tue':'==1', 'wd':'==2', 'th':'==3', 'fr':'==4', 'sa':'==5', 'su':'==6', 'anyday':'>-1'}}
# data with added lagged features
data_lagged_features = lagged_features(data, target_var = 'Demand'
                    , by_store=False
                    , lags = [22, 28]
                    , windows = [14, 28, 56]
                    , aggregation_methods = ['mean', 'median', percentile(10),  percentile(90)]
                    , filters = flts
                    )

IntProgress(value=0, max=24)

KeyboardInterrupt: 

In [3]:
data_lagged_features = pd.read_csv('lagged.csv')

In [4]:
data_lagged_features

Unnamed: 0.1,Unnamed: 0,Date,Store_id,SKU_id,Promo,Demand,Regular_Price,Promo_Price,Actual_Price,weekday,...,lag22_wdw28_Demand_aprm_anyday_pctl90,lag28_wdw28_Demand_aprm_anyday_pctl90,lag22_wdw56_Demand_aprm_anyday_mean,lag28_wdw56_Demand_aprm_anyday_mean,lag22_wdw56_Demand_aprm_anyday_median,lag28_wdw56_Demand_aprm_anyday_median,lag22_wdw56_Demand_aprm_anyday_pctl10,lag28_wdw56_Demand_aprm_anyday_pctl10,lag22_wdw56_Demand_aprm_anyday_pctl90,lag28_wdw56_Demand_aprm_anyday_pctl90
0,0,2015-01-01,1,1,0.0,22.0,163.78,,163.78,3,...,,,,,,,,,,
1,1,2015-01-02,1,1,0.0,41.0,163.78,,163.78,4,...,,,,,,,,,,
2,2,2015-01-03,1,1,0.0,35.0,163.78,,163.78,5,...,,,,,,,,,,
3,3,2015-01-04,1,1,0.0,72.0,163.78,,163.78,6,...,,,,,,,,,,
4,4,2015-01-05,1,1,0.0,25.0,163.78,,163.78,0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92881,92881,2016-06-15,110,2,0.0,,133.06,,133.06,2,...,218.70,218.42,28.608624,27.657118,3.0,3.0,0.0,0.0,169.58,167.7
92882,92882,2016-06-16,110,2,0.0,,133.06,,133.06,3,...,218.84,218.42,29.094693,27.686978,3.0,3.0,0.0,0.0,170.52,167.7
92883,92883,2016-06-17,110,2,0.0,,133.06,,133.06,4,...,218.98,218.42,29.602205,27.719650,3.0,3.0,0.0,0.0,171.46,167.7
92884,92884,2016-06-18,110,2,0.0,,133.06,,133.06,5,...,219.12,218.42,30.122816,27.731160,3.0,3.0,0.0,0.0,172.40,167.7


In [5]:
# data.set_index('Date', inplace=True)

In [6]:
data_lagged_features = data_lagged_features.set_index(['Date', 'Store_id', 'SKU_id'])

In [7]:
df_train = data_lagged_features[data_lagged_features['Demand'].notna()]
df_test = data_lagged_features[data_lagged_features['Demand'].isna()]

In [8]:
df_train

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 0,Promo,Demand,Regular_Price,Promo_Price,Actual_Price,weekday,monthday,is_weekend,lag22_wdw14_Demand_oprm_md_mean,...,lag22_wdw28_Demand_aprm_anyday_pctl90,lag28_wdw28_Demand_aprm_anyday_pctl90,lag22_wdw56_Demand_aprm_anyday_mean,lag28_wdw56_Demand_aprm_anyday_mean,lag22_wdw56_Demand_aprm_anyday_median,lag28_wdw56_Demand_aprm_anyday_median,lag22_wdw56_Demand_aprm_anyday_pctl10,lag28_wdw56_Demand_aprm_anyday_pctl10,lag22_wdw56_Demand_aprm_anyday_pctl90,lag28_wdw56_Demand_aprm_anyday_pctl90
Date,Store_id,SKU_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2015-01-01,1,1,0,0.0,22.0,163.78,,163.78,3,1,0,,...,,,,,,,,,,
2015-01-02,1,1,1,0.0,41.0,163.78,,163.78,4,2,0,,...,,,,,,,,,,
2015-01-03,1,1,2,0.0,35.0,163.78,,163.78,5,3,1,,...,,,,,,,,,,
2015-01-04,1,1,3,0.0,72.0,163.78,,163.78,6,4,1,,...,,,,,,,,,,
2015-01-05,1,1,4,0.0,25.0,163.78,,163.78,0,5,0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-05-18,106,2,86911,0.0,9.0,138.50,,138.50,2,18,0,41.809723,...,138.63,14.80,20.056237,13.520812,3.0,3.0,0.0,0.0,131.25,106.10
2016-05-19,106,2,86912,0.0,3.0,138.50,,138.50,3,19,0,42.174824,...,138.63,53.17,20.151568,14.772018,3.0,3.0,0.0,0.0,131.25,108.20
2016-05-20,106,2,86913,0.0,13.0,138.50,,138.50,4,20,0,42.235437,...,138.63,135.06,20.163594,16.393050,3.0,3.0,0.0,0.0,131.25,109.10
2016-05-21,106,2,86914,0.0,3.0,138.50,,138.50,5,21,1,42.260459,...,138.63,138.63,20.170368,17.694009,3.0,3.0,0.0,0.0,131.25,121.45


In [9]:
df_train = df_train.drop('Promo_Price', axis=1).dropna()
df_test = df_test.drop('Promo_Price', axis=1)

In [10]:
target = 'Demand'

In [11]:
target_mean = df_train[target].mean()
target_stdev = df_train[target].std()

for c in df_train.columns:
    mean = df_train[c].mean()
    stdev = df_train[c].std()
    df_train[c] = (df_train[c] - mean) / stdev
    df_test[c] = (df_test[c] - mean) / stdev

In [12]:
df_train

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 0,Promo,Demand,Regular_Price,Actual_Price,weekday,monthday,is_weekend,lag22_wdw14_Demand_oprm_md_mean,lag28_wdw14_Demand_oprm_md_mean,...,lag22_wdw28_Demand_aprm_anyday_pctl90,lag28_wdw28_Demand_aprm_anyday_pctl90,lag22_wdw56_Demand_aprm_anyday_mean,lag28_wdw56_Demand_aprm_anyday_mean,lag22_wdw56_Demand_aprm_anyday_median,lag28_wdw56_Demand_aprm_anyday_median,lag22_wdw56_Demand_aprm_anyday_pctl10,lag28_wdw56_Demand_aprm_anyday_pctl10,lag22_wdw56_Demand_aprm_anyday_pctl90,lag28_wdw56_Demand_aprm_anyday_pctl90
Date,Store_id,SKU_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2015-01-29,1,1,-1.741767,-0.475059,-0.010593,0.983746,0.983382,-0.007158,1.537024,-0.635889,-0.600987,-0.809777,...,-0.697653,-0.876651,-0.686270,-0.937421,-0.482959,-0.804644,-0.568219,-0.838902,-0.790353,-0.976952
2015-01-30,1,1,-1.741727,-0.475059,-0.069543,0.983746,0.983382,0.493031,1.650926,-0.635889,-0.587591,-0.730626,...,-0.700927,-0.806601,-0.670573,-0.846260,-0.452736,-0.683667,-0.522376,-0.793036,-0.793605,-0.907981
2015-01-31,1,1,-1.741688,-0.475059,-0.128492,0.983746,0.983382,0.993220,1.764827,1.572582,-0.574466,-0.676236,...,-0.704202,-0.747512,-0.655194,-0.783617,-0.422513,-0.562691,-0.476533,-0.747171,-0.796856,-0.849802
2015-02-01,1,1,-1.741648,-0.475059,-0.104376,0.983746,0.983382,1.493409,-1.652210,1.572582,-0.563822,-0.645238,...,-0.682961,-0.739321,-0.642722,-0.747916,-0.370702,-0.523806,-0.430691,-0.701305,-0.775766,-0.841736
2015-02-02,1,1,-1.741608,-0.475059,-0.085620,0.983746,0.983382,-1.507726,-1.538309,-0.635889,-0.553671,-0.617410,...,-0.683512,-0.713872,-0.630828,-0.715865,-0.318891,-0.484921,-0.384848,-0.655440,-0.776313,-0.816679
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-05-18,106,2,1.720426,-0.475059,-0.508983,-0.016663,0.260244,-0.507348,0.284111,-0.635889,-0.698015,-0.812759,...,-0.730412,-0.900263,-0.916159,-0.951568,-0.914717,-0.916979,-0.926122,-0.921791,-0.833496,-0.869176
2016-05-19,106,2,1.720465,-0.475059,-0.525060,-0.016663,0.260244,-0.007158,0.398012,-0.635889,-0.696350,-0.789069,...,-0.730412,-0.844337,-0.915650,-0.944872,-0.914717,-0.916979,-0.926122,-0.921791,-0.833496,-0.866162
2016-05-20,106,2,1.720505,-0.475059,-0.498265,-0.016663,0.260244,0.493031,0.511913,-0.635889,-0.696074,-0.758964,...,-0.730412,-0.724979,-0.915585,-0.936197,-0.914717,-0.916979,-0.926122,-0.921791,-0.833496,-0.864870
2016-05-21,106,2,1.720545,-0.475059,-0.525060,-0.016663,0.260244,0.993220,0.625815,1.572582,-0.695960,-0.734876,...,-0.730412,-0.719775,-0.915549,-0.929235,-0.914717,-0.916979,-0.926122,-0.921791,-0.833496,-0.847147


In [13]:
import torch
from torch.utils.data import Dataset

class SequenceDataset(Dataset):
    def __init__(self, dataframe, target, features, sequence_length=5):
        self.features = features
        self.target = target
        self.sequence_length = sequence_length
        self.y = torch.tensor(dataframe[target].values).float()
        self.X = torch.tensor(dataframe[features].values).float()

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, i): 
        if i >= self.sequence_length - 1:
            i_start = i - self.sequence_length + 1
            x = self.X[i_start:(i + 1), :]
        else:
            padding = self.X[0].repeat(self.sequence_length - i - 1, 1)
            x = self.X[0:(i + 1), :]
            x = torch.cat((padding, x), 0)

        return x, self.y[i]

In [14]:
features = df_train.drop(['Demand', 'Promo'], axis=1).columns

In [15]:
from torch.utils.data import DataLoader

torch.manual_seed(101)

batch_size = 128
sequence_length = 30

train_dataset = SequenceDataset(
    df_train,
    target=target,
    features=features,
    sequence_length=sequence_length
)
test_dataset = SequenceDataset(
    df_test,
    target=target,
    features=features,
    sequence_length=sequence_length
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

X, y = next(iter(train_loader))

print("Features shape:", X.shape)
print("Target shape:", y.shape)

Features shape: torch.Size([128, 30, 582])
Target shape: torch.Size([128])


In [16]:
from torch import nn

class ShallowRegressionLSTM(nn.Module):
    def __init__(self, num_sensors, hidden_units):
        super().__init__()
        self.num_sensors = num_sensors  # this is the number of features
        self.hidden_units = hidden_units
        self.num_layers = 1

        self.lstm = nn.LSTM(
            input_size=num_sensors,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers
        )

        self.linear = nn.Sequential(
            nn.Linear(in_features=self.hidden_units, out_features=self.hidden_units),
            nn.Dropout(0.3),
            nn.ReLU(),
            nn.BatchNorm1d(self.hidden_units),
            nn.Linear(in_features=self.hidden_units, out_features=1)
        )

    def forward(self, x):
        batch_size = x.shape[0]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()

        _, (hn, _) = self.lstm(x, (h0, c0))
        out = self.linear(hn[0]).flatten()  # First dim of Hn is num_layers, which is set to 1 above.

        return out

In [17]:
from tqdm import tqdm
import random

def train_model(data_loader, model, loss_function, optimizer):
    num_batches = len(data_loader)
    total_loss = 0
    model.train()

    for X, y in tqdm(data_loader):
        output = model(X)
        loss = loss_function(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        
    scheduler.step()

    avg_loss = total_loss / num_batches
    print(f"Train loss: {avg_loss}")
    
def predict(data_loader, model):

    output = torch.tensor([])
    model.eval()
    with torch.no_grad():
        for X, _ in tqdm(data_loader):
            y_star = model(X)
            output = torch.cat((output, y_star), 0)

    return output

def smape(a, f):
    return 1/len(a) * np.sum(2 * np.abs(f-a) / (np.abs(a) + np.abs(f))*100)

def set_random_seed(seed):
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

In [18]:
set_random_seed(7)

In [19]:
learning_rate = 1e-3
weight_decay = 1e-5
num_hidden_units = 64

model = ShallowRegressionLSTM(num_sensors=len(features), hidden_units=num_hidden_units)
#model = GRU(num_sensors=len(features), hidden_units=num_hidden_units)

loss_function = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9995)

In [20]:
for ix_epoch in range(1):
    print(f"Epoch {ix_epoch}\n---------")
    train_model(train_loader, model, loss_function, optimizer=optimizer)
    
    train_eval_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

    ystar_col = "Model forecast"
    df_train[ystar_col] = predict(train_eval_loader, model).numpy()

    df_out = df_train[[target, ystar_col]]

    for c in df_out.columns:
        df_out[c] = df_out[c] * target_stdev + target_mean

    smape_ = smape(df_out.dropna()['Demand'], df_out.dropna()['Model forecast'])
    print('Smape =', smape_)
        
    print()

  0%|                                                                                          | 0/652 [00:00<?, ?it/s]

Epoch 0
---------


100%|████████████████████████████████████████████████████████████████████████████████| 652/652 [00:34<00:00, 19.17it/s]
  1%|▌                                                                                 | 4/652 [00:00<00:19, 32.52it/s]

Train loss: 0.34935010427986185


100%|████████████████████████████████████████████████████████████████████████████████| 652/652 [00:17<00:00, 37.12it/s]


Smape = 88.9914929600938



In [21]:
train_eval_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

ystar_col = "Model forecast"
df_train[ystar_col] = predict(train_eval_loader, model).numpy()
df_test[ystar_col] = predict(test_loader, model).numpy()

df_out = pd.concat((df_train, df_test))[[target, ystar_col]]

for c in df_out.columns:
    df_out[c] = df_out[c] * target_stdev + target_mean

df_out

100%|████████████████████████████████████████████████████████████████████████████████| 652/652 [00:17<00:00, 37.47it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:01<00:00, 10.66it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Demand,Model forecast
Date,Store_id,SKU_id,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-29,1,1,195.0,36.482742
2015-01-30,1,1,173.0,43.084610
2015-01-31,1,1,151.0,44.499222
2015-02-01,1,1,160.0,73.338074
2015-02-02,1,1,167.0,93.118790
...,...,...,...,...
2016-06-15,110,2,,8.129028
2016-06-16,110,2,,8.146469
2016-06-17,110,2,,8.162537
2016-06-18,110,2,,8.148834


In [22]:
test = pd.read_csv('test_kaggle.csv')
test['Date'] = test['Date'].str.split('.').apply(lambda x : '-'.join(x[::-1]))
test['Date'] = pd.to_datetime(test['Date'])
#test = test.set_index(['Date', 'Store_id', 'SKU_id'])
test.head()

Unnamed: 0,Store_id,SKU_id,Date,Promo,Demand,Regular_Price,Promo_Price
0,1,1,2016-05-23,1.0,,128.98,119.6
1,1,1,2016-05-24,,,128.98,
2,1,1,2016-05-25,,,131.7,
3,1,1,2016-05-26,,,131.7,
4,1,1,2016-05-27,,,131.7,


In [23]:
df_out = df_out.sort_index()

ans = df_out[df_out['Demand'].isna()]
ans = ans.drop('Demand', axis=1)

#ans.index = [np.arange(5970)]
# ans.reset_index(inplace=True)
ans.rename(columns={'Model forecast': 'Forecast'}, inplace=True)
ans.to_csv('try_one.csv', index_label='id')

In [24]:
ans = ans.reset_index()
ans['Date'] = pd.to_datetime(ans['Date'])
ans = ans.set_index(['Date', 'Store_id', 'SKU_id'])
ans.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Forecast
Date,Store_id,SKU_id,Unnamed: 3_level_1
2016-05-23,1,1,189.083801
2016-05-23,1,2,48.254486
2016-05-23,2,1,199.084808
2016-05-23,2,2,8.36055
2016-05-23,3,1,199.071152


In [25]:
test

Unnamed: 0,Store_id,SKU_id,Date,Promo,Demand,Regular_Price,Promo_Price
0,1,1,2016-05-23,1.0,,128.98,119.6
1,1,1,2016-05-24,,,128.98,
2,1,1,2016-05-25,,,131.70,
3,1,1,2016-05-26,,,131.70,
4,1,1,2016-05-27,,,131.70,
...,...,...,...,...,...,...,...
5965,110,2,2016-06-15,,,133.06,
5966,110,2,2016-06-16,,,133.06,
5967,110,2,2016-06-17,,,133.06,
5968,110,2,2016-06-18,,,133.06,


In [26]:
ans['Forecast'] = pd.merge(test.reset_index(), ans.reset_index(), on=['Date', 'Store_id', 'SKU_id'], how='left')['Forecast'].values
ans.index = [np.arange(5970)]
ans.rename(columns={'Forecast' : 'Demand'}, inplace=True)
ans.to_csv('try_one.csv', index_label='id')
ans.head()

Unnamed: 0,Demand
0,189.083801
1,171.939789
2,147.995056
3,148.949417
4,151.876999


In [27]:
df_out.reset_index().drop(['Store_id', 'SKU_id'], axis=1).set_index('Date').sort_index()

Unnamed: 0_level_0,Demand,Model forecast
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01,9.0,201.491379
2015-01-01,0.0,7.119080
2015-01-01,3.0,7.652710
2015-01-01,0.0,7.770065
2015-01-01,0.0,7.464111
...,...,...
2016-06-19,,8.172348
2016-06-19,,236.232422
2016-06-19,,8.164734
2016-06-19,,236.217163


In [None]:
df_out.reset_index().drop(['Store_id', 'SKU_id'], axis=1).set_index('Date').sort_index().plot()

In [None]:
ans.plot()

In [None]:
smape(df_out.dropna()['Demand'], df_out.dropna()['Model forecast'])

https://www.kaggle.com/c/atsf-fall21-hw3

## -----------------------------------------------------------------------------------------------------------------------------