In [1]:
from datetime import date, timedelta
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import PReLU
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import LSTM
from tensorflow.keras import callbacks
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import gc
import pickle
from tqdm.auto import tqdm

In [6]:
%%time
input_dir = '../inputs/favorita-grocery-sales-forecasting'

df_train = pd.read_pickle('df_train_favorita.pkl')
df_test = pd.read_pickle('df_test_favorita.pkl')
items = pd.read_pickle('items_favorita.pkl')
stores = pd.read_pickle('stores_favorita.pkl')


df_2017 = df_train.loc[df_train.date>=pd.datetime(2017,1,1)]
del df_train

promo_2017_train = df_2017.set_index(
    ["store_nbr", "item_nbr", "date"])[["onpromotion"]].unstack(
        level=-1).fillna(False)
promo_2017_train.columns = promo_2017_train.columns.get_level_values(1)
promo_2017_test = df_test[["onpromotion"]].unstack(level=-1).fillna(False)
promo_2017_test.columns = promo_2017_test.columns.get_level_values(1)
promo_2017_test = promo_2017_test.reindex(promo_2017_train.index).fillna(False)
promo_2017 = pd.concat([promo_2017_train, promo_2017_test], axis=1)
del promo_2017_test, promo_2017_train

df_2017 = df_2017.set_index(
    ["store_nbr", "item_nbr", "date"])[["unit_sales"]].unstack(
        level=-1).fillna(0)
df_2017.columns = df_2017.columns.get_level_values(1)

items = items.reindex(df_2017.index.get_level_values(1))



CPU times: total: 28.5 s
Wall time: 29.2 s


In [7]:
def get_timespan(df, dt, minus, periods, freq='D'):
    return df[pd.date_range(dt - timedelta(days=minus), periods=periods, freq=freq)]

def prepare_dataset(t2017, is_train=True):
    X = pd.DataFrame({
        "day_1_2017": get_timespan(df_2017, t2017, 1, 1).values.ravel(),
        "mean_3_2017": get_timespan(df_2017, t2017, 3, 3).mean(axis=1).values,
        "mean_7_2017": get_timespan(df_2017, t2017, 7, 7).mean(axis=1).values,
        "mean_14_2017": get_timespan(df_2017, t2017, 14, 14).mean(axis=1).values,
        "mean_30_2017": get_timespan(df_2017, t2017, 30, 30).mean(axis=1).values,
        "mean_60_2017": get_timespan(df_2017, t2017, 60, 60).mean(axis=1).values,
        "mean_140_2017": get_timespan(df_2017, t2017, 140, 140).mean(axis=1).values,
        "promo_14_2017": get_timespan(promo_2017, t2017, 14, 14).sum(axis=1).values,
        "promo_60_2017": get_timespan(promo_2017, t2017, 60, 60).sum(axis=1).values,
        "promo_140_2017": get_timespan(promo_2017, t2017, 140, 140).sum(axis=1).values
    })
    for i in range(7):
        X['mean_4_dow{}_2017'.format(i)] = get_timespan(df_2017, t2017, 28-i, 4, freq='7D').mean(axis=1).values
        X['mean_20_dow{}_2017'.format(i)] = get_timespan(df_2017, t2017, 140-i, 20, freq='7D').mean(axis=1).values
    for i in range(16):
        X["promo_{}".format(i)] = promo_2017[f'{t2017 + timedelta(days=i)}'].values.astype(np.uint8)
    if is_train:
        y = df_2017[
            pd.date_range(t2017, periods=16)
        ].values
        return X, y
    return X

In [8]:
print("Preparing dataset...")
t2017 = date(2017, 5, 31)
num_days= 8
X_l, y_l = [], []
for i in range(num_days):
    delta = timedelta(days=7 * i)
    X_tmp, y_tmp = prepare_dataset(t2017 + delta)
    X_l.append(X_tmp)
    y_l.append(y_tmp)
X_train = pd.concat(X_l, axis=0)
y_train = np.concatenate(y_l, axis=0)
del X_l, y_l
X_val, y_val = prepare_dataset(date(2017, 7, 26))
X_test = prepare_dataset(date(2017, 8, 16), is_train=False)

stores_items = pd.DataFrame(index=df_2017.index)
test_ids = df_test[['id']]

items = items.reindex( stores_items.index.get_level_values(1) )

Preparing dataset...


In [9]:
# 표준화를 사용 - 특성들을 정규분포로 만드는것이 정규화보다 좋다고 판단
scaler = StandardScaler()  #MinMaxScaler() , RobustScaler()

# 훈련데이터의 분포를 학습
scaler.fit(pd.concat([X_train, X_val, X_test]))
#데이터 적용
X_train[:] = scaler.transform(X_train)
X_val[:] = scaler.transform(X_val)
X_test[:] = scaler.transform(X_test)

In [10]:
X_train = X_train.values
X_test = X_test.values
X_val = X_val.values
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
X_val = X_val.reshape((X_val.shape[0], 1, X_val.shape[1]))

In [9]:
# model = Sequential()
# model.add(LSTM(32, input_shape=(X_train.shape[1],X_train.shape[2])))
# model.add(Dropout(.1))
# model.add(Dense(32))
# model.add(Dropout(.2))
# model.add(Dense(1))
# model.compile(loss = 'mse', optimizer='adam', metrics=['mse'])

In [13]:
def build_model():
    model = Sequential()

    model.add(LSTM(256, input_shape=(X_train.shape[1],X_train.shape[2])))
    model.add(PReLU())
    model.add(BatchNormalization())
    model.add(Dropout(.2))

    model.add(Dense(128))
    model.add(PReLU())
    model.add(BatchNormalization())
    model.add(Dropout(.1))

    model.add(Dense(128))
    model.add(PReLU())
    model.add(BatchNormalization())
    model.add(Dropout(.1))

    model.add(Dense(64))
    model.add(PReLU())
    model.add(BatchNormalization())
    model.add(Dropout(.05))

    model.add(Dense(32))
    model.add(PReLU())
    model.add(BatchNormalization())
    model.add(Dropout(.05))

    model.add(Dense(16))
    model.add(PReLU())
    model.add(BatchNormalization())
    model.add(Dropout(.05))

    model.add(Dense(1))

    return model



In [15]:
N_EPOCHS = 50

val_pred = []
test_pred = []
# wtpath = 'weights.hdf5'  # To save best epoch. But need Keras bug to be fixed first.
sample_weights=np.array( pd.concat([items["perishable"]] * num_days) * 0.25 + 1 )
with tf.device('/device:GPU:0'):
    for i in range(16):
        model = build_model()
        opt = optimizers.Adam(learning_rate=0.01)
        model.compile(loss='mse', optimizer=opt, metrics=['mse'])

        callbacks = [
            EarlyStopping(monitor='val_loss', patience=10, verbose=0),
            ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, min_delta=1e-6, mode='min')
            ]
        print("=" * 50)
        print("Step %d" % (i+1))
        print("=" * 50)
        y = y_train[:, i]
        xv = X_val
        y_mean = y.mean()
        yv = y_val[:, i]
        model.fit(X_train, y  - y_mean, batch_size = 1024, epochs = N_EPOCHS, verbose=2,
                   sample_weight=sample_weights, validation_data=(xv,yv - y_mean), callbacks=callbacks ) 
        val_pred.append(model.predict(X_val) + y_mean)
        test_pred.append(model.predict(X_test) + y_mean)

Step 1
Epoch 1/50
1309/1309 - 23s - loss: 0.3556 - mse: 0.3368 - val_loss: 0.2959 - val_mse: 0.2959 - lr: 0.0100 - 23s/epoch - 17ms/step
Epoch 2/50
1309/1309 - 20s - loss: 0.3333 - mse: 0.3162 - val_loss: 0.3025 - val_mse: 0.3025 - lr: 0.0100 - 20s/epoch - 15ms/step
Epoch 3/50
1309/1309 - 20s - loss: 0.3316 - mse: 0.3146 - val_loss: 0.2961 - val_mse: 0.2961 - lr: 0.0100 - 20s/epoch - 15ms/step
Epoch 4/50
1309/1309 - 20s - loss: 0.3300 - mse: 0.3132 - val_loss: 0.3051 - val_mse: 0.3051 - lr: 0.0100 - 20s/epoch - 15ms/step
Epoch 5/50
1309/1309 - 20s - loss: 0.3296 - mse: 0.3128 - val_loss: 0.3002 - val_mse: 0.3002 - lr: 0.0100 - 20s/epoch - 15ms/step
Epoch 6/50

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
1309/1309 - 20s - loss: 0.3291 - mse: 0.3124 - val_loss: 0.3065 - val_mse: 0.3065 - lr: 0.0100 - 20s/epoch - 15ms/step
Epoch 7/50
1309/1309 - 20s - loss: 0.3225 - mse: 0.3064 - val_loss: 0.2914 - val_mse: 0.2914 - lr: 1.0000e-03 - 20s/epoch - 15ms/ste

Epoch 27/50

Epoch 00027: ReduceLROnPlateau reducing learning rate to 9.999999310821295e-05.
1309/1309 - 20s - loss: 0.3417 - mse: 0.3229 - val_loss: 0.3217 - val_mse: 0.3217 - lr: 1.0000e-03 - 20s/epoch - 15ms/step
Epoch 28/50
1309/1309 - 20s - loss: 0.3415 - mse: 0.3226 - val_loss: 0.3207 - val_mse: 0.3207 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 29/50
1309/1309 - 20s - loss: 0.3412 - mse: 0.3224 - val_loss: 0.3208 - val_mse: 0.3208 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 30/50
1309/1309 - 20s - loss: 0.3410 - mse: 0.3222 - val_loss: 0.3208 - val_mse: 0.3208 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 31/50
1309/1309 - 20s - loss: 0.3410 - mse: 0.3223 - val_loss: 0.3207 - val_mse: 0.3207 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 32/50

Epoch 00032: ReduceLROnPlateau reducing learning rate to 9.999999019782991e-06.
1309/1309 - 20s - loss: 0.3409 - mse: 0.3221 - val_loss: 0.3208 - val_mse: 0.3208 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Step 3
Epoch 1/50
1309/1309 

Epoch 29/50
1309/1309 - 20s - loss: 0.3656 - mse: 0.3463 - val_loss: 0.3479 - val_mse: 0.3479 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 30/50
1309/1309 - 20s - loss: 0.3658 - mse: 0.3465 - val_loss: 0.3478 - val_mse: 0.3478 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 31/50

Epoch 00031: ReduceLROnPlateau reducing learning rate to 9.999999019782991e-06.
1309/1309 - 20s - loss: 0.3655 - mse: 0.3463 - val_loss: 0.3479 - val_mse: 0.3479 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Step 5
Epoch 1/50
1309/1309 - 22s - loss: 0.4164 - mse: 0.3938 - val_loss: 0.3732 - val_mse: 0.3732 - lr: 0.0100 - 22s/epoch - 17ms/step
Epoch 2/50
1309/1309 - 20s - loss: 0.3985 - mse: 0.3771 - val_loss: 0.3602 - val_mse: 0.3602 - lr: 0.0100 - 20s/epoch - 15ms/step
Epoch 3/50
1309/1309 - 20s - loss: 0.3960 - mse: 0.3748 - val_loss: 0.3777 - val_mse: 0.3777 - lr: 0.0100 - 20s/epoch - 15ms/step
Epoch 4/50
1309/1309 - 20s - loss: 0.3949 - mse: 0.3739 - val_loss: 0.3620 - val_mse: 0.3620 - lr: 0.0100 - 20s/epoch

Epoch 21/50

Epoch 00021: ReduceLROnPlateau reducing learning rate to 9.999999310821295e-05.
1309/1309 - 20s - loss: 0.3744 - mse: 0.3535 - val_loss: 0.3562 - val_mse: 0.3562 - lr: 1.0000e-03 - 20s/epoch - 15ms/step
Epoch 22/50
1309/1309 - 20s - loss: 0.3731 - mse: 0.3523 - val_loss: 0.3567 - val_mse: 0.3567 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 23/50
1309/1309 - 20s - loss: 0.3728 - mse: 0.3520 - val_loss: 0.3569 - val_mse: 0.3569 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 24/50
1309/1309 - 20s - loss: 0.3728 - mse: 0.3520 - val_loss: 0.3568 - val_mse: 0.3568 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 25/50
1309/1309 - 20s - loss: 0.3724 - mse: 0.3517 - val_loss: 0.3568 - val_mse: 0.3568 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 26/50

Epoch 00026: ReduceLROnPlateau reducing learning rate to 9.999999019782991e-06.
1309/1309 - 20s - loss: 0.3726 - mse: 0.3518 - val_loss: 0.3570 - val_mse: 0.3570 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Step 7
Epoch 1/50
1309/1309 

Epoch 20/50
1309/1309 - 20s - loss: 0.3616 - mse: 0.3421 - val_loss: 0.3769 - val_mse: 0.3769 - lr: 1.0000e-03 - 20s/epoch - 15ms/step
Epoch 21/50

Epoch 00021: ReduceLROnPlateau reducing learning rate to 9.999999310821295e-05.
1309/1309 - 20s - loss: 0.3615 - mse: 0.3420 - val_loss: 0.3843 - val_mse: 0.3843 - lr: 1.0000e-03 - 20s/epoch - 15ms/step
Step 10
Epoch 1/50
1309/1309 - 23s - loss: 0.4012 - mse: 0.3799 - val_loss: 0.3772 - val_mse: 0.3772 - lr: 0.0100 - 23s/epoch - 17ms/step
Epoch 2/50
1309/1309 - 21s - loss: 0.3839 - mse: 0.3640 - val_loss: 0.3680 - val_mse: 0.3680 - lr: 0.0100 - 21s/epoch - 16ms/step
Epoch 3/50
1309/1309 - 21s - loss: 0.3822 - mse: 0.3625 - val_loss: 0.3742 - val_mse: 0.3742 - lr: 0.0100 - 21s/epoch - 16ms/step
Epoch 4/50
1309/1309 - 21s - loss: 0.3807 - mse: 0.3612 - val_loss: 0.3690 - val_mse: 0.3690 - lr: 0.0100 - 21s/epoch - 16ms/step
Epoch 5/50
1309/1309 - 20s - loss: 0.3796 - mse: 0.3602 - val_loss: 0.3772 - val_mse: 0.3772 - lr: 0.0100 - 20s/epoch - 1

Epoch 24/50
1309/1309 - 20s - loss: 0.3907 - mse: 0.3706 - val_loss: 0.3713 - val_mse: 0.3713 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 25/50
1309/1309 - 20s - loss: 0.3906 - mse: 0.3705 - val_loss: 0.3715 - val_mse: 0.3715 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 26/50
1309/1309 - 20s - loss: 0.3908 - mse: 0.3706 - val_loss: 0.3714 - val_mse: 0.3714 - lr: 1.0000e-04 - 20s/epoch - 16ms/step
Epoch 27/50

Epoch 00027: ReduceLROnPlateau reducing learning rate to 9.999999019782991e-06.
1309/1309 - 20s - loss: 0.3907 - mse: 0.3705 - val_loss: 0.3713 - val_mse: 0.3713 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Epoch 28/50
1309/1309 - 20s - loss: 0.3905 - mse: 0.3703 - val_loss: 0.3713 - val_mse: 0.3713 - lr: 1.0000e-05 - 20s/epoch - 15ms/step
Epoch 29/50
1309/1309 - 20s - loss: 0.3906 - mse: 0.3705 - val_loss: 0.3713 - val_mse: 0.3713 - lr: 1.0000e-05 - 20s/epoch - 16ms/step
Epoch 30/50
1309/1309 - 20s - loss: 0.3904 - mse: 0.3702 - val_loss: 0.3714 - val_mse: 0.3714 - lr: 1.0000e-0

Epoch 22/50
1309/1309 - 20s - loss: 0.3911 - mse: 0.3697 - val_loss: 0.3745 - val_mse: 0.3745 - lr: 1.0000e-04 - 20s/epoch - 16ms/step
Epoch 23/50
1309/1309 - 20s - loss: 0.3909 - mse: 0.3695 - val_loss: 0.3744 - val_mse: 0.3744 - lr: 1.0000e-04 - 20s/epoch - 16ms/step
Epoch 24/50
1309/1309 - 20s - loss: 0.3907 - mse: 0.3693 - val_loss: 0.3743 - val_mse: 0.3743 - lr: 1.0000e-04 - 20s/epoch - 16ms/step
Epoch 25/50
1309/1309 - 20s - loss: 0.3904 - mse: 0.3691 - val_loss: 0.3743 - val_mse: 0.3743 - lr: 1.0000e-04 - 20s/epoch - 16ms/step
Epoch 26/50

Epoch 00026: ReduceLROnPlateau reducing learning rate to 9.999999019782991e-06.
1309/1309 - 20s - loss: 0.3906 - mse: 0.3692 - val_loss: 0.3742 - val_mse: 0.3742 - lr: 1.0000e-04 - 20s/epoch - 16ms/step
Step 14
Epoch 1/50
1309/1309 - 23s - loss: 0.4276 - mse: 0.4039 - val_loss: 0.3702 - val_mse: 0.3702 - lr: 0.0100 - 23s/epoch - 17ms/step
Epoch 2/50
1309/1309 - 20s - loss: 0.4069 - mse: 0.3849 - val_loss: 0.3663 - val_mse: 0.3663 - lr: 0.0100 

Epoch 30/50

Epoch 00030: ReduceLROnPlateau reducing learning rate to 9.999999019782991e-06.
1309/1309 - 20s - loss: 0.3680 - mse: 0.3507 - val_loss: 0.3459 - val_mse: 0.3459 - lr: 1.0000e-04 - 20s/epoch - 15ms/step
Step 16
Epoch 1/50
1309/1309 - 22s - loss: 0.4165 - mse: 0.3933 - val_loss: 0.3759 - val_mse: 0.3759 - lr: 0.0100 - 22s/epoch - 17ms/step
Epoch 2/50
1309/1309 - 20s - loss: 0.3986 - mse: 0.3768 - val_loss: 0.3770 - val_mse: 0.3770 - lr: 0.0100 - 20s/epoch - 15ms/step
Epoch 3/50
1309/1309 - 20s - loss: 0.3965 - mse: 0.3749 - val_loss: 0.3804 - val_mse: 0.3804 - lr: 0.0100 - 20s/epoch - 15ms/step
Epoch 4/50
1309/1309 - 20s - loss: 0.3960 - mse: 0.3744 - val_loss: 0.3735 - val_mse: 0.3735 - lr: 0.0100 - 20s/epoch - 15ms/step
Epoch 5/50
1309/1309 - 20s - loss: 0.3950 - mse: 0.3735 - val_loss: 0.3802 - val_mse: 0.3802 - lr: 0.0100 - 20s/epoch - 15ms/step
Epoch 6/50
1309/1309 - 20s - loss: 0.3940 - mse: 0.3726 - val_loss: 0.3741 - val_mse: 0.3741 - lr: 0.0100 - 20s/epoch - 15ms/s