In [1]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [2]:
# General imports
import numpy as np
import pandas as pd
import os, sys, gc, time, warnings, pickle, psutil, random
from multiprocessing import Pool        # Multiprocess Runs
from tqdm import tqdm_notebook, tqdm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from attention import attention_3d_block
from tensorflow.keras.models import Model

import seaborn as sns
import matplotlib.pyplot as plt

sys.path.insert(0, os.path.abspath('../'))
from module.prepare_data import *
warnings.filterwarnings('ignore')

In [3]:
tf.__version__

'2.1.0'

In [4]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [5]:
########################### Vars
#################################################################################
VER = 3                          # Our model version
SEED = 42                        # Random seed (NOTE(review): not applied anywhere in the visible cells)
TARGET      = 'sales'            # Our target
START_TRAIN = 0                  # We can skip some rows (Nans/faster training)
END_TRAIN   = 1913               # End day of our train set
P_HORIZON   = 28                 # Prediction horizon

#PATHS for the raw competition files
ORIGINAL = '../input/m5-forecasting-accuracy/'

#STORES ids
# Only the store_id column is needed here, so ask the parser for that column
# alone instead of materialising every column of the sales file.
STORES_IDS = pd.read_csv(ORIGINAL+'sales_train_validation.csv', usecols=['store_id'])['store_id']
STORES_IDS = list(STORES_IDS.unique())

#PATHS for Features
BASE     = '../cache/ori_grid_part_1.pkl'
PRICE    = '../cache/ori_grid_part_2.pkl'
CALENDAR = '../cache/ori_grid_part_3.pkl'
LAGS     = '../cache/ori_lags_df_28.pkl'
MEAN_ENC = '../cache/ori_mean_encoding_df.pkl'
BASE_PATH = '../cache'
FINAL_TARGETS = 'sales'
SAV_BASE_PATH = '../cache/ver3'  # Where per-store arrays and model weights are written
PKL_BASE_PATH = BASE_PATH

########################### calculate mean and std
#################################################################################
# The two constants below are hard-coded; the commented-out snippet shows how
# they can be recomputed from the day-to-day sales differences of every series.
# diff_series = []
# for id_, group in tqdm(BASE_GRID_DF[['id','d','sales']].groupby('id')):
#     diff_series += group['sales'].diff().dropna().tolist()
# diff_mean = np.mean(diff_series)
# diff_std = np.std(diff_series)
diff_mean, diff_std = 0.00022844736211235283, 2.9674834203072016

In [6]:
########################### dataset sample
#################################################################################
# NOTE: the triple-quoted block below is a bare string expression, not code
# that runs. It holds scratch notes on tf.data windowing (a toy range example,
# dataset concatenation, and a batched/shuffled variant of the window builder).
# Nothing inside it is executed or defines any name.
"""
dataset = tf.data.Dataset.range(10)
dataset = dataset.window(5, shift=1, drop_remainder=True)
dataset = dataset.flat_map(lambda window: window.batch(5))
dataset = dataset.map(lambda window: (window[:-1], window[-1:]))
dataset = dataset.shuffle(buffer_size=10)
dataset = dataset.batch(2).prefetch(1)
for x, y in dataset:
    print('x = ', x.numpy())
    print('y = ', y.numpy())
    
a  = gen_dataset(np.arange(100), window_size)
b = a.concatenate(gen_dataset(np.arange(2000,2100), window_size))
b = b.batch(batch_size).prefetch(1)


validaton_span = 28
window_size = 28
batch_size = 5
shuffle_buffer = 2000
train_dataset = train_dataset.shuffle(shuffle_buffer)
train_dataset = train_dataset.batch(5).prefetch(1)
valid_series = train_dataset.batch(1).prefetch(1)

def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    dataset = tf.data.Dataset.from_tensor_slices(series)
    dataset = dataset.window(window_size + 1, shift=1, drop_remainder=True)
    dataset = dataset.flat_map(lambda window: window.batch(window_size+1))
    dataset = dataset.shuffle(shuffle_buffer).map(lambda window: (window[:-1], window[-1]))
    dataset = dataset.batch(batch_size).prefetch(1)
    return dataset
    
    
"""

window_size = 28
def gen_dataset(series, window_size):
    """Turn a 1-D series into an unbatched dataset of (history, next value) pairs.

    Args:
        series: values passed to ``tf.data.Dataset.from_tensor_slices``.
        window_size: number of consecutive elements used as the input history.

    Returns:
        A ``tf.data.Dataset`` whose elements are ``(inputs, target)``: the
        first ``window_size`` elements of each sliding window and the single
        element that follows them. Windows advance by one step and incomplete
        trailing windows are dropped.
    """
    span = window_size + 1  # history plus the one target element
    nested = tf.data.Dataset.from_tensor_slices(series).window(
        span, shift=1, drop_remainder=True
    )
    # Each window is itself a dataset; batching it by its full length
    # flattens it into a single tensor of `span` elements.
    flat = nested.flat_map(lambda chunk: chunk.batch(span))
    return flat.map(lambda chunk: (chunk[:-1], chunk[-1]))



In [7]:
# Load the base feature grid through the project helper (imported via
# `from module.prepare_data import *`).
BASE_GRID_DF = load_base_features(PKL_BASE_PATH, SAV_BASE_PATH, FINAL_TARGETS)
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appends a stray "None" line to the output.
BASE_GRID_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46881677 entries, 0 to 46881676
Data columns (total 44 columns):
 #   Column                           Dtype   
---  ------                           -----   
 0   id                               category
 1   item_id                          category
 2   dept_id                          category
 3   cat_id                           category
 4   store_id                         category
 5   state_id                         category
 6   d                                int16   
 7   sales                            float64 
 8   release                          int16   
 9   sell_price                       float16 
 10  price_max                        float16 
 11  price_min                        float16 
 12  price_std                        float16 
 13  price_mean                       float16 
 14  price_norm                       float16 
 15  price_nunique                    float16 
 16  item_nunique                     i

In [8]:
def _sliding_windows(series, window_size):
    """Return (X, y) for every length-(window_size + 1) window of a 1-D array.

    Row i of X is series[i:i + window_size] and y[i] is series[i + window_size].
    """
    n_windows = len(series) - window_size
    if n_windows <= 0:
        return (np.empty((0, window_size), dtype=series.dtype),
                np.empty((0,), dtype=series.dtype))
    # Broadcasting start offsets against in-window offsets builds every window
    # in one indexing operation.
    idx = np.arange(n_windows)[:, None] + np.arange(window_size + 1)[None, :]
    windows = series[idx]
    return windows[:, :-1], windows[:, -1]


def _concat_or_empty(parts, empty_shape):
    """Concatenate per-series arrays, or return an empty array of the given shape."""
    return np.concatenate(parts, axis=0) if parts else np.empty(empty_shape)


def get_train_valid(base_grid, store_id, window_size=28, validation_span=28,
                    mean=None, std=None):
    """Build next-step training/validation windows for every series of one store.

    For each ``id`` of the store, the ``sales`` column is differenced,
    standardised, and cut into sliding windows of ``window_size`` inputs
    followed by one target. The last ``validation_span`` targets of each series
    go to the validation set; everything before them goes to the training set.

    Args:
        base_grid: DataFrame with at least ``store_id``, ``id``, ``d`` and
            ``sales`` columns. Differences are taken in row order, so rows are
            assumed to be sorted by day within each id -- TODO confirm.
        store_id: value of ``store_id`` whose series are used.
        window_size: number of past differences fed to the model per sample.
        validation_span: number of trailing targets per series held out.
        mean: value subtracted from the differences; defaults to the
            module-level ``diff_mean``.
        std: value the centred differences are divided by; defaults to the
            module-level ``diff_std``.

    Returns:
        ``(X_train, y_train, X_valid, y_valid)`` where the X arrays have shape
        ``(n_samples, window_size)`` and the y arrays ``(n_samples, 1)``.

    Raises:
        ValueError: if ``window_size`` < 1 or ``validation_span`` < 0.
    """
    if window_size < 1:
        raise ValueError('window_size must be >= 1')
    if validation_span < 0:
        raise ValueError('validation_span must be >= 0')
    if mean is None:
        mean = diff_mean
    if std is None:
        std = diff_std

    grid = base_grid[base_grid['store_id'] == store_id].reset_index(drop=True)

    X_train, y_train, X_valid, y_valid = [], [], [], []
    # observed=True skips categories of `id` that have no rows in this store
    # when `id` is categorical; it is ignored for other dtypes.
    for id_, group in tqdm(grid[['id', 'd', 'sales']].groupby('id', observed=True)):
        if group.shape[0] == 0:
            continue

        # dropna() removes the leading NaN produced by diff() as well as any
        # difference that involves a missing sales value.
        series = group['sales'].diff().dropna().values
        series = (series - mean) / std

        # Index of the first held-out target; the validation slice starts
        # `window_size` steps earlier so that target still has a full history.
        split = max(len(series) - validation_span, 0)
        train_series = series[:split]
        valid_series = series[max(split - window_size, 0):]

        x_tr, y_tr = _sliding_windows(train_series, window_size)
        x_va, y_va = _sliding_windows(valid_series, window_size)
        X_train.append(x_tr)
        y_train.append(y_tr)
        X_valid.append(x_va)
        y_valid.append(y_va)

    X_train = _concat_or_empty(X_train, (0, window_size))
    y_train = _concat_or_empty(y_train, (0,)).reshape(-1, 1)
    X_valid = _concat_or_empty(X_valid, (0, window_size))
    y_valid = _concat_or_empty(y_valid, (0,)).reshape(-1, 1)

    return X_train, y_train, X_valid, y_valid


def train_model(X_train, y_train, X_valid, y_valid, store_id, base_path,
                batch_size=10, epochs=5):
    """Fit a stacked bidirectional-LSTM regressor and save its weights.

    The network takes a variable-length 1-D sequence per sample, adds a
    channel axis, runs two bidirectional LSTM layers (64 units each, returning
    sequences), applies ``attention_3d_block`` and a single-unit dense layer,
    and multiplies the result by 100.

    Args:
        X_train: training inputs, one sequence per row.
        y_train: training targets.
        X_valid: validation inputs.
        y_valid: validation targets.
        store_id: identifier used in the weights file name.
        base_path: directory the weights file is written to; created if missing.
        batch_size: mini-batch size passed to ``model.fit``.
        epochs: number of epochs passed to ``model.fit``.

    Returns:
        The trained ``tf.keras`` model. Its weights are also saved to
        ``{base_path}/bilstm_{store_id}.h5``.
    """
    # Create the output directory before training so a bad path fails
    # immediately rather than at save time after the fit has finished.
    os.makedirs(base_path, exist_ok=True)

    # model
    inputs = tf.keras.layers.Input(shape=[None])
    # LSTM layers expect a trailing feature axis: (batch, steps) -> (batch, steps, 1).
    x = tf.keras.layers.Lambda(lambda t: tf.expand_dims(t, axis=-1), input_shape=[None])(inputs)
    x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True))(x)
    x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True))(x)
    # Project helper from the local `attention` module; presumably reduces the
    # sequence axis to one vector per sample -- TODO confirm.
    x = attention_3d_block(x)
    x = tf.keras.layers.Dense(1)(x)
    # NOTE(review): the prediction is scaled by 100 -- confirm this is still
    # intended for the target scale the model is trained on.
    output = tf.keras.layers.Lambda(lambda t: t * 100)(x)

    model = Model(inputs=[inputs], outputs=[output])
    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-6, momentum=0.9)
    model.compile(loss='mse', optimizer=optimizer, metrics=[tf.keras.metrics.RootMeanSquaredError()])
    model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
              batch_size=batch_size, epochs=epochs, verbose=1)

    # Only the weights are stored; rebuild the same architecture before
    # calling load_weights() on this file.
    model.save_weights(f'{base_path}/bilstm_{store_id}.h5')
    return model


def train_model2():
    """Placeholder for an alternative Conv1D + LSTM model; not implemented.

    Raises:
        NotImplementedError: always.
    """
    # Sketch of the intended model, kept for reference (never executed):
    #
    # model = tf.keras.models.Sequential(
    #     [
    #         tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1), input_shape=[None]),
    #         tf.keras.layers.Conv1D(filters=32,kernel_size=5,strides=1,padding="causal",activation="relu",input_shape=[None,1]),
    #         tf.keras.layers.LSTM(64, return_sequences=True),
    #         tf.keras.layers.LSTM(64),
    #         tf.keras.layers.Dense(1),
    # #         tf.keras.layers.Lambda(lambda x: x*100),
    #     ]
    # )
    # optimizer = tf.keras.optimizers.SGD(lr=1e-5, momentum =0.9)
    # model.compile(loss=tf.keras.losses.Huber(), optimizer=optimizer, metrics=[tf.keras.metrics.RootMeanSquaredError()])
    # model.fit(train_dataset, validation_data=valid_dataset, epochs=20, callbacks=[lr_schedule], verbose=1)
    #
    # NotImplementedError subclasses Exception, so callers that catch
    # Exception keep working.
    raise NotImplementedError('no implement')

In [9]:
import warnings
warnings.filterwarnings("ignore")

In [10]:
# Build (or load from cache) the windowed arrays for each store, then train.
for store_id in ['TX_1']:
    print(store_id)

    # np.save does not create directories, so make sure the cache dir exists.
    os.makedirs(SAV_BASE_PATH, exist_ok=True)
    cache_paths = [
        f'{SAV_BASE_PATH}/{name}_{store_id}_diff.npy'
        for name in ('X_train', 'y_train', 'X_valid', 'y_valid')
    ]

    # Require all four files: a run interrupted between saves leaves a partial
    # cache, and checking only the first file would then fail inside np.load.
    if all(os.path.exists(path) for path in cache_paths):
        X_train, y_train, X_valid, y_valid = (np.load(path) for path in cache_paths)  # load
    else:
        X_train, y_train, X_valid, y_valid = get_train_valid(BASE_GRID_DF, store_id)
        for path, array in zip(cache_paths, (X_train, y_train, X_valid, y_valid)):
            np.save(path, array)  # save

    print(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape)
    model = train_model(X_train, y_train, X_valid, y_valid, store_id, SAV_BASE_PATH)

TX_1
(4538790, 28) (4538790, 1) (85372, 28) (85372, 1)
Train on 4538790 samples, validate on 85372 samples
Epoch 1/5
 279060/4538790 [>.............................] - ETA: 68:47:40 - loss: 0.5421 - root_mean_squared_error: 0.7362

InternalError: GPU sync failed

In [None]:
123