In [None]:
# Mount Google Drive inside the Colab VM so the notebook can read its input
# CSV from Drive and write the submission file back to it.
from google.colab import drive 
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import sklearn.model_selection as ms
from tensorflow.keras.preprocessing import sequence
import tensorflow.keras.preprocessing.text as text
import sklearn.metrics as m
import sklearn.preprocessing as pre
import datetime
import sklearn.utils as u
import pandas as pd
# Seed NumPy's global RNG (the u.shuffle calls further down pass no
# random_state of their own).
np.random.seed(0)
import tensorflow as tf

# Seed TensorFlow as well: weight initialisation, dropout masks and
# tf.data shuffling draw from TensorFlow's RNG, not NumPy's, so seeding
# NumPy alone does not make training repeatable.
tf.random.set_seed(0)

# Raise the Keras backend fuzz factor to 1. This is a process-wide setting.
# NOTE(review): presumably done so that MAPE, which clips the denominator
# |y_true| at epsilon, stays finite on zero-sales days -- confirm.
tf.keras.backend.set_epsilon(1)
# Stand-alone MAPE loss object. The model is compiled with the string
# 'mape', so this object is not referenced elsewhere in the visible notebook.
mape = tf.keras.losses.MeanAbsolutePercentageError()

In [None]:
# Load the training data. Later cells read the `store`, `item`, `date` and
# `sales` columns from it. The path is relative, so it resolves under the
# '/content/gdrive' mount only when the working directory is /content.
data = pd.read_csv('gdrive/My Drive/train.csv')

In [None]:
def features_target_extraction(data, history, prediction_size):
    """Cut a 1-D series into sliding (input window, target window) pairs.

    For every forecast origin ``i`` the input window is ``data[i-history:i]``
    and the target window is ``data[i:i+prediction_size]``. Origins run from
    ``history`` up to and including ``len(data) - prediction_size``, which is
    the last position where a complete target window still fits.

    Parameters
    ----------
    data : 1-D array-like
        The series to window (NumPy array, list, ...).
    history : int
        Number of past points in each input window.
    prediction_size : int
        Number of future points in each target window.

    Returns
    -------
    (features, targets) : tuple of np.ndarray
        ``features`` has shape ``(n_windows, history, 1)`` and ``targets``
        has shape ``(n_windows, prediction_size)``. When the series is too
        short for a single window, both arrays are empty but keep those
        trailing dimensions so they can still be concatenated with others.
    """
    series = np.asarray(data)

    # The last valid origin is len(series) - prediction_size *inclusive*;
    # hence the +1. Without it the final complete window is dropped.
    n_windows = len(series) - history - prediction_size + 1
    if n_windows <= 0:
        return (np.empty((0, history, 1), dtype=series.dtype),
                np.empty((0, prediction_size), dtype=series.dtype))

    total_features = []
    prediction = []
    for i in range(history, history + n_windows):
        # Past `history` points as a column vector (one feature per step).
        total_features.append(series[i - history:i].reshape(history, 1))
        # The next `prediction_size` points are the target.
        prediction.append(series[i:i + prediction_size])

    return np.array(total_features), np.array(prediction)


In [None]:
def features_extraction(dataset, history):
    """Return the trailing `history` points of `dataset` as one model input.

    The result is a fresh array of shape (1, n, 1), where n is the number of
    points actually taken from the end of `dataset`.
    """
    window = dataset[-history:]
    n_points = len(window)
    # Batch axis of size 1 in front, single-feature axis at the back.
    return np.array(np.reshape(window, (1, n_points, 1)))

In [None]:
# Train/validation split date: rows with date < threshold are used for
# training, rows with date >= threshold for validation. The comparison is
# done against the `date` column as loaded from the CSV.
threshold = '2017-10-03'

In [None]:
history = 360            # length of each input window (past sales points)
target = 90              # number of future sales points to predict
non_sales_features = 2   # store id and item id

# Per-series pieces are collected in lists and concatenated once after the
# loop; concatenating onto the growing arrays inside the loop copies all
# previously collected data on every iteration. Each list is seeded with an
# empty float array so the final arrays keep the float dtype and the
# expected trailing dimensions.
x_train_parts = [np.zeros((0, history, 1))]
x2_train_parts = [np.zeros((0, non_sales_features))]
y_train_parts = [np.zeros((0, target))]

x_valid_parts = [np.zeros((0, history, 1))]
x2_valid_parts = [np.zeros((0, non_sales_features))]
y_valid_parts = [np.zeros((0, target))]

for storeID in range(1, data.store.max()+1):
    for itemID in range(1, data.item.max()+1):

        # Rows belonging to this particular store-item combination
        subset = data[(data.store == storeID) & (data.item == itemID)]

        train_temp = subset[subset.date < threshold]
        valid_temp = subset[subset.date >= threshold]

        # Sliding windows over the training period: `history` past sales
        # values as input, the following `target` values as label.
        x_temp_train, y_temp_train = features_target_extraction(train_temp.sales.values, history, target)

        # Single validation sample: the last `history` training values
        # predict the whole validation period.
        x_temp_valid = train_temp.sales.tail(history).values
        x_temp_valid = np.reshape(x_temp_valid, (1, history, 1))

        # Requires the validation period to hold exactly `target` rows.
        y_temp_valid = np.reshape(valid_temp.sales.values, (1, target))

        # Store and item ids, repeated once per sample. These must be the
        # loop variables of THIS loop; the previous `s` / `i` are not
        # defined here and would only resolve through stale kernel state.
        store_item = np.array([[storeID, itemID]])
        x2_temp_train = np.repeat(store_item, repeats=x_temp_train.shape[0], axis=0)
        x2_temp_valid = np.repeat(store_item, repeats=x_temp_valid.shape[0], axis=0)

        x_train_parts.append(x_temp_train)
        x_valid_parts.append(x_temp_valid)

        x2_train_parts.append(x2_temp_train)
        x2_valid_parts.append(x2_temp_valid)

        y_train_parts.append(y_temp_train)
        y_valid_parts.append(y_temp_valid)

x_train = np.concatenate(x_train_parts)
x2_train = np.concatenate(x2_train_parts, axis=0)
y_train = np.concatenate(y_train_parts)

x_valid = np.concatenate(x_valid_parts)
x2_valid = np.concatenate(x2_valid_parts, axis=0)
y_valid = np.concatenate(y_valid_parts)


In [None]:
# ConvLSTM input layout: every window is split into n_seq consecutive
# sub-sequences of n_steps points, each seen as a 1 x n_steps "image" with
# n_features channels.
n_features = 1
n_seq = 4
n_steps = 90

x_train_reshaped = np.reshape(x_train, (len(x_train), n_seq, 1, n_steps, n_features))
x_valid_reshaped = np.reshape(x_valid, (len(x_valid), n_seq, 1, n_steps, n_features))

# Show both resulting shapes as the cell output.
x_train_reshaped.shape, x_valid_reshaped.shape

((643000, 4, 1, 90, 1), (500, 4, 1, 90, 1))

In [None]:
# One-hot encode the (store, item) id columns. The encoder is fitted on the
# training ids only; categories unseen at fit time are encoded as all zeros
# (handle_unknown='ignore').
#
# The dense-output switch is called `sparse_output` in scikit-learn >= 1.2;
# the older spelling `sparse` was removed in 1.4. Try the current name
# first and fall back for older installations.
try:
    one_hot_encoder = pre.OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    one_hot_encoder = pre.OneHotEncoder(sparse=False, handle_unknown='ignore')

# NOTE: x2_train / x2_valid are overwritten with their encoded form, so this
# cell must run exactly once after the raw id arrays have been built.
x2_train = pd.DataFrame(one_hot_encoder.fit_transform(x2_train))
x2_valid = pd.DataFrame(one_hot_encoder.transform(x2_valid))

In [None]:
# Shuffle the three training arrays with one shared permutation so that
# sales windows, one-hot ids and targets stay aligned row by row.
(x_train_final,
 x2_train_final,
 y_train_final) = u.shuffle(x_train_reshaped, x2_train, y_train)

In [None]:
# Same joint shuffle for the validation arrays (rows stay aligned).
(x_valid_final,
 x2_valid_final,
 y_valid_final) = u.shuffle(x_valid_reshaped, x2_valid, y_valid)

In [None]:
BATCH_SIZE = 250

# Training pipeline. Each element is ((sales_window, one_hot_ids), target).
x_train_dataset = tf.data.Dataset.from_tensor_slices((x_train_final, x2_train_final))
y_train_dataset = tf.data.Dataset.from_tensor_slices(y_train_final)
train_dataset = (
    tf.data.Dataset.zip((x_train_dataset, y_train_dataset))
    .cache()
    # Shuffle buffer as large as the training set.
    .shuffle(x_train.shape[0])
    .batch(BATCH_SIZE)
    # Repeats without end, so the fit call has to bound each epoch itself.
    .repeat()
)

# Validation pipeline: same element structure, batched and repeated, but
# without cache or shuffle.
x_val_dataset = tf.data.Dataset.from_tensor_slices((x_valid_final, x2_valid_final))
y_val_dataset = tf.data.Dataset.from_tensor_slices(y_valid_final)

val_dataset = (
    tf.data.Dataset.zip((x_val_dataset, y_val_dataset))
    .batch(BATCH_SIZE)
    .repeat()
)

In [None]:
# File the best model is written to during training.
model_name = 'model.h5'
# Save only when the monitored validation loss reaches a new minimum.
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath=model_name,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

In [None]:
# Dropout probability shared by the Dropout layers and the ConvLSTM
# recurrent dropout.
dropout_rate = 0.35
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
# Multiply the learning rate by 0.2 after 2 epochs without val_loss
# improvement, never going below 0.00001.
reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=2,
    min_lr=0.00001,
)

In [None]:
## Building the model
# Sales branch: input of shape (n_seq, 1, n_steps, n_features) fed through
# ConvLSTM2D blocks, each followed by batch normalisation and dropout.
input1 = tf.keras.layers.Input(shape=(n_seq, 1, n_steps, n_features))
convLSTM = tf.keras.layers.ConvLSTM2D(filters=256, recurrent_dropout=dropout_rate, padding='same', return_sequences=True,  kernel_size=(1,3), activation='relu')(input1)
convLSTM = tf.keras.layers.BatchNormalization()(convLSTM)
convLSTM = tf.keras.layers.Dropout(dropout_rate)(convLSTM)

# This layer was applied to `i1`, a name that is never defined in this
# notebook (NameError on a fresh kernel). It is resolved to `input1`: the
# recorded model summary shows conv_lstm2d_1 connected directly to input_1
# with 790528 parameters, which is the count for a single input channel.
# NOTE(review): as a consequence the block above is not part of the final
# graph (its output is overwritten here). Decide whether it should be
# stacked (apply this layer to `convLSTM`, which changes the trained
# architecture) or removed.
convLSTM = tf.keras.layers.ConvLSTM2D(filters=256, recurrent_dropout=dropout_rate, padding='same', return_sequences=True,  kernel_size=(1,3), activation='relu')(input1)
convLSTM = tf.keras.layers.BatchNormalization()(convLSTM)
convLSTM = tf.keras.layers.Dropout(dropout_rate)(convLSTM)

# Last recurrent block: no return_sequences / padding arguments, so the
# layer's defaults apply before flattening.
convLSTM = tf.keras.layers.ConvLSTM2D(filters=256, recurrent_dropout=dropout_rate, kernel_size=(1,3), activation='relu')(convLSTM)
convLSTM = tf.keras.layers.BatchNormalization()(convLSTM)
convLSTM = tf.keras.layers.Dropout(dropout_rate)(convLSTM)
flatten = tf.keras.layers.Flatten()(convLSTM)

# Id branch: one-hot encoded store/item ids. `shape` must be a tuple; the
# former `(x2_train.shape[1])` is a bare int, which not every Keras version
# accepts.
input2 = tf.keras.layers.Input(shape=(x2_train.shape[1],))
ohl = tf.keras.layers.Dense(units=10, activation='relu')(input2)

# Merge both branches and regress one value per forecast step. The output
# width follows `target` (the number of columns in y_train) rather than a
# second hard-coded 90.
merge = tf.keras.layers.concatenate([flatten, ohl])
dense = tf.keras.layers.Dense(units=150, activation='relu')(merge)
output = tf.keras.layers.Dense(target)(dense)

# `in2` was likewise undefined; the second model input is `input2`.
final_model = tf.keras.models.Model(inputs=[input1, input2], outputs=output)

final_model.compile(optimizer=adam, loss='mape')


In [None]:
# Print the layer-by-layer overview (output shapes, parameter counts and
# connections) of the compiled model.
final_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 4, 1, 90, 1  0           []                               
                                )]                                                                
                                                                                                  
 conv_lstm2d_1 (ConvLSTM2D)     (None, 4, 1, 90, 25  790528      ['input_1[0][0]']                
                                6)                                                                
                                                                                                  
 batch_normalization_1 (BatchNo  (None, 4, 1, 90, 25  1024       ['conv_lstm2d_1[0][0]']          
 rmalization)                   6)                                                            

In [None]:
no_of_batches = 600   # training batches per epoch
iterations = 50       # number of epochs

# train_dataset repeats without end, so steps_per_epoch defines an epoch:
# no_of_batches batches of BATCH_SIZE samples each. val_dataset repeats as
# well, so validation_steps batches are drawn from it every epoch.
#
# The fit result gets its own name. Binding it to `history` would overwrite
# the integer window length `history` that the prediction code still passes
# to features_extraction.
training_history = final_model.fit(train_dataset,
                epochs=iterations,
                callbacks=[reduce_lr_cb, model_checkpoint_cb],
                steps_per_epoch=no_of_batches,
                verbose=1,
                validation_data=val_dataset, validation_steps=100)


# Reload the model from the file written by the checkpoint callback, so
# that predictions come from the saved checkpoint rather than from the
# in-memory state left after the last epoch.
model = tf.keras.models.load_model(model_name)

# Number of trailing sales points the model consumes per series. It is
# derived from the model's input layout (n_seq sub-sequences of n_steps
# points) instead of read from `history`, because that name is rebound to
# the Keras fit result earlier in the notebook.
window_length = n_seq * n_steps

# Collect the inputs of every store-item series first and predict once,
# instead of calling model.predict and growing a DataFrame per series
# (DataFrame.append no longer exists in pandas 2.x).
sales_windows = []
store_item_ids = []
for i in range(1, data.item.max()+1):
    for s in range(1, data.store.max()+1):
        t = data[(data.store == s) & (data.item == i)]

        # Most recent `window_length` sales values of this series, laid out
        # in the ConvLSTM input shape.
        x_t = features_extraction(t.sales.values, window_length)
        x_t = x_t.reshape((x_t.shape[0], n_seq, 1, n_steps, n_features))

        sales_windows.append(x_t)
        store_item_ids.append([s, i])

x_all = np.concatenate(sales_windows)
x2_all = one_hot_encoder.transform(np.array(store_item_ids))

# One row of forecasts per series; flattening keeps the loop order
# (item-major, then store, then forecast step).
y_pred = pd.DataFrame({'sales': model.predict([x_all, x2_all]).reshape((-1))})

y_pred['Id'] = y_pred.index
y_pred


# Write the predictions (columns `sales` and `Id`) to Drive without the
# DataFrame index. The path is relative to the current working directory.
y_pred.to_csv("gdrive/My Drive/submission_CNN_LSTM.csv", index=False)