### Importing Libraries

In [None]:
import pandas as pd
import numpy as np
from keras.layers.advanced_activations import PReLU

In [2]:
# Print the installed Keras version so the outputs below can be tied to it.
import keras
print(keras.__version__)

2.0.8


In [3]:
from random import randint
from numpy import array
from numpy import argmax
from numpy import array_equal
from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Dense

In [4]:
from sklearn.metrics import mean_squared_error

## Pre-processing data

We fit the model directly on numerical data. Sequence-to-sequence (seq2seq) models are usually applied to one-hot encoded sequences, but here the inputs and targets are numeric values (log1p-transformed unit sales).

In [60]:
def _log1p_if_positive(raw_value):
    """Return log1p of a raw unit_sales field, or 0 when the value is not positive."""
    sales = float(raw_value)
    return np.log1p(sales) if sales > 0 else 0


# Load the training rows, skipping the leading block of data rows; per the
# original note, the first row kept corresponds to 2016-01-01.
df_train = pd.read_csv(
    'D:/data mining/infor project/train.csv',
    usecols=[1, 2, 3, 4, 5],
    dtype={'onpromotion': bool},
    converters={'unit_sales': _log1p_if_positive},
    parse_dates=["date"],
    skiprows=range(1, 66458909),  # 2016-01-01
)

In [61]:
# Test set, indexed by (store_nbr, item_nbr, date) so that predictions with the
# same index can be joined onto it later.
df_test = pd.read_csv(
    "D:/data mining/infor project/test.csv",
    usecols=[0, 1, 2, 3, 4],
    dtype={'onpromotion': bool},
    parse_dates=["date"],
)
df_test = df_test.set_index(['store_nbr', 'item_nbr', 'date'])

# Item metadata, indexed by item number.
items = pd.read_csv("D:/data mining/infor project/items.csv")
items = items.set_index("item_nbr")

In [62]:
# Keep only rows dated 2017-01-01 or later.
# pd.Timestamp is used instead of the pd.datetime alias, which was deprecated
# in pandas 1.0 and removed in pandas 2.0.
df_2017 = df_train.loc[df_train.date >= pd.Timestamp("2017-01-01")]

In [63]:
# Drop the reference to the full training frame; the cells below only use df_2017.
# NOTE(review): once this has run, the cell that builds df_2017 cannot be
# re-run without reloading df_train first.
del df_train

In [64]:
# Pivot the date variable into columns: one row per (store_nbr, item_nbr) and
# one unit_sales column per date; missing combinations are filled with 0.
df_2017 = (
    df_2017
    .set_index(["store_nbr", "item_nbr", "date"])[["unit_sales"]]
    .unstack(level=-1)
    .fillna(0)
)

In [65]:
# Flatten the two-level column index produced by unstack, keeping only level 1
# (the date values) as the column labels.
df_2017.columns = df_2017.columns.get_level_values(1)

In [66]:
# Empty frame that carries only the (store_nbr, item_nbr) index of df_2017;
# used below to align the items table and to index the predictions.
stores_items = pd.DataFrame(index=df_2017.index)
# The 'id' column of the test set, still indexed like df_test.
test_ids = df_test[['id']]

In [67]:
# Repeat and reorder the items table so it has one row per df_2017 row, in the
# same order (level 1 of the stores_items index is item_nbr).
# NOTE(review): this overwrites `items`; re-running the cell operates on the
# already-expanded table instead of the one loaded from items.csv.
items = items.reindex( stores_items.index.get_level_values(1))

## Creating the training, validation, and testing data

In [68]:
# Encoder input for training: 50 consecutive date columns (positions 145-194),
# reshaped to (samples, timesteps, 1).
# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in pandas 1.0.
X1_train = df_2017.iloc[:, 145:195].values
X1_train = X1_train.reshape((X1_train.shape[0], X1_train.shape[1], 1))

In [69]:
# Training targets: the 16 date columns right after the encoder window
# (positions 195-210), reshaped to (samples, timesteps, 1).
# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in pandas 1.0.
y_train = df_2017.iloc[:, 195:211].values
y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], 1))

In [70]:
# Decoder input for training: the target sequence shifted one step to the
# right, with -1 as the start-of-sequence value in the first timestep.
# The result keeps y_train's (samples, timesteps, 1) shape and dtype.
X2_train = np.concatenate(
    [np.full_like(y_train[:, :1], -1), y_train[:, :-1]],
    axis=1,
)

In [73]:
# Encoder input for validation: 50 consecutive date columns (positions
# 161-210), i.e. the training window shifted 16 columns later.
# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in pandas 1.0.
X1_val = df_2017.iloc[:, 161:211].values
X1_val = X1_val.reshape((X1_val.shape[0], X1_val.shape[1], 1))

In [74]:
# Validation targets: every date column from position 211 to the end
# (16 columns if df_2017 has 227 date columns -- TODO confirm).
# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in pandas 1.0.
y_val = df_2017.iloc[:, 211:].values
y_val = y_val.reshape((y_val.shape[0], y_val.shape[1], 1))

In [75]:
# Decoder input for validation: the target sequence shifted one step to the
# right, with -1 as the start-of-sequence value in the first timestep.
# The result keeps y_val's (samples, timesteps, 1) shape and dtype.
X2_val = np.concatenate(
    [np.full_like(y_val[:, :1], -1), y_val[:, :-1]],
    axis=1,
)

In [76]:
# Encoder input for the final forecast: every date column from position 177 to
# the end (the last 50 columns if df_2017 has 227 date columns -- TODO confirm).
# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in pandas 1.0.
X1_test = df_2017.iloc[:, 177:].values
X1_test = X1_test.reshape((X1_test.shape[0], X1_test.shape[1], 1))

## define model

In [None]:
# Build an LSTM encoder-decoder (seq2seq) network and return three models that
# share the same layers:
#   * a training model fed the encoder sequence and the decoder input sequence,
#   * an inference encoder mapping an input sequence to its final LSTM states,
#   * an inference decoder that takes the states as explicit inputs and returns
#     the updated states along with its output.
# During training the decoder is initialised with the encoder's hidden and cell
# states (the encoder output itself is discarded).
def define_models(n_input, n_output, n_units):
    """Create the training, inference-encoder and inference-decoder models.

    Args:
        n_input: number of features per timestep of the encoder input.
        n_output: number of features per timestep of the decoder input/output.
        n_units: number of units in each of the two LSTM layers.

    Returns:
        Tuple ``(model, encoder_model, decoder_model)``.
    """
    # Encoder: only the final hidden and cell states are kept.
    enc_in = Input(shape=(None, n_input))
    enc_lstm = LSTM(n_units, return_state=True)
    _, enc_h, enc_c = enc_lstm(enc_in)
    enc_states = [enc_h, enc_c]

    # Training decoder: starts from the encoder states; a Dense layer with relu
    # activation maps every decoder timestep to n_output values.
    dec_in = Input(shape=(None, n_output))
    dec_lstm = LSTM(n_units, return_sequences=True, return_state=True)
    dec_seq, _, _ = dec_lstm(dec_in, initial_state=enc_states)
    dec_dense = Dense(n_output, activation='relu')
    model = Model([enc_in, dec_in], dec_dense(dec_seq))

    # Inference encoder: input sequence -> [hidden state, cell state].
    encoder_model = Model(enc_in, enc_states)

    # Inference decoder: reuses the trained LSTM and Dense layers, with the
    # states exposed as inputs and outputs so a caller can feed them back in.
    h_in = Input(shape=(n_units,))
    c_in = Input(shape=(n_units,))
    step_seq, step_h, step_c = dec_lstm(dec_in, initial_state=[h_in, c_in])
    decoder_model = Model(
        [dec_in, h_in, c_in],
        [dec_dense(step_seq), step_h, step_c],
    )

    return model, encoder_model, decoder_model
 

In [77]:
# Build the three models with one feature per timestep on both the encoder and
# the decoder side and 128 units in each LSTM layer, then compile the training
# model with the Adam optimizer and mean-squared-error loss.
train, infenc, infdec = define_models(n_input=1, n_output=1, n_units=128)
train.compile(optimizer='adam', loss='mse', metrics=['mse'])

In [78]:
# Give perishable items a higher weight: weight = 1 + 0.25 * perishable
# (presumably a 0/1 flag, giving 1.25 vs 1.0 -- verify against items.csv).
sample_weights = np.array(items["perishable"] * 0.25 + 1)

# Train for 5 epochs; the decoder is fed the shifted true targets (X2_*) and
# the validation windows are evaluated after every epoch.
train.fit(
    [X1_train, X2_train],
    y_train,
    epochs=5,
    sample_weight=sample_weights,
    validation_data=([X1_val, X2_val], y_val),
)

Train on 167515 samples, validate on 167515 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x22f821027f0>

## Predict on Validation

In [79]:
# Decode the validation windows step by step, feeding each prediction back in.
# Sample count and horizon are read from the arrays instead of being
# hard-coded (previously 167515 and 16), so the cell keeps working if the data
# changes.
n_val_samples = X1_val.shape[0]
n_val_steps = y_val.shape[1]

# Encode every input window into the encoder's final [hidden, cell] states.
state = infenc.predict(X1_val)
# Start-of-sequence input: -1 for every sample, shaped (samples, 1, 1).
target_seq = np.full((n_val_samples, 1, 1), -1)
# Collect one array of per-sample predictions for each forecast step.
output = list()
for t in range(n_val_steps):
    # Predict the next value of the sequence.
    yhat, h, c = infdec.predict([target_seq] + state)
    # Store the prediction for this step.
    output.append(yhat[:, 0, 0])
    # Carry the decoder states over to the next step.
    state = [h, c]
    # The prediction becomes the decoder input of the next step.
    target_seq = yhat

In [80]:
# Stack the per-step predictions and flip to (samples, steps).
y_hat_val = array(output).T

In [81]:
# Score the decoded validation forecasts against the held-out targets, with and
# without the perishable-item weights (1 + 0.25 * perishable).
# The 2-D target shape is derived from y_val instead of the hard-coded
# (167515, 16), so the cell does not break when the number of rows changes.
weights = items["perishable"] * 0.25 + 1
y_val_2d = y_val.reshape((y_val.shape[0], y_val.shape[1]))
print("Unweighted validation mse:", mean_squared_error(y_val_2d, y_hat_val))

print("Full validation mse:       ", mean_squared_error(
    y_val_2d, y_hat_val, sample_weight=weights))

Unweighted validation mse: 0.405532251137
Full validation mse:        0.406096756002


## Final predictions on Holdout set

In [64]:
# Decode the final forecast step by step, feeding each prediction back in.
# The sample count is read from X1_test instead of the hard-coded 167515.
n_test_samples = X1_test.shape[0]
# Forecast horizon; matches the 16-day date range used to label the predictions.
n_forecast_steps = 16

# Encode every input window into the encoder's final [hidden, cell] states.
state = infenc.predict(X1_test)
# Start-of-sequence input: -1 for every sample, shaped (samples, 1, 1).
target_seq = np.full((n_test_samples, 1, 1), -1)
# Collect one array of per-sample predictions for each forecast step.
output = list()
for t in range(n_forecast_steps):
    # Predict the next value of the sequence.
    yhat, h, c = infdec.predict([target_seq] + state)
    # Store the prediction for this step.
    output.append(yhat[:, 0, 0])
    # Carry the decoder states over to the next step.
    state = [h, c]
    # The prediction becomes the decoder input of the next step.
    target_seq = yhat

In [65]:
# Arrange the decoded steps as (samples, 16), label the columns with the 16
# days starting 2017-08-16, then stack into a long frame with one unit_sales
# row per (store_nbr, item_nbr, date).
y_test = array(output).T
df_preds = pd.DataFrame(
    y_test,
    index=stores_items.index,
    columns=pd.date_range("2017-08-16", periods=16),
)
df_preds = df_preds.stack().to_frame("unit_sales")
df_preds.index = df_preds.index.set_names(["store_nbr", "item_nbr", "date"])

In [29]:
# Attach predictions to the test ids; ids with no matching prediction get 0.
submission = test_ids.join(df_preds, how="left")
submission = submission.fillna(0)
# Undo the log1p transform applied at load time and clip to [0, 1000].
submission["unit_sales"] = np.expm1(submission["unit_sales"]).clip(0, 1000)
submission.to_csv('seq2seq1.csv', float_format='%.4f', index=None)

## Get predictions (as features) for the meta model, which is defined in detail in another notebook

In [71]:
# Build 31 sliding windows (start positions 100..130): each window pairs 50
# input columns with the 16 columns that follow, and the windows are stacked
# along the sample axis.
# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in pandas 1.0.
X1_meta = []
y_meta = []
for i in range(100, 131):
    # Encoder input window, reshaped to (samples, 50, 1).
    X1_meta_tmp = df_2017.iloc[:, i:(i + 50)].values
    X1_meta_tmp = X1_meta_tmp.reshape((X1_meta_tmp.shape[0], X1_meta_tmp.shape[1], 1))
    X1_meta.append(X1_meta_tmp)
    # Matching targets, reshaped to (samples, 16, 1).
    y_meta_tmp = df_2017.iloc[:, (i + 50):(i + 66)].values
    y_meta_tmp = y_meta_tmp.reshape((y_meta_tmp.shape[0], y_meta_tmp.shape[1], 1))
    y_meta.append(y_meta_tmp)
X1_meta = np.concatenate(X1_meta, axis=0)
y_meta = np.concatenate(y_meta, axis=0)

In [82]:
## Predict for meta model
# Encode every meta window into the encoder's final [hidden, cell] states.
state = infenc.predict(X1_meta)
# Start-of-sequence input: -1 for every sample, shaped (samples, 1, 1).
n_meta_samples = X1_meta.shape[0]
target_seq = np.full((n_meta_samples, 1, 1), -1)
# Collect one array of per-sample predictions for each of the 16 steps.
output = []
for t in range(16):
    # Predict the next value of the sequence.
    yhat, h, c = infdec.predict([target_seq] + state)
    output.append(yhat[:, 0, 0])
    # Carry the states forward and feed the prediction back in as next input.
    state = [h, c]
    target_seq = yhat

In [83]:
# Stack the per-step predictions and flip to (samples, steps).
y_hat_meta = array(output).T

In [85]:
# Score the meta-window forecasts, with and without the perishable-item
# weights (1 + 0.25 * perishable). The weights are repeated once per stacked
# window so they line up with the rows of y_meta.
# The window count and target shape are derived from the arrays instead of the
# hard-coded 31 and 167515*31.
n_meta_windows = y_meta.shape[0] // len(items)
weights = pd.concat([items["perishable"]] * n_meta_windows) * 0.25 + 1
y_meta_2d = y_meta.reshape((y_meta.shape[0], y_meta.shape[1]))
print("Unweighted validation mse:", mean_squared_error(y_meta_2d, y_hat_meta))

print("Full validation mse:       ", mean_squared_error(
    y_meta_2d, y_hat_meta, sample_weight=weights))

Unweighted validation mse: 0.411570710818
Full validation mse:        0.412970885846
