In [None]:
# Forecast horizon in days. Reused further down as `lag_size` (how many steps ahead
# the label column is shifted) and as the row offset when building `to_run`.
num_days_to_predict = 3

In [None]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras import optimizers
from keras.utils import plot_model
from keras.models import Sequential, Model
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed, Flatten
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Suppress all Python warnings for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")
# Enable plotly's offline rendering inside the notebook.
init_notebook_mode(connected=True)

# Set seeds to make the experiment more reproducible.
# NOTE(review): the top-level `tensorflow.set_random_seed` import presumably pins this
# notebook to TensorFlow 1.x — confirm against the runtime image.
from tensorflow import set_random_seed
from numpy.random import seed
set_random_seed(1)
seed(1)

In [None]:
# Read the competition CSVs; `date` is parsed into timestamps while loading.
train = pd.read_csv('../input/demand-forecasting-kernels-only/train.csv', parse_dates=['date'])
test = pd.read_csv('../input/demand-forecasting-kernels-only/test.csv', parse_dates=['date'])

# Restrict both frames to stores with id below 2 and items with id below 11;
# the training frame is additionally limited to dates from 2017-01-01 onwards.
train = train[(train.store < 2) & (train.item < 11) & (train['date'] >= '2017-01-01')]
test = test[(test.store < 2) & (test.item < 11)]

In [None]:
# Summary statistics of the filtered training frame.
train.describe()

In [None]:
# Show the filtered training frame through the notebook's rich (HTML) repr instead of
# a plain-text print; pandas truncates the rendering to its display limits.
train


In [None]:
# First rows of the filtered training frame.
train.head()

In [None]:
# Report the calendar span covered by the filtered training data.
first_day = train['date'].min().date()
last_day = train['date'].max().date()
print('Min date from train set: %s' % first_day)
print('Max date from train set: %s' % last_day)

In [None]:
# The forecast lag is taken from the configured horizon. The original cell kept a
# commented-out alternative that derived it from the gap in days between the last
# train date and the last test date.
lag_size = num_days_to_predict
train_end = train['date'].max().date()
test_end = test['date'].max().date()
print('Max date from train set: %s' % train_end)
print('Max date from test set: %s' % test_end)
print('Forecast lag size', lag_size)

In [None]:
def _total_sales_by(keys):
    """Sum `sales` in `train` per group of `keys`, keeping the keys as regular columns."""
    return train.groupby(keys, as_index=False)['sales'].sum()


# Total sales per day overall, per store per day, and per item per day.
daily_sales = _total_sales_by('date')
store_daily_sales = _total_sales_by(['store', 'date'])
item_daily_sales = _total_sales_by(['item', 'date'])

In [None]:
# Line chart of the total sales per day.
daily_sales_sc = go.Scatter(
    x=daily_sales['date'],
    y=daily_sales['sales'],
)
layout = go.Layout(
    title='Daily sales',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Sales'},
)
fig = go.Figure(data=[daily_sales_sc], layout=layout)
iplot(fig)

In [None]:
# One trace per store, in order of first appearance in `store_daily_sales`.
store_daily_sales_sc = [
    go.Scatter(
        x=store_daily_sales.loc[store_daily_sales['store'] == store_id, 'date'],
        y=store_daily_sales.loc[store_daily_sales['store'] == store_id, 'sales'],
        name=('Store %s' % store_id),
    )
    for store_id in store_daily_sales['store'].unique()
]

layout = go.Layout(
    title='Store daily sales',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Sales'},
)
fig = go.Figure(data=store_daily_sales_sc, layout=layout)
iplot(fig)

In [None]:
# One trace per item, capped at the first ten distinct items in order of appearance.
item_daily_sales_sc = []
for item_id in item_daily_sales['item'].unique()[:10]:
    rows_for_item = item_daily_sales[item_daily_sales['item'] == item_id]
    trace = go.Scatter(
        x=rows_for_item['date'],
        y=rows_for_item['sales'],
        name=('Item %s' % item_id),
    )
    item_daily_sales_sc.append(trace)

layout = go.Layout(
    title='Item daily sales',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Sales'},
)
fig = go.Figure(data=item_daily_sales_sc, layout=layout)
iplot(fig)

In [None]:
# Mean sales per (item, store, date). Selecting the column before aggregating yields
# flat column names (item, store, date, sales) directly, so no manual rename is needed.
# The rows come out ordered by the group keys (groupby sorts them by default).
train_gp = (
    train.sort_values('date')
    .groupby(['item', 'store', 'date'], as_index=False)['sales']
    .mean()
)
print(train_gp.shape)

# Show the rows for 2017-12-31. Previously this was a bare expression in the middle of
# the cell, so its result was silently discarded.
print(train_gp[train_gp.date == '2017-12-31'])

print(train_gp.head())

In [None]:
# Row numbers of interest. This value is reassigned twice further down before it is
# ever read, so this first assignment has no effect on the results.
# NOTE(review): the values are spaced 365 apart starting at 364 — presumably the last
# row of each item's one-year block in `train_gp`; confirm.
cols_to_focus = [364,729,1094,1459,1824,2189,2554,2919,3284,3649]

In [None]:
# (rows, columns) of the aggregated frame.
train_gp.shape

In [None]:
def series_to_supervised(data, window=1, lag=1, dropnan=True):
    """Reframe a time-ordered table as a supervised-learning table.

    For every column of `data` the result holds `window` past copies
    (`<col>(t-window)` ... `<col>(t-1)`), the current value (`<col>(t)`) and a
    target copy shifted `lag` rows ahead (`<col>(t+lag)`), side by side.

    The shift is purely positional: it does not know about groups, so rows near
    the boundary between two concatenated series mix values from both. Callers
    that stack several series must filter those rows themselves.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Observations ordered in time. A Series is treated as a one-column frame.
    window : int
        Number of past timesteps to include as inputs.
    lag : int
        How many rows ahead of `t` the target columns are taken from.
    dropnan : bool
        When true, rows containing any NaN (introduced by the shifts or already
        present in `data`) are removed.

    Returns
    -------
    pandas.DataFrame
        The reframed table, indexed like `data` (minus dropped rows).
    """
    # A bare Series has no `.columns`; promote it so the naming logic below works.
    if isinstance(data, pd.Series):
        data = data.to_frame()

    shifted, names = [], []
    # Input sequence (t-window, ... t-1)
    for step in range(window, 0, -1):
        shifted.append(data.shift(step))
        names.extend('%s(t-%d)' % (col, step) for col in data.columns)
    # Current timestep (t)
    shifted.append(data)
    names.extend('%s(t)' % col for col in data.columns)
    # Target timestep (t+lag)
    shifted.append(data.shift(-lag))
    names.extend('%s(t+%d)' % (col, lag) for col in data.columns)

    agg = pd.concat(shifted, axis=1)
    agg.columns = names
    if dropnan:
        agg = agg.dropna()
    return agg

In [None]:
# `window` past timesteps are used as inputs; the label sits `lag` rows ahead.
window = 29
lag = lag_size
# `date` is dropped so only item, store and sales are shifted into the supervised frame.
series = series_to_supervised(train_gp.drop('date', axis=1), window=window, lag=lag)
# Previously `series.head()` / `series.shape` were bare mid-cell expressions (never
# shown) followed by a plain-text dump of the frame; show the shape and a preview instead.
print(series.shape)
series.head()

In [None]:
# Keep only rows whose oldest input step (t-window) has the same store and item as
# the current step (t), i.e. whose input window does not start in another series.
# NOTE(review): the target columns at t+lag are not compared here, so a kept row's
# label may presumably come from the next item — confirm whether that is intended.
last_item = 'item(t-%d)' % window
last_store = 'store(t-%d)' % window
same_store = series['store(t)'] == series[last_store]
same_item = series['item(t)'] == series[last_item]
series = series[same_store & same_item]

In [None]:
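# NOTE: the column-dropping step below is disabled. As a result the shifted
# item/store columns stay in `series` and are passed to the model as input features.
#columns_to_drop = [('%s(t+%d)' % (col, lag)) for col in ['item', 'store']]
#for i in range(window, 0, -1):
#    columns_to_drop += [('%s(t-%d)' % (col, i)) for col in ['item', 'store']]
#series.drop(columns_to_drop, axis=1, inplace=True)
#series.drop(['item(t)', 'store(t)'], axis=1, inplace=True)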

In [None]:
# Second candidate list of row numbers; it is reassigned again further down before being read.
cols_to_focus = [334,699,1064,1429,1794,2159,2524,2889,3254]
# Empty array placeholder. NOTE(review): `out` is not referenced anywhere else in the
# visible notebook — possibly a leftover.
out = np.array([])

In [None]:
# Label: the sales value `lag_size` steps ahead; everything else stays as input.
labels_col = 'sales(t+%d)' % lag_size
labels = series[labels_col]
series = series.drop(labels_col, axis=1)

# Every row is used for training and the validation set is intentionally empty.
# `train_test_split(..., test_size=0)` expressed this, but current scikit-learn
# releases reject a zero test size with ValueError, so the seeded shuffle is done
# explicitly with the same seed (0) and an empty slice is kept for validation.
shuffled_idx = np.random.RandomState(0).permutation(len(series))
X_train = series.iloc[shuffled_idx]
Y_train = labels.values[shuffled_idx]
X_valid = series.iloc[:0]
Y_valid = labels.values[:0]
print('Train set shape', X_train.shape)
print('Validation set shape', X_valid.shape)
X_train.head()

In [None]:
# Preview of the feature frame.
series.head()

In [None]:
# Display the feature frame (pandas truncates the rendering to its display limits).
series

In [None]:
# Row numbers whose feature vectors are fed to the trained model for the final forecast.
cols_to_focus = [364,729,1094,1459,1824,2189,2554,2919,3284]
# Each entry is moved back by (num_days_to_predict - 1) rows before the lookup.
# NOTE(review): `.iloc` is positional, and `series` has already lost rows to dropna and
# to the boundary filter, so these positions presumably no longer line up with the
# original row labels — confirm whether a label-based lookup was intended.
# The per-iteration debug print of the growing list was removed; it re-printed every
# collected vector on each pass.
to_run = np.array([
    series.iloc[row - num_days_to_predict + 1].values
    for row in cols_to_focus
])

In [None]:
# Append a trailing axis of size 1 so the array becomes (rows, columns, 1).
n_rows, n_cols = to_run.shape[0], to_run.shape[1]
to_run = np.reshape(to_run, (n_rows, n_cols, 1))
to_run.shape

In [None]:
# Training hyper-parameters.
epochs = 39
# NOTE(review): `batch` is not referenced by the `fit` call visible in this notebook.
batch = 256
# Learning rate handed to the Adam optimizer below.
lr = 0.0003
adam = optimizers.Adam(lr)

In [None]:
# Add a trailing axis of size 1 to both feature matrices: (rows, columns) -> (rows, columns, 1).
X_train_series = np.reshape(X_train.values, X_train.shape + (1,))
X_valid_series = np.reshape(X_valid.values, X_valid.shape + (1,))

print('Train set shape', X_train_series.shape)
print('Validation set shape', X_valid_series.shape)

In [None]:
# Compare the training tensor's shape with that of the prediction input.
print(X_train_series.shape)
print(to_run.shape)


In [None]:
# Single LSTM layer (50 units, ReLU) followed by one linear output unit,
# trained with mean squared error using the Adam optimizer configured earlier.
model_lstm = Sequential([
    LSTM(50, activation='relu', input_shape=X_train_series.shape[1:]),
    Dense(1),
])
model_lstm.compile(optimizer=adam, loss='mse')
model_lstm.summary()

In [None]:
# Train for `epochs` epochs and keep the returned history object.
# NOTE(review): no `batch_size` is passed, so the `batch` value defined with the other
# hyper-parameters is unused here — confirm whether that is intended. No rows are held
# out upstream, so the validation arrays passed here are empty.
lstm_history = model_lstm.fit(X_train_series, Y_train, validation_data=(X_valid_series, Y_valid), epochs=epochs, verbose=2)

In [None]:
# Root mean squared error of the fitted model on its own training data.
lstm_train_pred = model_lstm.predict(X_train_series)
train_mse = mean_squared_error(Y_train, lstm_train_pred)
print('Train rmse:', np.sqrt(train_mse))


In [None]:
# Run the trained model on the selected feature rows.
output = model_lstm.predict(to_run)

In [None]:
# Display the predictions, one entry per row of `to_run`.
output

In [None]:
# Persist the trained model to the given absolute path.
# NOTE(review): the "three-days" name is hard-coded and does not follow
# `num_days_to_predict`; keep the two in sync when changing the horizon.
model_lstm.save("/kaggle/working/three-days")