In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess
import copy

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn import metrics

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler

from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

from tensorflow.keras.callbacks import ReduceLROnPlateau

In [None]:
# Read the training file, keeping only the key columns and the sales target.
store_sales = pd.read_csv(
    '../input/train.csv',
    usecols=['store_nbr', 'family', 'date', 'sales'],
    dtype=dict(store_nbr='category', family='category', sales='float32'),
    parse_dates=['date'],
    infer_datetime_format=True,
)

# Turn timestamps into daily periods, then index by (store, family, date).
store_sales = (
    store_sales
    .assign(date=store_sales['date'].dt.to_period('D'))
    .set_index(['store_nbr', 'family', 'date'])
    .sort_index()
)

# Wide target frame: dates as rows (restricted to 2017), one column per
# (store_nbr, family) combination.
y = store_sales.unstack(['store_nbr', 'family']).loc["2017"]

# Deterministic time features built on the target's date index; `dp` is kept
# so the same process can later be extended out of sample.
fourier = CalendarFourier(freq='M', order=3)
dp = DeterministicProcess(
    index=y.index,
    order=5,
    constant=False,
    seasonal=True,
    additional_terms=[fourier],
    drop=True,
)

X = dp.in_sample()
# Boolean flag that is True only on the first day of the year.
X['NewYear'] = X.index.dayofyear == 1

In [None]:
# Split for the linear models: test_size=0.001 keeps almost every row for
# training; random_state fixes the split.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.001,
    random_state=13,
)

# The scaler is fitted on the training features only and then applied to both
# splits; it is kept as `transformerL` for reuse on the test features.
transformerL = RobustScaler()
transformerL.fit(X_train)

X_train = transformerL.transform(X_train)
X_val = transformerL.transform(X_val)

In [None]:
# --- Lasso ---------------------------------------------------------------
# Fit on the scaled training split and score the raw validation predictions.
modelL = Lasso(alpha=1, fit_intercept=True, max_iter=7000).fit(X_train, y_train)
y_predL = modelL.predict(X_val)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_val, y_predL))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_val, y_predL)))

# Floor negative predictions at zero (presumably because sales cannot be
# negative), then score again with the 1.03 divisor that the submission cell
# also applies to its predictions.
y_predL[y_predL < 0] = 0

print(metrics.mean_absolute_error(y_val, y_predL/1.03))

print('======')

# --- Ridge ---------------------------------------------------------------
modelR = Ridge(alpha=0.4, fit_intercept=True, max_iter=7000).fit(X_train, y_train)
y_predR = modelR.predict(X_val)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_val, y_predR))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_val, y_predR)))

# Same post-processing for Ridge. The original re-clipped and re-scored the
# Lasso predictions here, so the Ridge figure was never reported and the blend
# below used unclipped Ridge output.
y_predR[y_predR < 0] = 0

print(metrics.mean_absolute_error(y_val, y_predR/1.03))

print('======')

# --- Equal-weight blend of both (clipped) linear models --------------------
print(metrics.mean_absolute_error(y_val, 0.5*(y_predL + y_predR)/1.03))

In [None]:
# Fresh split for the neural network: no explicit test_size is given here, so
# train_test_split's default hold-out fraction applies; same seed as before.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=13,
)

# A separate scaler is fitted on this training split and kept as
# `transformerKERAS` so the test features can be scaled the same way later.
transformerKERAS = RobustScaler()
transformerKERAS.fit(X_train)

X_train = transformerKERAS.transform(X_train)
X_val = transformerKERAS.transform(X_val)

In [None]:
# Learning-rate schedule callback driven by the validation loss: multiplies
# the rate by `factor` after `patience` epochs without improvement, never
# going below `min_lr`.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.2,
    patience=20,
    min_lr=1e-6,
    verbose=1,
)

def create_model(input_dim=19, output_dim=1782):
    """Build and compile the fully connected regression network.

    The architecture is three ReLU hidden layers (500, 2000 and 1500 units)
    followed by a swish output layer, compiled with MAE loss and the Adam
    optimizer.

    Args:
        input_dim: Number of input features. Defaults to 19, the value that
            was previously hard-coded (presumably the column count of ``X``;
            pass ``X_train.shape[1]`` if the feature set changes).
        output_dim: Number of regression targets. Defaults to 1782, the value
            that was previously hard-coded (presumably the column count of
            ``y``; pass ``y_train.shape[1]`` if the target set changes).

    Returns:
        A compiled, untrained ``Sequential`` model.
    """
    model = Sequential([
        Dense(units=500, activation='relu', input_dim=input_dim),
        Dense(units=2000, activation='relu'),
        Dense(units=1500, activation='relu'),
        Dense(units=output_dim, activation='swish'),
    ])

    model.compile(loss='mae', optimizer='adam')

    return model

# Build the network and train it on the Keras split, validating each epoch so
# the `reduce_lr` callback can react to `val_loss`.
model = create_model()
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=5000,
    batch_size=2000,
    callbacks=[reduce_lr],
)

# Predict on the validation split and zero out negative outputs in place
# before scoring.
y_pred1 = model.predict(X_val)
np.putmask(y_pred1, y_pred1 < 0, 0)

print('Mean Absolute Error:', metrics.mean_absolute_error(y_val, y_pred1))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_val, y_pred1)))

In [None]:
def _floor_and_deflate(raw):
    """Zero out negative entries of ``raw`` in place and return ``raw / 1.03``."""
    raw[raw < 0] = 0
    return raw / 1.03


# Load the test rows; only their `id` column is used below, to label the
# predictions.
df_test = pd.read_csv(
    '../input/test.csv',
    dtype=dict(store_nbr='category', family='category', onpromotion='uint32'),
    parse_dates=['date'],
    infer_datetime_format=True,
)
df_test = (
    df_test
    .assign(date=df_test['date'].dt.to_period('D'))
    .set_index(['store_nbr', 'family', 'date'])
    .sort_index()
)

# Extend the deterministic features 16 steps past the training index and add
# the same NewYear flag used in training.
X_test = dp.out_of_sample(steps=16)
X_test.index.name = 'date'
X_test['NewYear'] = X_test.index.dayofyear == 1

# Keep an unscaled copy: each model family has its own fitted scaler.
X_test_copy = copy.deepcopy(X_test)

# Linear models share the `transformerL` scaling.
X_test = transformerL.transform(X_test)
predictionL = _floor_and_deflate(modelL.predict(X_test))
predictionR = _floor_and_deflate(modelR.predict(X_test))

# The network uses the `transformerKERAS` scaling of the same raw features.
X_test = transformerKERAS.transform(copy.deepcopy(X_test_copy))
predictionKERAS = _floor_and_deflate(model.predict(X_test))

# Weighted ensemble: 80% linear blend (70/30 Lasso/Ridge) + 20% network.
prediction = 0.8*(predictionL*0.7 + predictionR*0.3) + 0.2*predictionKERAS

# Reshape the wide prediction matrix to long form, attach the test ids, and
# write only the `id` and `sales` columns.
y_submit = (
    pd.DataFrame(prediction, index=X_test_copy.index, columns=y.columns)
    .stack(['store_nbr', 'family'])
    .join(df_test.id)
    .reindex(columns=['id', 'sales'])
)

y_submit.to_csv('submission.csv', index=False)

In [None]:
# Show the submission frame as the cell output for a final visual check.
y_submit