In [83]:
import hydra
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense, LayerNormalization

import matplotlib.pyplot as plt
from math import sqrt
from statsmodels.tsa.arima.model import ARIMA


In [84]:
num_cols = ['Holiday_Flag', 'Temperature','Fuel_Price', 'CPI', 'Unemployment']

cate_cols = ['Store']

label = 'Weekly_Sales'

In [85]:
def one_hot_encode(*arrays):
    return [pd.get_dummies(arr).values for arr in arrays]

def create_dataset(dataset, time_steps=1):
    data_x, data_y = [], []
    for i in range(len(dataset) - time_steps):
        a = dataset[i:(i + time_steps), :]
        data_x.append(a)
        data_y.append(dataset[i + time_steps, 0])
    return np.array(data_x), np.array(data_y)

def parse_cate_cols(df, cate_cols, num_cols):
    cate_encoded = [one_hot_encode(df[cate])[0] for cate in cate_cols]
    stack_cols = [df[label], df[num_cols]] + cate_encoded
    sales_with_features = np.column_stack(stack_cols)
    return sales_with_features

def inverse_y_array(y, shape, scaler):
    dummies = [0] * (shape - 1)
    y_reform = np.array([[i] + dummies if type(i) != np.ndarray else [i[0]] + dummies for i in y ])
    rescale_y = scaler.inverse_transform(y_reform)
    rescale_y = [round(i[0],2) for i in rescale_y]
    return rescale_y

    

In [86]:
input_data_path = "../data/raw/train.csv"
df = pd.read_csv(input_data_path)
sales_with_features = parse_cate_cols(df, cate_cols, num_cols)
scaler = MinMaxScaler(feature_range=(0, 1))
sales_data_normalized = scaler.fit_transform(sales_with_features)
time_steps = 10
X, y = create_dataset(sales_data_normalized, time_steps)
shape = X.shape

In [87]:
# Perform time-based split
train_size = int(0.8 * len(X))
print(train_size)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_size, shuffle=False)


5140


In [88]:
model = Sequential()
model.add(LSTM(units=50,
               dropout=0.1,
               recurrent_dropout=0.1,
               return_sequences=True,
               input_shape=(X.shape[1], X.shape[2]),
               # activation='LeakyReLU',
               # recurrent_activation='LeakyReLU'
               ))
model.add(LayerNormalization(axis=1, beta_initializer='ones', gamma_initializer='zeros'))
model.add(LSTM(units=50,
               dropout=0.1,
               recurrent_dropout=0.1,
               # return_sequences=True,
            #    activation='ReLU',
            #    recurrent_activation='ReLU'
               ))
model.add(LayerNormalization(axis=1, beta_initializer='ones', gamma_initializer='zeros'))
model.add(Dense(units=1))


opt = tf.optimizers.Adam(learning_rate=0.0015)
model.compile(optimizer=opt, loss='mean_squared_error')



In [96]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_10 (LSTM)              (None, 10, 50)            20400     
                                                                 
 layer_normalization_10 (La  (None, 10, 50)            20        
 yerNormalization)                                               
                                                                 
 lstm_11 (LSTM)              (None, 50)                20200     
                                                                 
 layer_normalization_11 (La  (None, 50)                100       
 yerNormalization)                                               
                                                                 
 dense_5 (Dense)             (None, 1)                 51        
                                                                 
Total params: 40771 (159.26 KB)
Trainable params: 4077

In [89]:
model.fit(X_train, y_train, epochs=2, batch_size=1, verbose=1)

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x2cdca6b90>

In [90]:
model.history.history

{'loss': [0.011578895151615143, 0.005821277387440205]}

In [91]:
eval_metric = model.evaluate(X_test,y_test)
y_predict = model.predict(X_test)
print(eval_metric)
#0.008298889733850956

0.004010303411632776


In [92]:
y_test_reform = inverse_y_array(y_test, shape[2], scaler)
y_predict_reform = inverse_y_array(y_predict, shape[2], scaler)

In [93]:
y_test_reform = inverse_y_array(y_test, shape[2], scaler)
y_predict_reform = inverse_y_array(y_predict, shape[2], scaler)

mae = mean_absolute_error(y_test_reform, y_predict_reform)
mse = mean_squared_error(y_test_reform, y_predict_reform)
rmse = np.sqrt(mse)

mae_scale = mean_absolute_error(y_test, y_predict)
mse_scale = mean_squared_error(y_test, y_predict)
rmse_scale = np.sqrt(mse_scale)

print(f"mae:{mae}, mse:{mse}, rmse:{rmse}")
print(f"mae:{mae_scale}, mse:{mse_scale}, rmse:{rmse_scale}")

mae:180708.9382490272, mse:52225056031.46898, rmse:228528.0202326817
mae:0.05007590772041748, mse:0.004010304104764538, rmse:0.06332696191011013


In [94]:
df_test = pd.DataFrame({'y_true':y_test_reform,'y_predict':y_predict_reform})
df_test['diff'] = df_test['y_predict'] - df_test['y_true']
df_test['ratio(%)'] = 100 * abs(df_test['diff']) / df_test['y_true']
display(df_test.sort_values('ratio(%)', ascending=False))

Unnamed: 0,y_true,y_predict,diff,ratio(%)
1046,241937.11,641147.02,399209.91,165.005654
1042,264214.12,655977.58,391763.46,148.274990
1098,263917.85,640931.45,377013.60,142.852634
1049,276157.80,661917.11,385759.31,139.688001
1035,268708.43,641642.01,372933.58,138.787451
...,...,...,...,...
890,657108.77,656568.66,-540.11,0.082195
495,972373.81,972897.31,523.50,0.053837
596,1338132.72,1338258.83,126.11,0.009424
659,1214944.29,1215006.32,62.03,0.005106
