In [1]:
import pandas as pd
import numpy as np
from pandas import read_csv
from pmdarima.arima import auto_arima
from datetime import datetime
import matplotlib.pyplot as plt
import tsfresh
import joblib
from pmdarima.arima import ADFTest
from tsfresh import extract_features
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

In [2]:
values = read_csv("../../Time_series_data.csv")

In [3]:
values.head(13)

Unnamed: 0,Month,Sales
0,2013-01,2815
1,2013-02,2672
2,2013-03,2755
3,2013-04,2721
4,2013-05,2946
5,2013-06,3036
6,2013-07,2282
7,2013-08,2212
8,2013-09,2922
9,2013-10,4301


In [4]:

values = values.rename(columns={values.columns[1]: 'Data'})
values = values.rename(columns={values.columns[0]: 'Time'})
values['Time'] = pd.to_datetime(
    values['Time'], errors='coerce')

In [5]:
freq = pd.infer_freq(values["Time"])

In [6]:
# end_date = pd.to_datetime(x) + pd.DateOffset(months=10)
# print(end_date)

In [7]:
values.set_index('Time', inplace=True)

In [8]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(
    values, test_size=0.2, shuffle=False)

rnn_train = train
rnn_test = test

In [9]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

scaler.fit(rnn_train)
scaled_train = scaler.transform(rnn_train)
scaled_test = scaler.transform(rnn_test)

In [10]:
from keras.preprocessing.sequence import TimeseriesGenerator

# define generator
n_input = 12
n_features = 1
generator = TimeseriesGenerator(
    scaled_train, scaled_train, length=n_input, batch_size=1)

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

In [11]:
# define model
model = Sequential()
model.add(LSTM(100, activation='relu',
            input_shape=(n_input, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

model.summary()
model.fit(generator, epochs=50)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100)               40800     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 40901 (159.77 KB)
Trainable params: 40901 (159.77 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epo

<keras.src.callbacks.History at 0x23b26d75760>

In [12]:
last_train_batch = scaled_train[-12:]
last_train_batch = last_train_batch.reshape((1, n_input, n_features))


In [13]:
last_train_batch

array([[[0.08636474],
        [0.10742931],
        [0.14558859],
        [0.1755651 ],
        [0.1096978 ],
        [0.19549542],
        [0.21421048],
        [0.0133679 ],
        [0.29555213],
        [0.39301628],
        [0.66993438],
        [0.93194523]]])

In [15]:
model.predict(last_train_batch)

test_predictions = []

first_eval_batch = scaled_train[-n_input:]
current_batch = first_eval_batch.reshape((1, n_input, n_features))



In [16]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100)               40800     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 40901 (159.77 KB)
Trainable params: 40901 (159.77 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [17]:
for i in range(len(rnn_test)):
    # get the prediction value for the first batch
    current_pred = model.predict(current_batch)[0]

    # append the prediction into the array
    test_predictions.append(current_pred)
    
    # use the prediction to update the batch and remove the first value
    current_batch = np.append(current_batch[:, 1:, :], [
                                [current_pred]], axis=1)



In [18]:
current_batch

array([[[0.52306765],
        [0.79092807],
        [1.07825017],
        [0.37269959],
        [0.29219088],
        [0.28285432],
        [0.27916941],
        [0.2324615 ],
        [0.29330647],
        [0.31665212],
        [0.26231652],
        [0.50745201]]])

In [19]:
true_predictions = scaler.inverse_transform(test_predictions)
rnn_test['Predictions'] = true_predictions

In [20]:
forecast_errors = [rnn_test['Data'][i]-rnn_test['Predictions'][i] for i in range(len(rnn_test))]

In [21]:
forecast_errors

[-474.87092208862305,
 -959.6936868131161,
 -5.203043222427368,
 215.67917676270008,
 1145.9953997582197,
 189.0627538561821,
 -270.4504846930504,
 -1880.5994669646025,
 -573.0032794177532,
 -1048.2240431904793,
 -1484.4251158833504,
 -2211.8418452739716,
 -1825.2310213148594,
 -1615.5120266973972,
 -487.2708547115326,
 -230.78803339600563,
 175.72774021327496,
 118.71824181079865,
 -1183.4371067881584,
 -3397.77286785841,
 -1959.480173110962]

In [22]:
mean_forecast_error = np.mean(forecast_errors)

In [23]:
mae = mean_absolute_error(rnn_test['Data'], rnn_test['Predictions'])
mae

1021.5708230393274

In [24]:
mse = mean_squared_error(rnn_test['Data'], rnn_test['Predictions'])
mse

1796987.603271212

In [25]:
mape = mean_absolute_percentage_error(rnn_test['Data'], rnn_test['Predictions'])
mape

0.3144899062258788

In [26]:
model_folder = "Models"
model_filename = "RNN.pkl"
model_path = f"{model_folder}/{model_filename}"

In [27]:
# Tạo thư mục nếu chưa tồn tại
import os
os.makedirs(model_folder, exist_ok=True)

In [28]:
# Lưu mô hình
joblib.dump(model, model_path)

['Models/RNN.pkl']

In [29]:
loaded_model = joblib.load(model_path)

In [30]:
loaded_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100)               40800     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 40901 (159.77 KB)
Trainable params: 40901 (159.77 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
