In [51]:
import pandas as pd
import numpy as np
from pandas import read_csv
from pmdarima.arima import auto_arima
from datetime import datetime
import matplotlib.pyplot as plt
import tsfresh
import joblib
from pmdarima.arima import ADFTest
from tsfresh import extract_features
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

In [52]:
values = read_csv("../../Time_series_data.csv")

In [53]:
values.head(13)

Unnamed: 0,Month,Sales
0,2013-01,2815
1,2013-02,2672
2,2013-03,2755
3,2013-04,2721
4,2013-05,2946
5,2013-06,3036
6,2013-07,2282
7,2013-08,2212
8,2013-09,2922
9,2013-10,4301


In [54]:

values = values.rename(columns={values.columns[1]: 'Data'})
values = values.rename(columns={values.columns[0]: 'Time'})
values['Time'] = pd.to_datetime(
    values['Time'], errors='coerce')

In [55]:
freq = pd.infer_freq(values["Time"])

In [56]:
# end_date = pd.to_datetime(x) + pd.DateOffset(months=10)
# print(end_date)

In [57]:
values.set_index('Time', inplace=True)

In [58]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(
    values, test_size=0.2, shuffle=False)

rnn_train = train
rnn_test = test

In [59]:
test

Unnamed: 0_level_0,Data
Time,Unnamed: 1_level_1
2020-01-01,3934
2020-02-01,3162
2020-03-01,4286
2020-04-01,4676
2020-05-01,5010
2020-06-01,4874
2020-07-01,4633
2020-08-01,1659
2020-09-01,5951
2020-10-01,6981


In [60]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

scaler.fit(rnn_train)
scaled_train = scaler.transform(rnn_train)
scaled_test = scaler.transform(rnn_test)

In [None]:
from keras.preprocessing.sequence import TimeseriesGenerator

# define generator
n_input = 12
n_features = 1
generator = TimeseriesGenerator(
    scaled_train, scaled_train, length=n_input, batch_size=1)

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

In [None]:
# define model
model = Sequential()
model.add(LSTM(100, activation='relu',
            input_shape=(n_input, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

model.summary()
model.fit(generator, epochs=50)

In [None]:
last_train_batch = scaled_train[-12:]
last_train_batch = last_train_batch.reshape((1, n_input, n_features))


In [None]:
last_train_batch

In [None]:
model.predict(last_train_batch)

test_predictions = []

first_eval_batch = scaled_train[-n_input:]
current_batch = first_eval_batch.reshape((1, n_input, n_features))

In [None]:
model.summary()

In [None]:
for i in range(len(rnn_test)):
    # get the prediction value for the first batch
    current_pred = model.predict(current_batch)[0]

    # append the prediction into the array
    test_predictions.append(current_pred)
    
    # use the prediction to update the batch and remove the first value
    current_batch = np.append(current_batch[:, 1:, :], [
                                [current_pred]], axis=1)

In [None]:
current_batch

In [None]:
true_predictions = scaler.inverse_transform(test_predictions)
rnn_test['Predictions'] = true_predictions
print(true_predictions)

In [None]:
forecast_errors = [rnn_test['Data'][i]-rnn_test['Predictions'][i] for i in range(len(rnn_test))]

In [None]:
forecast_errors

In [None]:
mean_forecast_error = np.mean(forecast_errors)

In [None]:
mae = mean_absolute_error(rnn_test['Data'], rnn_test['Predictions'])
mae

In [None]:
mse = mean_squared_error(rnn_test['Data'], rnn_test['Predictions'])
mse

In [None]:
mape = mean_absolute_percentage_error(rnn_test['Data'], rnn_test['Predictions'])
mape

In [None]:
model_folder = "Models"
model_filename = "RNN.pkl"
model_path = f"{model_folder}/{model_filename}"

In [None]:
# Tạo thư mục nếu chưa tồn tại
import os
os.makedirs(model_folder, exist_ok=True)

In [None]:
# Lưu mô hình
joblib.dump(model, model_path)

In [None]:
loaded_model = joblib.load(model_path)

In [None]:
loaded_model.summary()

In [1]:
import pandas as pd

# Tạo một DataFrame chứa dữ liệu doanh số bán hàng
data = {'Year': [2000, 2001, 2002, 2003, 2004, 2005],
        'Sales': [4, 7, 4, 9, 7, 10]}

df = pd.DataFrame(data)

# Chọn cửa sổ trượt là 3
window_size = 3
print(type(df))
# Tính giá trị trung bình trượt
df['Moving_Average'] = df['Sales'].rolling(window=window_size).mean()

# Số lượng giá trị tiếp theo bạn muốn dự đoán
n_predictions = 5

# Dự đoán nhiều giá trị tiếp theo
for i in range(n_predictions):
    # Tính giá trị trung bình trượt cho tập dữ liệu mở rộng
    new_average = df['Sales'].iloc[-window_size:].mean()
    
    # Thêm giá trị dự đoán vào DataFrame
    df = df.append({'Year': df['Year'].iloc[-1] + 1, 'Sales': None, 'Moving_Average': new_average}, ignore_index=True)

# In kết quả dự đoán
print("Dự đoán nhiều giá trị tiếp theo:")
print(df.tail(n_predictions))

<class 'pandas.core.frame.DataFrame'>
Dự đoán nhiều giá trị tiếp theo:
      Year  Sales  Moving_Average
6   2006.0    NaN        8.666667
7   2007.0    NaN        8.500000
8   2008.0    NaN       10.000000
9   2009.0    NaN             NaN
10  2010.0    NaN             NaN
