In [1]:
!pip install statsmodels



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression

In [3]:
gas_data = pd.DataFrame({
    'Month': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
             'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    'Gas_Use': [244, 228, 153, 140, 55, 34, 30, 28, 29, 41, 88, 199,
               230, 245, 247, 135, 34, 33, 27, 26, 28, 39, 86, 188],
})

electricity_data = pd.DataFrame({
    'Month': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
             'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    'Electric_Use': [967, 795, 820, 672, 722, 820, 1326, 1262, 1126, 814, 821, 918,
                     950, 878, 785, 690, 794, 802, 1445, 1357, 1268, 889, 830, 935],
})

month_number = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
                 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}

gas_data['Month'] = gas_data['Month'].map(month_number)
electricity_data['Month'] = electricity_data['Month'].map(month_number)

In [4]:
gas_use_col = gas_data[['Gas_Use']]
gas_use_col = gas_use_col.reset_index(drop=True)

In [5]:
electric_use_col = electricity_data[['Electric_Use']]
electric_use_col = electric_use_col.reset_index(drop=True)

Áp dụng ARIMA vào Gas

In [6]:
train_size = int(0.8 * len(gas_use_col))
test_size = int(0.2 * len(gas_use_col))

In [7]:
train_data = gas_use_col[:train_size]
test_data = gas_use_col[train_size:train_size+test_size]

In [8]:
!pip install pmdarima



In [9]:
# Quá trình Training
x_train = np.array(train_data.index).reshape(-1, 1)
y_train = np.array(train_data['Gas_Use'])
# Find the best ARIMA model using auto_arima
from pmdarima.arima import auto_arima
model = auto_arima(y_train, trace=True, error_action='ignore', suppress_warnings=True, seasonal = False)
# Fit the model
model.fit(y_train)

Performing stepwise search to minimize aic
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=211.057, Time=0.03 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=245.315, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=211.197, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.01 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=209.869, Time=0.02 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.02 sec
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=208.239, Time=0.02 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=209.074, Time=0.02 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=207.082, Time=0.01 sec
 ARIMA(3,0,0)(0,0,0)[0]             : AIC=209.073, Time=0.02 sec
 ARIMA(3,0,1)(0,0,0)[0]             : AIC=211.066, Time=0.05 sec
 ARIMA(2,0,0)(0,0,0)[0] intercept   : AIC=203.401, Time=0.03 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=211.126, Time=0.01 sec
 ARIMA(3,0,0)(0,0,0)[0] intercept   : AIC=203.064, Time=0.04 sec
 ARIMA(4,0,0)(0,0,0)[0] intercept   : AIC=201.627, Time

In [10]:
x_test = np.array(test_data.index).reshape(-1, 1)
y_test = np.array(test_data['Gas_Use'])
y_pred = model.predict(n_periods=len(y_test))

Đánh giá độ chính xác thuật toán

In [12]:
test_rmse = np.sqrt(np.mean((y_pred - y_test)**2))
print('Testing RMSE:', test_rmse)
test_mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('Testing MAPE:', test_mape)

Testing RMSE: 34.85268219182798
Testing MAPE: 66.14098924646645


Áp dụng ARIMA vào Electricity

In [13]:
train_size = int(0.8 * len(electric_use_col))
test_size = int(0.2 * len(electric_use_col))
train_data = electric_use_col[:train_size]
test_data = electric_use_col[train_size:train_size+test_size]

In [14]:
!pip install pmdarima



In [15]:
# Quá trình Training
x_train = np.array(train_data.index).reshape(-1, 1)
y_train = np.array(train_data['Electric_Use'])
# Find the best ARIMA model using auto_arima
from pmdarima.arima import auto_arima
model = auto_arima(y_train, trace=True, error_action='ignore', suppress_warnings=True, seasonal = False)
# Fit the model
model.fit(y_train)

Performing stepwise search to minimize aic
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.10 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=316.088, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=266.639, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=301.149, Time=0.02 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=268.497, Time=0.03 sec
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=268.512, Time=0.03 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=260.777, Time=0.02 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=261.665, Time=0.00 sec
 ARIMA(2,0,0)(0,0,0)[0] intercept   : AIC=260.300, Time=0.03 sec
 ARIMA(3,0,0)(0,0,0)[0] intercept   : AIC=258.673, Time=0.05 sec
 ARIMA(4,0,0)(0,0,0)[0] intercept   : AIC=260.274, Time=0.06 sec
 ARIMA(3,0,1)(0,0,0)[0] intercept   : AIC=260.133, Time=0.09 sec
 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=264.588, Time=0.09 sec
 ARIMA(4,0,1)(0,0,0)[0] intercept   : AIC=262.158, Time

In [16]:
x_test = np.array(test_data.index).reshape(-1, 1)
y_test = np.array(test_data['Electric_Use'])
y_pred = model.predict(n_periods=len(y_test))

Đánh giá độ chính xác thuật toán


In [17]:
test_rmse = np.sqrt(np.mean((y_pred - y_test)**2))
print('Testing RMSE:', test_rmse)
test_mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('Testing MAPE:', test_mape)

Testing RMSE: 151.58784149413765
Testing MAPE: 14.71469503628823
