In [1]:
!pip install statsmodels



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression

In [3]:
# Lookup from three-letter month abbreviation to calendar month number (1-12).
month_number = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
                'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}

# Both tables hold 24 consecutive monthly readings: two full Jan-Dec cycles.
_month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] * 2
# Month labels are converted to their numeric codes before the frames are built.
_month_codes = [month_number[label] for label in _month_labels]

# Monthly gas usage readings, one row per month.
gas_data = pd.DataFrame({
    'Month': _month_codes,
    'Gas_Use': [
        244, 228, 153, 140, 55, 34, 30, 28, 29, 41, 88, 199,
        230, 245, 247, 135, 34, 33, 27, 26, 28, 39, 86, 188,
    ],
})

# Monthly electricity usage readings, one row per month.
electricity_data = pd.DataFrame({
    'Month': _month_codes,
    'Electric_Use': [
        967, 795, 820, 672, 722, 820, 1326, 1262, 1126, 814, 821, 918,
        950, 878, 785, 690, 794, 802, 1445, 1357, 1268, 889, 830, 935,
    ],
})

In [4]:
# One-column DataFrame with just the gas readings, indexed 0..n-1.
gas_use_col = gas_data.loc[:, ['Gas_Use']].reset_index(drop=True)

In [5]:
# One-column DataFrame with just the electricity readings, indexed 0..n-1.
electric_use_col = electricity_data.loc[:, ['Electric_Use']].reset_index(drop=True)

Áp dụng mô hình SARIMA (SARIMAX) cho dữ liệu Gas

In [6]:
# Chronological 80/20 split of the gas series: the first 80% of the rows are
# used for training and the remaining rows form the hold-out set.
train_size = int(0.8 * len(gas_use_col))
# The hold-out size is whatever is left after the training rows. It was
# previously also computed as 80% of the series, which overstated the number
# of test rows and only worked because the later slice is clipped at the end
# of the frame.
test_size = len(gas_use_col) - train_size

In [7]:
# Positional, order-preserving split: leading rows train the model, the rows
# that follow are held out for evaluation.
split_end = train_size + test_size
train_data = gas_use_col.iloc[:train_size]
test_data = gas_use_col.iloc[train_size:split_end]

In [8]:
!pip install pmdarima



In [9]:
# Training step: fit a seasonal ARIMA (no exogenous regressors) on the gas
# training series, with a 12-month seasonal period.
# NOTE(review): second-order regular and seasonal differencing (d=2, D=2) is
# heavy for a series this short -- confirm these orders were chosen deliberately.
gas_train_series = train_data['Gas_Use']
model = SARIMAX(
    gas_train_series,
    order=(0, 2, 0),
    seasonal_order=(0, 2, 0, 12),
)
results = model.fit()

In [10]:
# Observed hold-out gas readings as a NumPy array, and one forecast step per
# held-out observation from the fitted model.
y_test = test_data['Gas_Use'].to_numpy()
y_pred = results.forecast(steps=y_test.size)

Đánh giá độ chính xác thuật toán

In [11]:
# Root-mean-squared error between forecast and observed hold-out values.
forecast_error = y_pred - y_test
test_rmse = np.sqrt(np.mean(forecast_error ** 2))
print('Testing RMSE:', test_rmse)

# Mean absolute percentage error, expressed in percent of the observed value.
relative_error = (y_test - y_pred) / y_test
test_mape = np.mean(np.abs(relative_error)) * 100
print('Testing MAPE:', test_mape)

Testing RMSE: 43.29145412203144
Testing MAPE: 21.98392157075964


Áp dụng ARIMA vào Electricity

In [12]:
# Chronological 80/20 split of the electricity series: the first 80% of the
# rows are used for training and the remaining rows form the hold-out set.
train_size = int(0.8 * len(electric_use_col))
# The hold-out size is whatever is left after the training rows. It was
# previously also computed as 80% of the series, which overstated the number
# of test rows and only worked because the slice below is clipped at the end
# of the frame.
test_size = len(electric_use_col) - train_size
# NOTE: train_data / test_data now refer to the electricity series and replace
# the gas split assigned earlier in the notebook.
train_data = electric_use_col[:train_size]
test_data = electric_use_col[train_size:train_size + test_size]

In [13]:
# Training step: fit a non-seasonal ARIMA(1, 0, 1) on the electricity
# training series.
electric_train_series = train_data['Electric_Use']
model = ARIMA(electric_train_series, order=(1, 0, 1))
fitted_model = model.fit()

In [14]:
# Observed hold-out electricity readings as a NumPy array, and one forecast
# step per held-out observation from the fitted model.
y_test = test_data['Electric_Use'].to_numpy()
y_pred = fitted_model.forecast(steps=y_test.size)

Đánh giá độ chính xác thuật toán


In [15]:
# Root-mean-squared error between forecast and observed hold-out values.
forecast_error = y_pred - y_test
test_rmse = np.sqrt(np.mean(forecast_error ** 2))
print('Testing RMSE:', test_rmse)

# Mean absolute percentage error, expressed in percent of the observed value.
relative_error = (y_test - y_pred) / y_test
test_mape = np.mean(np.abs(relative_error)) * 100
print('Testing MAPE:', test_mape)

Testing RMSE: 134.6542498435286
Testing MAPE: 10.291792845839312
