# Energy Forecasting with ARIMA / SARIMAX
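This notebook loads steel-industry energy usage data from a CSV file into a pandas DataFrame and forecasts `Usage_kWh` with ARIMA-family models: `auto_arima` selects the order, and a SARIMAX model with exogenous regressors is fitted and evaluated on a held-out test split.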

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima

# Load the raw CSV, parse the 'date' column (day-first) and use it as the index.
df = (
    pd.read_csv('../../data/steel_industry_data.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date'], dayfirst=True))
    .set_index('date')
)

# Target series to forecast.
y = df['Usage_kWh']

# Columns supplied to the models as exogenous regressors.
exog_cols = [
    'Lagging_Current_Reactive.Power_kVarh',
    'Leading_Current_Reactive_Power_kVarh',
    'CO2(tCO2)',
    'Lagging_Current_Power_Factor',
    'Leading_Current_Power_Factor',
    'NSM',
]
exog = df.loc[:, exog_cols]

# Positional 80/20 split: the first 80% of rows train the model and the
# remainder is held out for testing.
# NOTE(review): assumes the CSV rows are already in chronological order — confirm.
train_size = int(len(df) * 0.8)
y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]
exog_train, exog_test = exog.iloc[:train_size], exog.iloc[train_size:]

In [None]:
# Auto ARIMA
# Search for a non-seasonal (p, d, q) order on the training split, with the
# exogenous regressors included in the candidate fits.
# pmdarima takes the regressors through the `X` parameter; `exogenous` is the
# deprecated spelling and is not bound to the regressors on current releases.
auto_model = auto_arima(y_train, X=exog_train, seasonal=False, trace=True)
auto_model

In [None]:
# Fit SARIMAX
# Reuse the (p, d, q) order selected by auto_arima. No seasonal order is
# passed here, so SARIMAX falls back to its own default for that argument.
order = auto_model.order
model = SARIMAX(endog=y_train, exog=exog_train, order=order)
model_fit = model.fit()

# Last expression of the cell so the notebook renders the fit summary.
model_fit.summary()

In [None]:
# Forecast
# Predict the held-out span by integer position: from the first row after the
# training data through the last row of df, supplying the test-period
# regressors for those steps.
first_step = len(y_train)
last_step = len(df) - 1
forecast = model_fit.predict(start=first_step, end=last_step, exog=exog_test)

# Overlay actual and predicted usage on the test-period timestamps.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(y_test.index, y_test, label='Actual')
ax.plot(y_test.index, forecast, label='Forecast')
ax.legend()
ax.set_title('SARIMAX Forecast')
plt.show()