# Hybrid (ARIMA + LSTM)

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima.model import ARIMA
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam



In [2]:
# Load dataset
df = pd.read_csv("data/interpolated_co2.csv")
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

In [3]:
# Split into train and test
train_end = datetime(2024, 12, 31)
test_start = datetime(2025, 1, 1)
test_end = datetime(2025, 4, 13)

In [4]:
train_df = df[df.index <= train_end]
test_df = df[(df.index >= test_start) & (df.index <= test_end)]

In [5]:
# Fit ARIMA model
arima_model = ARIMA(train_df['CO2'], order=(5,1,0))
arima_result = arima_model.fit()

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [6]:
# ARIMA forecast
arima_forecast = arima_result.forecast(steps=len(test_df))
arima_forecast.index = test_df.index

In [7]:
# Residuals
residuals = train_df['CO2'].iloc[1:] - arima_result.fittedvalues

In [8]:
# Normalize residuals
residuals = residuals.dropna()
scaler = MinMaxScaler(feature_range=(-1, 1))
residuals_scaled = scaler.fit_transform(residuals.values.reshape(-1, 1))

In [9]:
# Create sequences
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

In [10]:
seq_len = 30
X, y = create_sequences(residuals_scaled, seq_len)
X = X.reshape((X.shape[0], X.shape[1], 1))

In [11]:
# Build LSTM
model = Sequential()
model.add(LSTM(50, activation='tanh', input_shape=(seq_len, 1)))
model.add(Dense(1))
model.compile(optimizer=Adam(0.01), loss='mse')
model.fit(X, y, epochs=20, batch_size=32, verbose=1)

Epoch 1/20


  super().__init__(**kwargs)


[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.0151
Epoch 2/20
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.0145
Epoch 3/20
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0145
Epoch 4/20
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0148
Epoch 5/20
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.0154
Epoch 6/20
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.0152
Epoch 7/20
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.0149
Epoch 8/20
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.0146
Epoch 9/20
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0151
Epoch 10/20
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0149


<keras.src.callbacks.history.History at 0x17140ff70>

In [12]:
# Predict residuals for test period
test_res_inputs = residuals_scaled[-(seq_len + len(test_df)):]
test_X = []
for i in range(len(test_df)):
    test_X.append(test_res_inputs[i:i+seq_len])
test_X = np.array(test_X).reshape((len(test_X), seq_len, 1))
lstm_residuals_scaled = model.predict(test_X)
lstm_residuals = scaler.inverse_transform(lstm_residuals_scaled).flatten()

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


In [13]:
# Hybrid forecast
hybrid_forecast = arima_forecast.values + lstm_residuals

In [14]:
# Evaluation
actual = test_df['CO2'].values
rmse = np.sqrt(mean_squared_error(actual, hybrid_forecast))
mae = mean_absolute_error(actual, hybrid_forecast)
accuracy = 100 - (np.mean(np.abs(actual - hybrid_forecast) / actual) * 100)

In [15]:
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"Accuracy: {accuracy:.2f}%")

RMSE: 1.5734
MAE: 1.1817
Accuracy: 99.72%


In [18]:
# Save predictions
pred_df = pd.DataFrame({
    'Date': test_df.index,
    'Actual': actual,
    'Predicted': hybrid_forecast  # HYBRID prediction
})
os.makedirs("Predictions", exist_ok=True)
pred_df.to_csv("Predictions/hybrid_predictions.csv", index=False)