In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

# =============================================================================
# 1. Data Loading and Preprocessing
# =============================================================================

In [26]:
# Read the dataset from an Excel workbook, parsing the 'Date' column as datetimes.
# NOTE(review): this is an absolute, machine-specific path - adjust it for your environment.
data_path = r"C:\Users\vanim\Downloads\All_Datasets\final_data.xlsx"
data = pd.read_excel(data_path, parse_dates=['Date'])

In [27]:
# Put the rows in chronological order (important for the time series models)
data = data.sort_values('Date')

In [61]:
# (Optional) Show the column names as a quick sanity check
print("Columns:", list(data.columns))

Columns: ['Date', 'Day', 'Holiday', 'Temp', 'Rain', 'Inflation', 'MaxT', 'MinT', 'WindSpeed', 'Humidity', 'Precipitation', 'Energy Required (MU)']


In [62]:
# Use the 'Date' column as the index for the time series models
data = data.set_index('Date')

In [63]:
# Sequential (unshuffled) split, also used by the non-time-series models:
# the first 80% of rows are the training set, the last 20% are the test set.
split_index = int(len(data) * 0.8)
train, test = data.iloc[:split_index].copy(), data.iloc[split_index:].copy()

In [64]:
# ARIMA/SARIMA are fit on the target series alone
train_energy, test_energy = (
    split['Energy Required (MU)'] for split in (train, test)
)

# =============================================================================
# 2. ARIMA Model (Univariate Time Series)
# =============================================================================


In [65]:
# Fit an ARIMA model to the training energy series.
# The order (5, 1, 0) is only an example; in practice, pick the order via model selection.
model_arima = ARIMA(endog=train_energy, order=(5, 1, 0))
model_arima_fit = model_arima.fit()

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [66]:
# Forecast one step per observation in the test period
forecast_arima = model_arima_fit.forecast(len(test_energy))
print("ARIMA Forecast:", forecast_arima.head(), sep="\n")

ARIMA Forecast:
2304    170.755485
2305    173.826942
2306    177.443722
2307    179.851936
2308    181.329217
Name: predicted_mean, dtype: float64


  return get_prediction_index(
  return get_prediction_index(


# =============================================================================
# 3. SARIMA Model (Seasonal ARIMA)
# =============================================================================

In [67]:
# SARIMA extends ARIMA with seasonal terms; (1, 1, 1, 7) is an example seasonal order.
model_sarima = SARIMAX(
    endog=train_energy,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 7),
)
model_sarima_fit = model_sarima.fit(disp=False)
# Forecast one step per observation in the test period
forecast_sarima = model_sarima_fit.forecast(len(test_energy))
print("\nSARIMA Forecast:", forecast_sarima.head(), sep="\n")

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)



SARIMA Forecast:
2304    169.999749
2305    169.433430
2306    166.988072
2307    169.938304
2308    170.959230
Name: predicted_mean, dtype: float64


  return get_prediction_index(
  return get_prediction_index(


# =============================================================================
# 4. Random Forest Regression
# =============================================================================


In [50]:
# The Random Forest is trained on the exogenous feature columns.
feature_cols = [
    'Day', 'Humidity', 'Holiday', 'Inflation', 'Rain',
    'Temp', 'MaxT', 'MinT', 'WindSpeed', 'Precipitation',
]
# Features and target for each split
X_train, y_train = train[feature_cols], train['Energy Required (MU)']
X_test, y_test = test[feature_cols], test['Energy Required (MU)']


In [51]:
# Convert the categorical feature 'Day' to dummy variables.
# The list of levels is learned from the training split only and applied to both
# splits. Encoding each split independently with drop_first=True would drop a
# different baseline level whenever a level is absent from one split, and the
# later align() would hide that by zero-filling.
day_levels = sorted(X_train['Day'].dropna().unique())
X_train = pd.get_dummies(
    X_train.assign(Day=pd.Categorical(X_train['Day'], categories=day_levels)),
    columns=['Day'],
    drop_first=True,
)
# A 'Day' value in the test split that was never seen in training is not in
# day_levels, so it becomes missing and is encoded as all-zero dummies.
X_test = pd.get_dummies(
    X_test.assign(Day=pd.Categorical(X_test['Day'], categories=day_levels)),
    columns=['Day'],
    drop_first=True,
)
# Keep exactly the training columns, in training order (safety net; the shared
# category list should already make the two frames line up).
X_train, X_test = X_train.align(X_test, join='left', axis=1, fill_value=0)

In [52]:
# Fit a 100-tree random forest with a fixed seed, then score it on the test split
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
pred_rf = rf.predict(X_test)
mse_rf = mean_squared_error(y_test, pred_rf)
rmse_rf = np.sqrt(mse_rf)
print(f"\nRandom Forest RMSE: {rmse_rf:.2f}")


Random Forest RMSE: 31.96


# =============================================================================
# 5. LSTM Model
# =============================================================================

In [53]:
# The LSTM is fed scaled inputs arranged as sliding-window sequences.
# Step one: separate min-max scalers for the features and for the target.
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()

In [54]:
# Fit each scaler on the training split only, then reuse it on the test split.
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)
# The target is reshaped to a single column because the scaler expects 2-D input.
y_train_scaled = scaler_y.fit_transform(y_train.to_numpy().reshape(-1, 1))
y_test_scaled = scaler_y.transform(y_test.to_numpy().reshape(-1, 1))

In [55]:
# Define a helper function to create sequences from the time series data.
def create_sequences(X, y, window_size):
    """Turn aligned feature/target arrays into sliding-window samples.

    Sample ``k`` pairs the feature rows ``X[k : k + window_size]`` with the
    target ``y[k + window_size]``, i.e. the value immediately after the window.

    Parameters
    ----------
    X : sequence of feature rows, indexable by slice.
    y : sequence of targets, indexable by integer position.
    window_size : number of consecutive rows in each window.

    Returns
    -------
    tuple of two numpy arrays: the stacked windows and the matching targets.
    ``len(X) - window_size`` samples are produced; when that count is not
    positive both arrays are empty.
    """
    n_windows = len(X) - window_size
    windows = [X[start:start + window_size] for start in range(n_windows)]
    targets = [y[start + window_size] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [56]:
# Length of the look-back window (e.g., the previous 7 days)
window_size = 7
# Build windowed samples for each split separately
X_train_seq, y_train_seq = create_sequences(
    X=X_train_scaled, y=y_train_scaled, window_size=window_size
)
X_test_seq, y_test_seq = create_sequences(
    X=X_test_scaled, y=y_test_scaled, window_size=window_size
)

In [57]:
# Build the LSTM model.
# Fix the random seeds first so that weight initialisation and batch shuffling
# do not change the reported RMSE from run to run. set_random_seed seeds Python,
# NumPy and TensorFlow; the value 42 matches the Random Forest's random_state.
tf.keras.utils.set_random_seed(42)
lstm_model = Sequential([
    # Declare the input shape (time steps, features per step) with an explicit
    # Input object. Passing `input_shape` to the first layer makes Keras emit a
    # warning and is discouraged for Sequential models.
    tf.keras.Input(shape=(window_size, X_train_seq.shape[2])),
    LSTM(50, activation='relu'),
    # Single output unit: one scaled target value per window
    Dense(1)
])
lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.summary()

  super().__init__(**kwargs)


In [58]:
# Fit the network on the training windows (tune epochs and batch_size as needed)
lstm_model.fit(
    x=X_train_seq,
    y=y_train_seq,
    epochs=20,
    batch_size=32,
    verbose=1,
)

Epoch 1/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - loss: 0.0348
Epoch 2/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0171
Epoch 3/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0149
Epoch 4/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0139
Epoch 5/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0132
Epoch 6/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0129
Epoch 7/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0118
Epoch 8/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0117
Epoch 9/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0107
Epoch 10/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0095
Epoch 11/

<keras.src.callbacks.history.History at 0x1d40fb43e90>

In [59]:
# Predict on the test windows, then map predictions and targets back to the
# original units with the target scaler before scoring.
pred_lstm_scaled = lstm_model.predict(X_test_seq)
pred_lstm, y_test_actual = (
    scaler_y.inverse_transform(scaled) for scaled in (pred_lstm_scaled, y_test_seq)
)
mse_lstm = mean_squared_error(y_test_actual, pred_lstm)
rmse_lstm = np.sqrt(mse_lstm)
print(f"\nLSTM RMSE: {rmse_lstm:.2f}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step

LSTM RMSE: 30.80


In [68]:
# Summarise the test-set RMSE of all four models.
# `np` and `mean_squared_error` are already imported in the notebook's first
# cell, so they are not re-imported here.

# Calculate RMSE for ARIMA and SARIMA forecasts.
# NOTE(review): the forecasts carry an integer index while test_energy is
# indexed by date; the metric appears to compare them by position, so this
# relies on both having one value per test row in the same order.
rmse_arima = np.sqrt(mean_squared_error(test_energy, forecast_arima))
rmse_sarima = np.sqrt(mean_squared_error(test_energy, forecast_sarima))

print(f"ARIMA RMSE: {rmse_arima:.2f}")
print(f"SARIMA RMSE: {rmse_sarima:.2f}")
print(f"Random Forest RMSE: {rmse_rf:.2f}")
print(f"LSTM RMSE: {rmse_lstm:.2f}")



ARIMA RMSE: 21.60
SARIMA RMSE: 28.62
Random Forest RMSE: 31.96
LSTM RMSE: 30.80
