In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from keras.utils import plot_model
from keras import backend as K


2023-08-08 09:40:21.367788: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load your dataset here (assuming it's in CSV format, adjust accordingly)
df = pd.read_csv('data_interpolated.csv')
df = df.drop(columns=['Sno', 'day'])

# Convert 'timestamp' column to pandas datetime format
df['Date'] = pd.to_datetime(df['Date'])
# Extract year, month, and day into separate columns
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.month
df['day'] = df['Date'].dt.day
df.head()


Unnamed: 0,Date,Load,GDP Growth,per capita GDP us dollar,Access to electricty(% population),min temp c,meantemp c,max temp c,precipitation(mm),No of consumer,Total Availiability (GWH),year,month,day
0,2012-04-13,900.8,4.670142,794.092559,75.21,8.17,15.41,22.69,50.32,2324414,4178,2012,4,13
1,2012-04-14,889.0,4.670142,794.092559,75.21,8.17,15.41,22.69,50.32,2324414,4178,2012,4,14
2,2012-04-15,902.6,4.670142,794.092559,75.21,8.17,15.41,22.69,50.32,2324414,4178,2012,4,15
3,2012-04-16,683.0,4.670142,794.092559,75.21,8.17,15.41,22.69,50.32,2324414,4178,2012,4,16
4,2012-04-17,816.2,4.670142,794.092559,75.21,8.17,15.41,22.69,50.32,2324414,4178,2012,4,17


In [3]:
# Assuming you have a 'timestamp' column and a 'load' column in your dataset
timestamps = df['Date'].values
load_values = df['Load'].values

# Normalize the data between 0 and 1 (LSTM models perform better with scaled data)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_load_values = scaler.fit_transform(load_values.reshape(-1, 1))


In [4]:
scaled_load_values.shape

(3933, 1)

In [5]:
scaled_load_values

array([[0.3026314 ],
       [0.29441264],
       [0.3038851 ],
       ...,
       [0.93937621],
       [0.85649212],
       [0.90106844]])

In [6]:
print("Shape of scaled load values: ", scaled_load_values.shape)

Shape of scaled load values:  (3933, 1)


In [7]:
# Define the number of time steps (lookback) to consider for each prediction
lookback = 30  # For example, use the last 24 hours to predict the next hour's load

X, y = [], []
for i in range(lookback, len(scaled_load_values)):
    X.append(scaled_load_values[i - lookback:i, 0])
    y.append(scaled_load_values[i, 0])

X, y = np.array(X), np.array(y)

# Reshape the input data to match LSTM input shape (samples, time steps, features)
X = np.reshape(X, (X.shape[0], X.shape[1], 1))


In [8]:
print(X.shape)
print(y.shape)

(3903, 30, 1)
(3903,)


In [9]:
len(X[0])

30

# Train test split

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
print("Shape of X_train: ", X_train.shape)
print("Shape of X_test: ", X_test.shape)
print("Shape of y_train: ", y_train.shape)
print("Shape of y_test: ", y_test.shape)


Shape of X_train:  (3122, 30, 1)
Shape of X_test:  (781, 30, 1)
Shape of y_train:  (3122,)
Shape of y_test:  (781,)


In [None]:
# Build the LSTM Model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X.shape[1], 1)))
model.add(Dropout(0.2))  # Add a dropout layer with a dropout rate of 0.2
model.add(LSTM(50, return_sequences=True))
model.add(Dropout(0.2))  # Add a dropout layer with a dropout rate of 0.2
model.add(LSTM(50))
model.add(Dropout(0.2))  # Add a dropout layer with a dropout rate of 0.2
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

# model.summary()
# plot_model(model, to_file='LSTM_Model_Load_Forecating.png', show_shapes = True)


In [None]:
# Training parameters
epochs = 100
batch_size = 32

history = model.fit(X_train, 
                    y_train, 
                    epochs=epochs, 
                    batch_size=batch_size, 
                    validation_split = 0.2 )

In [None]:
model.save("LSTM_LoadForecasting_100_Epoch.h5")

In [None]:
def plot_train_validation_graph(history, file_name):
    # acc = history.history['accuracy']
    # val_acc = history.history['val_accuracy']

    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs_range = range(len(loss))

    plt.figure(figsize=(6, 6), dpi=100)
    # plt.subplot(2, 1, 1)
    # plt.plot(epochs_range, acc, label='Training Accuracy', color='g')
    # plt.plot(epochs_range, val_acc, label='Validation Accuracy', color='r')
    # plt.xlabel('Epoches')
    # plt.ylabel('Score')
    # plt.legend(loc='lower right')
    # plt.title('Training and Validation Accuracy')

    # plt.subplot(2, 1, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.xlabel('Epoches')
    plt.ylabel('MSE')
    plt.title('Training Vs Validation Loss')
    plt.savefig(file_name + ".png")
    plt.tight_layout(pad=1.0)
    plt.grid(True)
    plt.legend()
    plt.show()

In [None]:
plot_train_validation_graph(history, "Training_vs_Validation_curve_LSTM_100_epoch")

# Prediction

In [13]:
# Predict the load values
predicted_scaled_load_values = model.predict(X_test)

# Inverse transform to get actual load values
predicted_load_values = scaler.inverse_transform(predicted_scaled_load_values)
actual_load_values = scaler.inverse_transform(y_test.reshape(-1, 1))

2023-08-03 06:35:02.843465: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-08-03 06:35:02.846072: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-08-03 06:35:02.847826: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



In [18]:
# Calculate evaluation metrics
mse = mean_squared_error(actual_load_values, predicted_load_values)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual_load_values, predicted_load_values)
r2 = r2_score(actual_load_values, predicted_load_values)

print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("Mean Absolute Error:", mae)
print("R-squared Score:", r2)

Mean Squared Error: 18119.768624752036
Root Mean Squared Error: 134.60968993631934
Mean Absolute Error: 62.04809349195651
R-squared Score: 0.7459582198734662


# Explanation

In [19]:
from sklearn.metrics import mean_squared_error

# Calculate the Mean Squared Error (MSE) between original and reconstructed images.
mse = mean_squared_error( actual_load_values, predicted_load_values)
print(f"Mean Squared Error (MSE): {mse:.4f}")

Mean Squared Error (MSE): 18119.7686


### R-squared (R2) Score:
R2 score represents the proportion of variance in the dependent variable (load) that is predictable from the independent variable (predictions). It ranges from 0 to 1, where 1 indicates a perfect fit.

In [20]:
from sklearn.metrics import r2_score

r2 = r2_score( actual_load_values, predicted_load_values)
print("R-squared Score:", r2)


R-squared Score: 0.7459582198734662


### Explained Variance Score:
Explained Variance Score measures the proportion of variance in the dependent variable (load) explained by the model's predictions.

In [21]:
from sklearn.metrics import explained_variance_score

explained_var = explained_variance_score( actual_load_values, predicted_load_values)
print("Explained Variance Score:", explained_var)


Explained Variance Score: 0.7618843003652178


### Mean Absolute Error (MAE):
MAE calculates the average absolute difference between the predicted and actual values. It is less sensitive to outliers compared to MSE.

In [22]:
from sklearn.metrics import mean_absolute_error

mae = mean_absolute_error( actual_load_values, predicted_load_values)
print("Mean Absolute Error:", mae)


Mean Absolute Error: 62.04809349195651


### Root Mean Squared Error (RMSE):
RMSE is the square root of MSE and gives an error metric in the same unit as the original data.

In [23]:
from sklearn.metrics import mean_squared_error

rmse = np.sqrt(mean_squared_error(actual_load_values, predicted_load_values))
print("Root Mean Squared Error:", rmse)


Root Mean Squared Error: 134.60968993631934


In [24]:
actual_load_values.shape

(799, 1)