# Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [None]:
# prompt: set plotly as plotting backend for pd

# Route DataFrame/Series .plot() calls through plotly.
import pandas as pd
pd.set_option("plotting.backend", "plotly")

# Data

In [None]:
# Remote location of the zip-compressed AEP hourly CSV that is loaded below.
url = "https://storage.googleapis.com/edulabs-public-datasets/AEP_hourly.csv.zip"

In [None]:
# Read the CSV straight from the URL. The 'Datetime' column becomes the index
# and is parsed as dates; decompression is left to pandas' default handling.
df = pd.read_csv(url, index_col='Datetime', parse_dates=True)

In [None]:
# Display the loaded frame as the cell output for a quick sanity check.
df

# Explore and visualize the data

In [None]:
# Plot the data in the row order it was loaded (no sorting has been applied yet).
df.plot()

In [None]:
# Same plot with rows ordered by timestamp. sort_index() returns a sorted
# copy, so df itself is still unchanged after this cell.
df.sort_index().plot()

In [None]:
# Keep the chronological ordering: the sliding-window creation and the
# train/test split below both rely on positional row order.
df = df.sort_index()

# Preprocess the Data

In [None]:
# a) Normalize the data
# LSTMs, like many neural networks, work best when input data is scaled.
# We'll use MinMaxScaler to map the demand onto (roughly) the [0, 1] range.
#
# The scaler is fitted on the chronologically first 80% of the rows only, so
# its min/max statistics never see the held-out test period. Fitting on the
# full series would leak information about the test set into training.
# Keep this fraction in sync with the train/test split fraction used later in
# the notebook. Later values that fall outside the fitted range simply scale
# to slightly below 0 or above 1.
scaler = MinMaxScaler()
scaler_fit_rows = int(len(df) * 0.8)
scaler.fit(df.iloc[:scaler_fit_rows])
scaled_demand = scaler.transform(df)

In [None]:

# b) Create sequences using a sliding window
# We need to transform our flat time series into input sequences (X) and
# corresponding output labels (y).
def create_sequences(data, sequence_length):
    """
    Creates input/output sequences for the LSTM with a sliding window.

    Each input sample is a run of `sequence_length` consecutive time steps
    taken from column 0 of `data`; its label is the value of the time step
    immediately after that run.

    Args:
        data: 2-D array-like of shape (n_steps, n_features), ordered in time.
            Only column 0 is used.
        sequence_length: Number of consecutive time steps per input window.

    Returns:
        A tuple (X, y) of NumPy arrays where X has shape
        (n_steps - sequence_length, sequence_length) and y has shape
        (n_steps - sequence_length,). If `data` has no more than
        `sequence_length` steps, X has shape (0, sequence_length) and y has
        shape (0,), so downstream reshaping still works.
    """
    series = np.asarray(data)[:, 0]
    n_samples = len(series) - sequence_length
    if n_samples <= 0:
        # Not enough steps for even one (window, label) pair.
        return (
            np.empty((0, sequence_length), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    # All length-`sequence_length` windows as a zero-copy view. The last
    # window has no following step to predict, so it is dropped.
    windows = np.lib.stride_tricks.sliding_window_view(series, sequence_length)
    # Copy so the results are writable and do not alias `data`.
    X = windows[:n_samples].copy()
    # The label of window i is the step right after it: series[i + sequence_length].
    y = series[sequence_length:].copy()
    return X, y


In [None]:
# Length of each input window, in time steps (rows of the series)
SEQUENCE_LENGTH = 24

X, y = create_sequences(scaled_demand, SEQUENCE_LENGTH)

# LSTM layers take input laid out as [samples, time_steps, features];
# there is a single feature here, so add a trailing axis of size 1.
X = X.reshape(X.shape[0], X.shape[1], 1)
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")

In [None]:



# --- 5. Split Data into Training and Testing Sets ---
# Chronological hold-out: the earliest 80% of the windows train the model and
# the most recent 20% are kept for testing. Time-series data must not be
# shuffled, so the split is a plain positional cut.
train_size = int(len(X) * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")

# Basic LSTM Model

In [None]:
# --- 6. Build the LSTM Model ---
# A minimal architecture, assembled layer by layer: windows of
# SEQUENCE_LENGTH steps with one feature each feed a 50-unit LSTM, whose final
# state goes through a single-unit Dense output layer.
model = Sequential()
model.add(tf.keras.layers.Input(shape=(SEQUENCE_LENGTH, 1)))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

model.summary()


In [None]:
# --- 7. Train the Model ---

history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=2,
    validation_split=0.1,  # hold out 10% of the training data for validation
    verbose=1,
)

In [None]:
# Plot training & validation loss, one curve per recorded loss series
plt.figure(figsize=(12, 6))
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Model Loss During Training')
plt.xlabel('Epoch')
plt.ylabel('Loss (MSE)')
plt.legend()
plt.show()

In [None]:
# --- 8. Evaluate the Model and Make Predictions ---

# Forecast the (scaled) demand for every test window
predicted_demand_scaled = model.predict(X_test)

# Undo the scaling so predictions and targets are back on the original scale.
# y_test is 1-D, so it is turned into a single column before inverse_transform.
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))
predicted_demand = scaler.inverse_transform(predicted_demand_scaled)

# Root Mean Squared Error (RMSE) is the square root of the test-set MSE
test_mse = mean_squared_error(y_test_actual, predicted_demand)
rmse = np.sqrt(test_mse)
print(f"Root Mean Squared Error (RMSE) on Test Set: {rmse:.2f}")




In [None]:
# prompt: visualize results with plotly

import pandas as pd
import plotly.express as px

# --- 9. Visualize the Results with Plotly ---
# Actual vs. predicted demand over the test period.

# The test targets line up with the final len(y_test_actual) rows of df, so
# those timestamps are reused as the index of the plotting frame.
test_index = df.index[-len(y_test_actual):]
results_df = pd.DataFrame(
    {
        'Actual Demand': y_test_actual.flatten(),
        'Predicted Demand': predicted_demand.flatten(),
    },
    index=test_index,
)

fig = px.line(results_df, title='Electricity Demand: Actual vs. Predicted (Plotly)')
fig.update_layout(xaxis_title='Date', yaxis_title='Demand (MW)')
fig.show()

# Visualize training & validation loss with Plotly
# Pick the two loss series out of the training history and give them
# human-readable column names for the legend.
loss_history_df = (
    pd.DataFrame(history.history)[['loss', 'val_loss']]
    .rename(columns={'loss': 'Training Loss', 'val_loss': 'Validation Loss'})
)

fig_loss = px.line(loss_history_df, title='Model Loss During Training (Plotly)')
fig_loss.update_layout(xaxis_title='Epoch', yaxis_title='Loss (MSE)')
fig_loss.show()

# Multiple LSTM Layers