# Time Series Forecasting: LSTM baseline + Attention Seq2Seq

This notebook is Colab-ready. Upload `electricity_consumption.csv` to `/content` (or mount Drive) before running. It performs:
- Data loading and hourly resampling
- Imputation with IterativeImputer
- Baseline LSTM (48 → 6 hours)
- Attention-augmented seq2seq (Bahdanau)
- Evaluation (RMSE, MAE, MAPE)
- Saves models and run summary


In [None]:
# Setup imports
import os, json, math, numpy as np, pandas as pd
from pathlib import Path
# IterativeImputer is experimental: this enabling import must run before it is imported.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, backend as K
import matplotlib.pyplot as plt
print("Libraries loaded. TensorFlow version:", tf.__version__)

In [None]:
# Configuration: locate the uploaded CSV among the known candidate locations
POSSIBLE_PATHS = [
    '/content/electricity_consumption.csv',
    '/mnt/data/electricity_consumption.csv',
    './electricity_consumption.csv'
]
# The first candidate that exists on disk wins; None means nothing was found.
DATA_PATH = next(
    (candidate for candidate in POSSIBLE_PATHS if Path(candidate).exists()),
    None,
)
if DATA_PATH is None:
    raise FileNotFoundError("Place electricity_consumption.csv in /content or update DATA_PATH.")
print("Using:", DATA_PATH)

In [None]:
# Load CSV and detect datetime column
df = pd.read_csv(DATA_PATH)
display(df.head())  # `display` is supplied by the IPython/Colab runtime
print("Columns:", df.columns.tolist())

# Pass 1: take the first column whose name hints at a timestamp.
# ('timestamp' needs no separate check because it already contains 'time';
# str() keeps non-string column labels from crashing on .lower().)
datetime_col = next(
    (c for c in df.columns if any(key in str(c).lower() for key in ('date', 'time'))),
    None,
)

# Pass 2: otherwise probe the first three columns for parseable dates.
if datetime_col is None:
    for c in df.columns[:3]:
        # pd.to_datetime accepts plain numbers (read as epoch offsets), so a
        # numeric measurement column must never qualify as the time index.
        if pd.api.types.is_numeric_dtype(df[c]):
            continue
        try:
            pd.to_datetime(df[c])
        except (ValueError, TypeError, OverflowError):
            continue
        datetime_col = c
        break

if datetime_col is None:
    raise ValueError("Couldn't auto-detect datetime column. Rename it to include 'date' or 'time' or make it one of the first three columns.")
print('Datetime column:', datetime_col)

# Index by time and sort chronologically so later resampling/splitting is ordered.
df[datetime_col] = pd.to_datetime(df[datetime_col])
df = df.set_index(datetime_col).sort_index()
print('Index range:', df.index.min(), 'to', df.index.max())

In [None]:
# Resample hourly and impute missing values
# Only numeric columns can be averaged; selecting them first keeps .mean()
# from raising on text columns. 'h' is the non-deprecated hourly alias.
df_hour = df.select_dtypes(include='number').resample('h').mean()
numeric_cols = df_hour.select_dtypes(include='number').columns.tolist()

# IterativeImputer discards columns that are entirely NaN, which would make
# the column assignment below fail with a shape mismatch - drop them up front.
empty_cols = [c for c in numeric_cols if df_hour[c].isna().all()]
if empty_cols:
    print("Dropping all-NaN columns:", empty_cols)
    df_hour = df_hour.drop(columns=empty_cols)
    numeric_cols = [c for c in numeric_cols if c not in empty_cols]
if not numeric_cols:
    raise ValueError("No numeric columns with data found after hourly resampling.")
print("Numeric columns:", numeric_cols)

imputer = IterativeImputer(random_state=0, max_iter=10)
df_hour[numeric_cols] = imputer.fit_transform(df_hour[numeric_cols])
# Safety net for anything the imputer left untouched.
df_hour = df_hour.ffill().bfill()
display(df_hour.head())  # `display` is supplied by the IPython/Colab runtime

In [None]:
# Choose target column (heuristic) and features
if not numeric_cols:
    raise ValueError("No numeric columns available to use as the forecasting target.")
# Prefer a column whose name looks like a consumption/power measurement;
# otherwise fall back to the first numeric column.
target_candidates = [c for c in numeric_cols if any(k in c.lower() for k in ['consum','global_active','active_power','power'])]
TARGET = target_candidates[0] if target_candidates else numeric_cols[0]
print("Target:", TARGET)
FEATURES = [c for c in numeric_cols if c != TARGET]
print("Features:", FEATURES)

scaler_X = StandardScaler()
scaler_y = StandardScaler()

# With no other numeric column available, the target itself is the only input.
X_all = df_hour[FEATURES].values if FEATURES else df_hour[[TARGET]].values
y_all = df_hour[[TARGET]].values

# Fit the scaling statistics on the training period only (the later split
# trains on the first 70% of windows); fitting on the whole series would leak
# validation/test statistics into training.
TRAIN_FRACTION = 0.7
n_fit = max(1, int(len(df_hour) * TRAIN_FRACTION))
scaler_X.fit(X_all[:n_fit])
scaler_y.fit(y_all[:n_fit])
X_scaled = scaler_X.transform(X_all)
y_scaled = scaler_y.transform(y_all)

In [None]:
# Create sequences (48 -> 6) and time-aware split
def create_sequences(X, y, input_steps=48, output_steps=6):
    """Slice aligned series into sliding (input window, forecast window) pairs.

    Window ``i`` pairs ``X[i : i + input_steps]`` with the flattened
    ``y[i + input_steps : i + input_steps + output_steps]``.

    Args:
        X: Array-like of inputs, indexed by time along axis 0.
        y: Array-like of targets, indexed by time along axis 0.
        input_steps: Number of time steps in each input window.
        output_steps: Number of time steps in each forecast window.

    Returns:
        Tuple ``(Xs, ys)``: ``Xs`` has shape ``(n, input_steps, *X.shape[1:])``
        and ``ys`` has shape ``(n, output_steps * prod(y.shape[1:]))``.
        When the series is too short for a single window, ``n`` is 0 but the
        trailing dimensions are kept so downstream shape lookups still work.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_windows = len(X) - input_steps - output_steps + 1

    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,); return well-formed empties.
        targets_per_step = int(np.prod(y.shape[1:], dtype=int))
        return (
            np.empty((0, input_steps) + X.shape[1:], dtype=X.dtype),
            np.empty((0, output_steps * targets_per_step), dtype=y.dtype),
        )

    Xs = np.stack([X[i:i + input_steps] for i in range(n_windows)])
    ys = np.stack([
        y[i + input_steps:i + input_steps + output_steps].reshape(-1)
        for i in range(n_windows)
    ])
    return Xs, ys

INPUT_STEPS = 48
OUTPUT_STEPS = 6
X_seq, y_seq = create_sequences(X_scaled, y_scaled, INPUT_STEPS, OUTPUT_STEPS)
print("Seq shapes:", X_seq.shape, y_seq.shape)

# Chronological 70% / 15% / remainder split: earlier windows train, later ones
# validate and test, with no shuffling across the boundaries.
train_size = int(len(X_seq) * 0.7)
val_size = int(len(X_seq) * 0.15)
split_points = [train_size, train_size + val_size]
X_train, X_val, X_test = np.split(X_seq, split_points)
y_train, y_val, y_test = np.split(y_seq, split_points)
print('Train/Val/Test:', len(X_train), len(X_val), len(X_test))

In [None]:

    inputs = layers.Input(shape=input_shape)
    x = layers.Mas# Baseline LSTM model
def build_lstm_baseline(input_shape, output_steps):king()(inputs)
    x = layers.LSTM(128, return_sequences=False)(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(64, activation='relu')(x)
    outputs = layers.Dense(output_steps)(x)
    model = models.Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse')
    return model

# Instantiate the baseline for (window length, feature count) inputs.
lstm_model = build_lstm_baseline(
    (INPUT_STEPS, X_train.shape[2]),
    OUTPUT_STEPS,
)
lstm_model.summary()

# Stop once validation loss has not improved for 5 epochs and roll back to
# the best weights seen.
es = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
history_lstm = lstm_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=32,
    callbacks=[es],
)

In [None]:
# Bahdanau attention and seq2seq (simplified)
class BahdanauAttention(layers.Layer):
    """Additive (Bahdanau-style) attention over a sequence of values.

    Args:
        units: Width of the two projection layers used to score each time step.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...),
            required so the layer can be rebuilt from its config.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.W1 = layers.Dense(units)
        self.W2 = layers.Dense(units)
        self.V = layers.Dense(1)

    def call(self, query, values):
        """Return the attention context and the per-step attention weights.

        Args:
            query: Tensor without a time axis (one is inserted at axis 1);
                the builder in this notebook passes the decoder hidden state.
            values: Tensor with time on axis 1; the builder passes the
                encoder's per-step outputs.

        Returns:
            ``(context_vector, attention_weights)``: ``values`` summed over
            axis 1 using the softmax weights, and those weights with their
            trailing singleton axis removed.
        """
        # Insert a time axis so the query broadcasts against every value step.
        query_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(query_with_time_axis)))
        # Normalise over the time axis.
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        return context_vector, tf.squeeze(attention_weights, -1)

    def get_config(self):
        """Return the constructor arguments so saved models can be reloaded."""
        config = super().get_config()
        config.update({'units': self.units})
        return config

def build_attention_seq2seq(input_shape, output_steps, enc_units=128, dec_units=128):
    """Build and compile the attention-augmented encoder/decoder forecaster.

    The encoder LSTM reads the input window. The decoder is unrolled for
    ``output_steps`` steps: each step attends over the encoder outputs, feeds
    the previous prediction (broadcast across the feature axis) plus the
    attention context to an LSTM cell, and emits one value.

    Args:
        input_shape: ``(time steps, features)`` of one encoder sample.
        output_steps: Number of decoder steps / forecast values.
        enc_units: Encoder LSTM width.
        dec_units: Decoder LSTM cell and attention width.

    Returns:
        A Keras model compiled with Adam and MSE loss, taking
        ``[encoder_inputs, decoder_inputs]`` and producing ``output_steps``
        values per sample.

    Raises:
        ValueError: If ``enc_units != dec_units``; the encoder's final state
            initialises the decoder cell, so the widths must match.
    """
    if enc_units != dec_units:
        raise ValueError(
            "enc_units and dec_units must be equal because the encoder state "
            "initialises the decoder cell."
        )
    n_features = input_shape[1]

    encoder_inputs = layers.Input(shape=input_shape, name='encoder_inputs')
    encoder_mask = layers.Masking()(encoder_inputs)
    encoder_lstm = layers.LSTM(enc_units, return_sequences=True, return_state=True, name='encoder_lstm')
    enc_outputs, enc_h, enc_c = encoder_lstm(encoder_mask)

    # Declared only so the model keeps its two-input signature; nothing in the
    # graph below reads it (the decoder feeds on its own predictions).
    decoder_inputs = layers.Input(shape=(output_steps, n_features), name='decoder_inputs')

    attention = BahdanauAttention(dec_units)
    decoder_lstm_cell = layers.LSTMCell(dec_units)
    dense_out = layers.Dense(1)
    # Weight-free helper layers, shared by every decoder step.
    join_features = layers.Concatenate(axis=-1)
    repeat_prediction = layers.RepeatVector(n_features)
    flatten_prediction = layers.Reshape((n_features,))

    state_h, state_c = enc_h, enc_c
    # The first decoder input is the last observed encoder time step.
    decoder_input = encoder_inputs[:, -1, :]
    step_outputs = []
    for _step in range(output_steps):
        context_vec, _attn_weights = attention(state_h, enc_outputs)
        cell_input = join_features([decoder_input, context_vec])
        out, [state_h, state_c] = decoder_lstm_cell(cell_input, states=[state_h, state_c])
        prediction = dense_out(out)
        step_outputs.append(prediction)
        # Broadcast the scalar prediction across the feature axis so the next
        # step's input has the same width as an encoder time step.
        decoder_input = flatten_prediction(repeat_prediction(prediction))

    # Join the per-step single-value outputs into one row of forecasts per
    # sample; Concatenate needs at least two inputs, hence the special case.
    if len(step_outputs) == 1:
        decoder_outputs = step_outputs[0]
    else:
        decoder_outputs = layers.Concatenate(axis=-1)(step_outputs)

    model = models.Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer='adam', loss='mse')
    return model

attn_model = build_attention_seq2seq((INPUT_STEPS, X_train.shape[2]), OUTPUT_STEPS)
attn_model.summary()

# Zero-filled arrays fill the model's second (decoder) input slot.
decoder_dummy = np.zeros((X_train.shape[0], OUTPUT_STEPS, X_train.shape[2]))
history_attn = attn_model.fit(
    [X_train, decoder_dummy],
    y_train,
    validation_data=(
        [X_val, np.zeros((X_val.shape[0], OUTPUT_STEPS, X_val.shape[2]))],
        y_val,
    ),
    epochs=30,
    batch_size=32,
    callbacks=[es],
)

In [None]:
# Evaluation helpers and metrics
def inverse_and_metrics(y_true_scaled, y_pred_scaled):
    """Undo target scaling and score predictions in original units.

    Args:
        y_true_scaled: Array of scaled ground-truth values.
        y_pred_scaled: Array of scaled predictions.

    Returns:
        Tuple ``(rmse, mae, mape, y_true, y_pred)`` where the last two are the
        inputs mapped back through ``scaler_y`` with their shapes preserved,
        and ``mape`` is expressed in percent.
    """
    def _unscale(scaled):
        # The scaler works on a single column, so flatten to one and restore.
        return scaler_y.inverse_transform(scaled.reshape(-1, 1)).reshape(scaled.shape)

    y_true = _unscale(y_true_scaled)
    y_pred = _unscale(y_pred_scaled)
    true_flat = y_true.flatten()
    pred_flat = y_pred.flatten()

    rmse = math.sqrt(mean_squared_error(true_flat, pred_flat))
    mae = mean_absolute_error(true_flat, pred_flat)
    # The small constant keeps the division defined when a true value is zero.
    mape = np.mean(np.abs((true_flat - pred_flat) / (true_flat + 1e-9))) * 100
    return rmse, mae, mape, y_true, y_pred

# Baseline LSTM: forecast the test windows and score them in original units.
y_pred_lstm = lstm_model.predict(X_test)
lstm_eval = inverse_and_metrics(y_test, y_pred_lstm)
rmse_lstm, mae_lstm, mape_lstm, y_true, y_pred_lstm_inv = lstm_eval
print('LSTM -> RMSE: {:.4f}, MAE: {:.4f}, MAPE: {:.2f}%'.format(*lstm_eval[:3]))

# Attention model: same procedure, with a zero-filled second input.
decoder_dummy_test = np.zeros((X_test.shape[0], OUTPUT_STEPS, X_test.shape[2]))
y_pred_attn = attn_model.predict([X_test, decoder_dummy_test])
attn_eval = inverse_and_metrics(y_test, y_pred_attn)
rmse_attn, mae_attn, mape_attn, _, y_pred_attn_inv = attn_eval
print('Attn -> RMSE: {:.4f}, MAE: {:.4f}, MAPE: {:.2f}%'.format(*attn_eval[:3]))

In [None]:
# Plot example predictions
def plot_preds(y_true, y_pred, idx=0):
    """Plot one forecast window: true values against predicted values.

    Args:
        y_true: Indexable collection of true forecast windows.
        y_pred: Indexable collection of predicted forecast windows.
        idx: Which window to draw from both collections.
    """
    _, ax = plt.subplots(figsize=(8, 4))
    ax.plot(y_true[idx], marker='o', label='True')
    ax.plot(y_pred[idx], marker='x', label='Pred')
    ax.set_title('Example multi-step forecast (true vs pred)')
    ax.set_xlabel('Horizon (hours)')
    ax.legend()
    ax.grid(True)
    plt.show()

# Map the scaled test targets back to original units once, then draw the
# first test window for each model.
y_test_inv = scaler_y.inverse_transform(y_test.reshape(-1,1)).reshape(y_test.shape)
for model_preds in (y_pred_lstm_inv, y_pred_attn_inv):
    plot_preds(y_test_inv, model_preds, idx=0)

In [None]:
# Persist both trained models and a JSON run summary under /content/project_output
OUTPUT_DIR = '/content/project_output'
output_dir = Path(OUTPUT_DIR)
output_dir.mkdir(parents=True, exist_ok=True)
lstm_model.save(str(output_dir / 'lstm_baseline.h5'))
attn_model.save(str(output_dir / 'attn_seq2seq.h5'))

# float(...) converts the metric values to plain floats for json.
lstm_scores = {'rmse': float(rmse_lstm), 'mae': float(mae_lstm), 'mape': float(mape_lstm)}
attn_scores = {'rmse': float(rmse_attn), 'mae': float(mae_attn), 'mape': float(mape_attn)}
summary = {
    'target': TARGET,
    'input_steps': INPUT_STEPS,
    'output_steps': OUTPUT_STEPS,
    'lstm_metrics': lstm_scores,
    'attn_metrics': attn_scores,
}
(output_dir / 'run_summary.json').write_text(json.dumps(summary, indent=2))
print('Saved to', OUTPUT_DIR)