# Improved Attention-Based Time Series Forecasting Project

## 1. Imports and Setup

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, Layer
from tensorflow.keras.models import Model
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Seed NumPy and TensorFlow so repeated runs start from the same random state.
np.random.seed(42)
tf.random.set_seed(42)

## 2. Load the Synthetic Multivariate Dataset
This dataset has 5000 samples and 6 columns (5 features + target).

In [None]:
# Location of the synthetic sensor CSV consumed by the rest of the notebook.
DATA_PATH = '/mnt/data/synthetic_sensor_timeseries_5000.csv'

df = pd.read_csv(DATA_PATH)
print("Dataset Loaded:", df.shape)
# NOTE: a notebook only renders an expression when it is the last one in the
# cell, so this preview is not displayed from here.
df.head()
# Every column of the frame as a single NumPy array, in column order.
features = df.values

## 3. Scaling and Sequence Creation
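We scale all columns with MinMaxScaler and cut the series into sliding input-output windows: each input holds SEQ_LEN consecutive rows of every column, and each output holds the next FORECAST_HORIZON values of the last column, which is used as the target.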

In [None]:
SEQ_LEN = 30
FORECAST_HORIZON = 5
# Share of the leading rows used to fit the scaler. It mirrors the 70%
# chronological training share used by the train/validation/test split, so the
# scaler never sees the min/max of rows that only occur in later splits.
TRAIN_FRACTION = 0.7

scaler = MinMaxScaler()
# Fit on the training portion only, then apply the same transform to all rows.
# Values in later rows may therefore fall slightly outside [0, 1].
n_fit_rows = int(len(features) * TRAIN_FRACTION)
scaler.fit(features[:n_fit_rows])
scaled = scaler.transform(features)


def create_sequences(data, seq_len, horizon):
    """Cut a 2-D series into sliding (input window, target window) pairs.

    Args:
        data: 2-D array indexed as ``data[row, column]``; rows are consecutive
            timesteps and the last column is the value to forecast.
        seq_len: Number of consecutive rows in each input window.
        horizon: Number of rows following the input window whose last-column
            values form the target.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X[k]`` is ``data[k:k+seq_len, :]``
        and ``y[k]`` is ``data[k+seq_len:k+seq_len+horizon, -1]``. Both arrays
        are empty when ``data`` is too short to hold one full pair.
    """
    # "+ 1" keeps the final window, whose target ends exactly on the last row.
    n_windows = len(data) - seq_len - horizon + 1
    X = [data[i:i + seq_len, :] for i in range(n_windows)]
    y = [data[i + seq_len:i + seq_len + horizon, -1] for i in range(n_windows)]
    return np.array(X), np.array(y)


X, y = create_sequences(scaled, SEQ_LEN, FORECAST_HORIZON)
print("X shape:", X.shape, "Y shape:", y.shape)

## 4. Train / Validation / Test Split (Chronological)

In [None]:
# Chronological 70 / 15 / 15 split: no shuffling, later windows stay later.
n_samples = len(X)
train_size = int(n_samples * 0.7)
val_size = int(n_samples * 0.15)
val_end = train_size + val_size  # first index that belongs to the test split

X_train, X_val, X_test = X[:train_size], X[train_size:val_end], X[val_end:]
y_train, y_val, y_test = y[:train_size], y[train_size:val_end], y[val_end:]

print("Train:", X_train.shape, "Val:", X_val.shape, "Test:", X_test.shape)

## 5. Custom Attention Layer
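This layer assigns an importance score to every timestep of the LSTM output, normalizes the scores with a softmax over time, and returns the weighted sum of the timesteps (the context vector) together with the weights.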

In [None]:
class AttentionLayer(Layer):
    """Additive attention that pools a sequence into a single context vector.

    For a rank-3 input (indexed as ``[batch, step, feature]``) the layer
    computes ``tanh(inputs @ W + b) @ u`` to get one score per step, applies a
    softmax along axis 1, and returns the score-weighted sum of the steps.

    Standard Keras layer keyword arguments (``name``, ``trainable``, ``dtype``,
    ...) are forwarded to the base class so the layer can be rebuilt from its
    config, e.g. when a model containing it is cloned or reloaded.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection ``W``, bias ``b`` and scoring vector ``u``."""
        feature_dim = input_shape[-1]
        self.W = self.add_weight(
            name="attention_W",
            shape=(feature_dim, feature_dim),
            initializer="glorot_uniform",
            trainable=True,
        )
        self.b = self.add_weight(
            name="attention_b",
            shape=(feature_dim,),
            initializer="zeros",
            trainable=True,
        )
        self.u = self.add_weight(
            name="attention_u",
            shape=(feature_dim,),
            initializer="glorot_uniform",
            trainable=True,
        )

    def call(self, inputs):
        """Return ``(context, attention_weights)`` for ``inputs``.

        ``context`` is the weighted sum over axis 1 of ``inputs``;
        ``attention_weights`` holds one softmax-normalized weight per step.
        """
        # Project the feature axis, then collapse it to one score per step.
        score = tf.tanh(tf.tensordot(inputs, self.W, axes=[2, 0]) + self.b)
        score = tf.tensordot(score, self.u, axes=[2, 0])
        # Normalize across steps so each sample's weights sum to 1.
        attention_weights = tf.nn.softmax(score, axis=1)
        # Re-add a trailing axis so the weights broadcast over the features.
        context = attention_weights[..., None] * inputs
        context = tf.reduce_sum(context, axis=1)
        return context, attention_weights

## 6. Baseline LSTM and Attention-LSTM Models

In [None]:
def build_baseline_lstm(input_shape, units=64, dropout=0.2, horizon=None):
    """Build and compile a plain LSTM forecaster.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        units: Number of LSTM units.
        dropout: Dropout rate applied to the LSTM output.
        horizon: Number of values the model predicts. Defaults to the
            module-level ``FORECAST_HORIZON`` when omitted.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer and MSE loss.
    """
    if horizon is None:
        horizon = FORECAST_HORIZON
    inp = Input(shape=input_shape)
    x = LSTM(units)(inp)
    x = Dropout(dropout)(x)
    out = Dense(horizon)(x)
    model = Model(inp, out)
    model.compile(optimizer='adam', loss='mse')
    return model


def build_attention_lstm(input_shape, units=64, dropout=0.2, horizon=None):
    """Build and compile an LSTM forecaster with attention pooling.

    The LSTM returns its full output sequence, which ``AttentionLayer`` reduces
    to one context vector before the dense output layer.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        units: Number of LSTM units.
        dropout: Dropout rate applied to the LSTM output sequence.
        horizon: Number of values the model predicts. Defaults to the
            module-level ``FORECAST_HORIZON`` when omitted.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer and MSE loss.
    """
    if horizon is None:
        horizon = FORECAST_HORIZON
    inp = Input(shape=input_shape)
    lstm_out = LSTM(units, return_sequences=True)(inp)
    lstm_out = Dropout(dropout)(lstm_out)
    # The attention weights are not part of the model output here; they can be
    # read back later from the layer's second output.
    context, attn = AttentionLayer()(lstm_out)
    out = Dense(horizon)(context)
    model = Model(inp, out)
    model.compile(optimizer='adam', loss='mse')
    return model

## 7. Rolling Window Validation
Each fold trains on all prior data and tests on the next segment.

In [None]:
def rolling_validation(model_fn, X, y, splits=4):
    """Score a model with expanding-window (walk-forward) validation.

    The samples are cut into ``splits + 1`` equal, consecutive segments. Fold
    ``i`` trains a fresh model on the first ``i + 1`` segments and tests it on
    the segment that follows, so every fold has a full-size test segment.
    Samples left over after the last full segment are not used.

    Args:
        model_fn: Zero-argument callable returning a new, compiled model that
            provides ``fit`` and ``predict``.
        X: Input samples in chronological order.
        y: Targets aligned with ``X``.
        splits: Number of train/test folds to evaluate.

    Returns:
        The mean RMSE over all folds.

    Raises:
        ValueError: If ``splits`` is smaller than 1 or there are fewer than
            ``splits + 1`` samples.
    """
    if splits < 1:
        raise ValueError("splits must be at least 1")
    # One extra segment is reserved as the initial training set; otherwise the
    # last fold would have nothing (or only a remainder) left to test on.
    fold_size = len(X) // (splits + 1)
    if fold_size == 0:
        raise ValueError(
            f"need at least {splits + 1} samples for {splits} folds, got {len(X)}"
        )

    errors = []
    for i in range(splits):
        end_train = (i + 1) * fold_size
        end_test = end_train + fold_size
        X_tr, y_tr = X[:end_train], y[:end_train]
        X_te, y_te = X[end_train:end_test], y[end_train:end_test]

        # A fresh model per fold keeps weights from leaking between folds.
        model = model_fn()
        model.fit(X_tr, y_tr, epochs=10, batch_size=32, verbose=0)
        pred = model.predict(X_te)

        rmse = np.sqrt(mean_squared_error(y_te, pred))
        errors.append(rmse)
        print(f"Fold {i+1}: RMSE = {rmse:.4f}")
    return np.mean(errors)

## 8. Hyperparameter Tuning Grid Search

In [None]:
# Candidate hyperparameter combinations tried for both architectures.
param_grid = [
    {"units": 32, "dropout": 0.1},
    {"units": 64, "dropout": 0.2},
    {"units": 128, "dropout": 0.3},
]


def model_builder_baseline(params):
    """Build a baseline LSTM for one ``param_grid`` entry."""
    return build_baseline_lstm(
        X_train.shape[1:], params["units"], params["dropout"]
    )


def model_builder_attention(params):
    """Build an attention LSTM for one ``param_grid`` entry."""
    return build_attention_lstm(
        X_train.shape[1:], params["units"], params["dropout"]
    )


# Each entry pairs a parameter dict with its mean rolling-validation RMSE.
# The lambda is invoked inside the same iteration that creates it, so it
# always sees the current ``p``.
print("Evaluating Baseline LSTM...")
baseline_scores = [
    (p, rolling_validation(lambda: model_builder_baseline(p), X_train, y_train))
    for p in param_grid
]

print("\nEvaluating Attention-LSTM...")
attention_scores = [
    (p, rolling_validation(lambda: model_builder_attention(p), X_train, y_train))
    for p in param_grid
]

# Lowest mean RMSE wins; ties resolve to the earliest grid entry.
best_baseline = min(baseline_scores, key=lambda entry: entry[1])
best_attention = min(attention_scores, key=lambda entry: entry[1])

print("\nBest Baseline Params:", best_baseline)
print("Best Attention Params:", best_attention)

## 9. Train Final Attention-LSTM with Best Parameters

In [None]:
# best_attention is a (params, score) pair; only the params are needed here.
best_params, _ = best_attention

final_attention_model = build_attention_lstm(
    X_train.shape[1:],
    best_params["units"],
    best_params["dropout"],
)

# Train on the training split; the validation split is only monitored.
final_attention_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
    verbose=1,
)

# Score on the held-out test split. The error is in scaled units because the
# targets were taken from the scaled data.
test_pred = final_attention_model.predict(X_test)
test_mse = mean_squared_error(y_test, test_pred)
test_rmse = np.sqrt(test_mse)
print("\nFinal Test RMSE:", test_rmse)

## 10. Attention Weights Extraction and Visualization

In [None]:
# Locate the first attention layer in the trained model (None if absent).
attention_layer = next(
    (
        layer
        for layer in final_attention_model.layers
        if isinstance(layer, AttentionLayer)
    ),
    None,
)
if attention_layer is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError("final_attention_model contains no AttentionLayer")

# The layer returns (context, attention_weights); expose the second output
# through a side model that shares the trained model's input.
attention_extractor = Model(
    inputs=final_attention_model.input,
    outputs=attention_layer.output[1],
)

# Predict on a one-sample batch and drop the batch axis.
attn_weights = attention_extractor.predict(X_test[:1])[0]

plt.figure(figsize=(10, 4))
plt.plot(attn_weights)
plt.title("Attention Weights for First Test Sample")
plt.xlabel("Timestep")
plt.ylabel("Weight")
plt.show()