### Implementation of GRU on the time-series dataset

In [1]:
from google.colab import drive

# Expose the Google Drive contents on the Colab filesystem so the data
# files under /content/drive can be opened by path in the cells below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
# from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [3]:
# Load the feature sequences and their RUL targets from Google Drive.
# NOTE(review): despite the `*_test` names, this is the only dataset the
# notebook loads; the train/validation split is taken from these arrays in
# a later cell. The names are kept because the following cells use them.
DATA_DIR = '/content/drive/MyDrive/Infosys/MileStone3'
X_test = np.load(f'{DATA_DIR}/rolling_window_sequences.npy')
metadata_test = pd.read_csv(f'{DATA_DIR}/sequence_metadata_with_RUL.csv')
y_test = metadata_test["RUL"].values
print("Test feature shape:", X_test.shape)
print("Test target shape:", y_test.shape)

# Each sequence must have exactly one target. Fail here with an explicit
# message instead of letting a misaligned pair reach the split/training cells.
if X_test.shape[0] != y_test.shape[0]:
    raise ValueError(
        f"Feature/target count mismatch: {X_test.shape[0]} sequences "
        f"but {y_test.shape[0]} RUL values"
    )

Test feature shape: (17631, 30, 66)
Test target shape: (17631,)


In [4]:
# Hold out 30% of the loaded samples for validation and train on the rest
# (no separate training file is provided). The fixed random_state makes the
# partition identical on every run.
# NOTE(review): the split is random over individual sequences. If the
# sequences are overlapping windows of the same series (the file name
# suggests rolling windows), near-identical samples can land on both sides
# of the split -- TODO confirm and consider a grouped split.
X_train, X_val, y_train, y_val = train_test_split(
    X_test,
    y_test,
    test_size=0.3,
    random_state=42,
)

In [5]:
# Define GRU model creation function
def create_gru_model(input_shape, units=64, learning_rate=0.001, dropout_rate=0.2):
    """Build and compile a single-layer GRU regression model.

    The network is GRU -> Dropout -> Dense(1), compiled with the Adam
    optimizer, mean-squared-error loss and mean-absolute-error as an
    additional metric.

    Args:
        input_shape: Shape of one input sample, excluding the batch axis.
            Forwarded unchanged to the Keras input specification.
        units: Number of units in the GRU layer.
        learning_rate: Learning rate given to the Adam optimizer.
        dropout_rate: Rate of the Dropout layer placed after the GRU.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        # Declare the input with an explicit Input object rather than an
        # `input_shape=` keyword on the first layer; current Keras warns
        # about the keyword form when it is used inside a Sequential model.
        tf.keras.Input(shape=input_shape),
        GRU(units),
        Dropout(dropout_rate),
        Dense(1),  # single linear output for the regression target
    ])
    model.compile(
        loss='mse',
        optimizer=Adam(learning_rate=learning_rate),
        metrics=['mae'],
    )
    return model

In [6]:
from tensorflow.keras.callbacks import EarlyStopping

In [7]:
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so that weight initialisation, dropout and batch shuffling
# repeat from run to run (as far as the backend is deterministic).
tf.keras.utils.set_random_seed(42)

# Create the model; the input shape is taken from axes 1 and 2 of X_train,
# i.e. everything except the leading sample axis.
model = create_gru_model(
    input_shape=(X_train.shape[1], X_train.shape[2]),
    units=64,
    learning_rate=0.001,
    dropout_rate=0.2,
)

# Callbacks for early stopping: stop once val_loss has not improved for
# 5 consecutive epochs and roll the model back to its best weights.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
]

  super().__init__(**kwargs)


In [8]:
# Fit the model for at most 30 epochs in mini-batches of 64 samples.
# The validation split is scored after every epoch and the `callbacks`
# list is passed through to Keras. The returned History object is kept
# in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=30,
    verbose=1,
    callbacks=callbacks,
    validation_data=(X_val, y_val),
)

Epoch 1/30
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - loss: 7559.4365 - mae: 75.4251 - val_loss: 5999.9775 - val_mae: 65.8100
Epoch 2/30
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 31ms/step - loss: 5728.9790 - mae: 64.1222 - val_loss: 4673.4946 - val_mae: 56.4722
Epoch 3/30
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 35ms/step - loss: 4409.7920 - mae: 54.5233 - val_loss: 3653.6072 - val_mae: 48.7979
Epoch 4/30
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 26ms/step - loss: 3482.7415 - mae: 47.6402 - val_loss: 2859.0095 - val_mae: 42.4964
Epoch 5/30
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 36ms/step - loss: 2743.7007 - mae: 41.7526 - val_loss: 2239.4204 - val_mae: 37.2342
Epoch 6/30
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 26ms/step - loss: 2137.3735 - mae: 36.3857 - val_loss: 1765.4270 - val_mae: 32.8784
Epoch 7/30
[1m193/193[0m [32m━

In [9]:
# Score the trained model on the validation split. evaluate() returns the
# loss (MSE) followed by the compiled metric (MAE).
val_metrics = model.evaluate(X_val, y_val, verbose=0)
val_loss, val_mae = val_metrics
print(f'Validation MSE: {val_loss:.4f}, MAE: {val_mae:.4f}')

# Predict for every sequence in X_test and collapse the predictions to 1-D
# so they line up with y_test.
# NOTE(review): X_test / y_test are the full arrays that X_train and X_val
# were split from, so the "Test MSE" below includes samples the model was
# trained on and is not a held-out estimate.
raw_predictions = model.predict(X_test)
y_pred = raw_predictions.flatten()

# Mean squared error between the targets and the predictions
test_mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Test MSE: {test_mse:.4f}')

Validation MSE: 110.0374, MAE: 7.7946
[1m551/551[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step
Test MSE: 96.0046


Task: Build and train a GRU (Gated Recurrent Unit) neural network model for sequence regression on a given dataset. The implementation must include the following features:

Use of callbacks such as EarlyStopping to prevent overfitting and ModelCheckpoint to save the best model.

Hyperparameter tuning for model parameters like number of GRU units, learning rate, and dropout rate.

Cross-validation (e.g., K-Fold) to robustly evaluate model performance across different data splits.

Reporting of evaluation metrics such as mean squared error (MSE) and mean absolute error (MAE).