# LSTM Model Training for RUL Prediction (PyTorch)

**Objective:** Build and train an LSTM neural network to predict Remaining Useful Life (RUL)

**Steps:**
1. Load preprocessed data
2. Build LSTM model architecture with PyTorch
3. Train model with early stopping (In Progress)
4. Evaluate performance (RMSE, MAE) (In Progress)
5. Visualize predictions and training history (In Progress)
6. Save trained model and extract latent features for XGBoost (In Progress)

In [None]:
# --------------------------------------------------------------------------
# Import required libraries for deep learning and visualization
# --------------------------------------------------------------------------

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import warnings
warnings.filterwarnings('ignore')

# Seed the NumPy and PyTorch random number generators so repeated runs
# start from the same state.
np.random.seed(42)
torch.manual_seed(42)

# Pick the compute device; when CUDA is present its generator is seeded too.
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Plot defaults used by every figure in this notebook.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

# Report the runtime environment.
print(f"PyTorch version: {torch.__version__}")
print(f"Device: {device}")

In [None]:
# --------------------------------------------------------------------------
# Load preprocessed data and metadata
# --------------------------------------------------------------------------

# Read the preprocessed feature/target arrays for each split from disk.
X_train, y_train = np.load('processed_data/X_train.npy'), np.load('processed_data/y_train.npy')
X_val, y_val = np.load('processed_data/X_val.npy'), np.load('processed_data/y_val.npy')
X_test, y_test = np.load('processed_data/X_test.npy'), np.load('processed_data/y_test.npy')

# Metadata written by the preprocessing step (a dict; printed below).
with open('processed_data/metadata.pkl', 'rb') as f:
    metadata = pickle.load(f)

# Summarise what was loaded.
print(f"Training set: {X_train.shape}")
print(f"Validation set: {X_val.shape}")
print(f"Test set: {X_test.shape}")
print(f"\nMetadata:")
for key, value in metadata.items():
    print(f"  {key}: {value}")


def _features_to_tensor(array):
    """Return ``array`` as a float32 tensor placed on ``device``."""
    return torch.FloatTensor(array).to(device)


def _targets_to_tensor(array):
    """Return ``array`` as a float32 tensor with an added axis 1, on ``device``."""
    return torch.FloatTensor(array).unsqueeze(1).to(device)


# Tensors for every split (targets gain a trailing axis so they line up
# with the model's single-column output).
X_train_tensor, y_train_tensor = _features_to_tensor(X_train), _targets_to_tensor(y_train)
X_val_tensor, y_val_tensor = _features_to_tensor(X_val), _targets_to_tensor(y_val)
X_test_tensor, y_test_tensor = _features_to_tensor(X_test), _targets_to_tensor(y_test)

# Pair features with targets.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training loader shuffles; validation and test keep file order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=128)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=128)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=128)

print(f"\nData loaders created with batch size: 128")

In [None]:
# --------------------------------------------------------------------------
# Build LSTM model architecture
# --------------------------------------------------------------------------

class LSTMModel(nn.Module):
    """Two stacked LSTM layers followed by a small fully connected head.

    Inputs are batch-first sequences, i.e. ``(batch, time, n_features)``.
    The forward pass returns the scalar prediction per sequence together
    with the last-timestep output of the second LSTM (after dropout), which
    serves as a latent feature vector.

    Args:
        n_features: Number of input features per timestep.
        hidden_dim1: Hidden size of the first LSTM layer.
        hidden_dim2: Hidden size of the second LSTM layer (latent size).
        fc_dim: Width of the intermediate fully connected layer.
        dropout: Dropout probability used after both LSTMs and the first
            fully connected layer.
    """

    def __init__(self, n_features, hidden_dim1=64, hidden_dim2=32, fc_dim=32, dropout=0.2):
        super().__init__()

        # Recurrent encoder: two LSTMs, each followed by dropout.
        self.lstm1 = nn.LSTM(input_size=n_features, hidden_size=hidden_dim1, batch_first=True)
        self.dropout1 = nn.Dropout(p=dropout)
        self.lstm2 = nn.LSTM(input_size=hidden_dim1, hidden_size=hidden_dim2, batch_first=True)
        self.dropout2 = nn.Dropout(p=dropout)

        # Regression head: Linear -> ReLU -> Dropout -> Linear(1).
        self.fc1 = nn.Linear(in_features=hidden_dim2, out_features=fc_dim)
        self.relu = nn.ReLU()
        self.dropout3 = nn.Dropout(p=dropout)
        self.fc2 = nn.Linear(in_features=fc_dim, out_features=1)

    def forward(self, x):
        """Return ``(prediction, latent)`` for a batch of sequences.

        ``prediction`` has one column per sequence; ``latent`` is the
        second LSTM's output at the final timestep, after dropout.
        """
        encoded, _ = self.lstm1(x)
        encoded, _ = self.lstm2(self.dropout1(encoded))

        # Dropout is applied to the whole sequence before slicing the last step.
        latent = self.dropout2(encoded)[:, -1, :]

        hidden = self.dropout3(self.relu(self.fc1(latent)))
        return self.fc2(hidden), latent

    def predict(self, x):
        """Return only the prediction part of :meth:`forward`."""
        return self.forward(x)[0]

# Build model
# Both values come from the preprocessing metadata; only n_features is
# needed to construct the network here.
sequence_length = metadata['sequence_length']
n_features = metadata['n_features']

model = LSTMModel(n_features=n_features).to(device)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Learning rate scheduler: halve the LR after 7 epochs without improvement
# in the monitored (minimised) metric, never going below 1e-6.
# NOTE: the `verbose` argument is intentionally not passed. It is deprecated
# since PyTorch 2.2 and is not accepted by newer releases; read the current
# learning rate from `optimizer.param_groups[0]['lr']` (or
# `scheduler.get_last_lr()`) when it needs to be logged.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=7, min_lr=1e-6
)

print("Model architecture:")
print(model)
print(f"\nTotal parameters: {sum(p.numel() for p in model.parameters()):,}")

In [None]:
# --------------------------------------------------------------------------
# Define training and validation functions
# --------------------------------------------------------------------------

# Make sure the 'models' directory exists. exist_ok=True turns this into a
# no-op when the directory is already there, so the cell is safe to re-run.
# (Presumably this is where the trained model is saved later - confirm.)
os.makedirs('models', exist_ok=True)

def train_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Module whose forward call returns a ``(predictions, extra)``
            tuple; only the predictions are used here.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable. It is assumed to return the batch *mean*
            (as ``nn.MSELoss`` does by default) so it can be re-weighted by
            batch size.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device each batch is moved to before the forward pass
            (a no-op for tensors already on that device).

    Returns:
        Tuple ``(avg_loss, avg_mae)``: loss and mean absolute error averaged
        over all samples seen, so a smaller final batch is not over-weighted.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    abs_err_sum = 0.0
    n_samples = 0

    for batch_X, batch_y in loader:
        # Honour the `device` argument so CPU-backed loaders also work.
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()

        outputs, _ = model(batch_X)
        loss = criterion(outputs, batch_y)

        loss.backward()
        optimizer.step()

        # Weight per-batch means by batch size to get a true per-sample average.
        batch_size = batch_y.size(0)
        loss_sum += loss.item() * batch_size
        abs_err_sum += torch.mean(torch.abs(outputs.detach() - batch_y)).item() * batch_size
        n_samples += batch_size

    if n_samples == 0:
        raise ValueError("train_epoch received an empty loader")

    avg_loss = loss_sum / n_samples
    avg_mae = abs_err_sum / n_samples

    return avg_loss, avg_mae

def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Module whose forward call returns a ``(predictions, extra)``
            tuple; only the predictions are used here.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable. It is assumed to return the batch *mean*
            (as ``nn.MSELoss`` does by default) so it can be re-weighted by
            batch size.
        device: Device each batch is moved to before the forward pass
            (a no-op for tensors already on that device).

    Returns:
        Tuple ``(avg_loss, avg_mae)``: loss and mean absolute error averaged
        over all samples seen, so a smaller final batch is not over-weighted.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    abs_err_sum = 0.0
    n_samples = 0

    with torch.no_grad():
        for batch_X, batch_y in loader:
            # Honour the `device` argument so CPU-backed loaders also work.
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)

            outputs, _ = model(batch_X)
            loss = criterion(outputs, batch_y)

            # Weight per-batch means by batch size to get a true per-sample average.
            batch_size = batch_y.size(0)
            loss_sum += loss.item() * batch_size
            abs_err_sum += torch.mean(torch.abs(outputs - batch_y)).item() * batch_size
            n_samples += batch_size

    if n_samples == 0:
        raise ValueError("validate received an empty loader")

    avg_loss = loss_sum / n_samples
    avg_mae = abs_err_sum / n_samples

    return avg_loss, avg_mae

# Status banner for this cell; the patience values are informational text
# only and are not read by any code here.
print(
    "Training functions defined",
    "  - Early stopping patience: 15 epochs",
    "  - Learning rate reduction patience: 7 epochs",
    sep="\n",
)