In [14]:
import pandas as pd
import os
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.optimizers import Adam
import datetime


In [15]:
def load_data(train_file_path, test_file_path):
    # Load training and test data
    column_names = ['engine_id', 'cycle', 'op_setting_1', 'op_setting_2', 'op_setting_3', 
                    'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6', 
                    'sensor_7', 'sensor_8', 'sensor_9', 'sensor_10', 'sensor_11', 'sensor_12', 
                    'sensor_13', 'sensor_14', 'sensor_15', 'sensor_16', 'sensor_17', 'sensor_18', 
                    'sensor_19', 'sensor_20', 'sensor_21']

    # Read CSV data
    train_data = pd.read_csv(train_file_path, header=None, names=column_names, sep=' ')
    test_data = pd.read_csv(test_file_path, header=None, names=column_names, sep=' ')
    
    return train_data, test_data


In [16]:
# Load dataset from your file path
train_file_path = '../data/train_FD001.txt'
test_file_path = '../data/test_FD001.txt'

In [17]:
train_data, test_data = load_data(train_file_path, test_file_path)


In [18]:
print(f"Train data shape: {train_data.shape}")
print(f"Test data shape: {test_data.shape}")

# train_data = train_data.drop(columns=['sensor_20', 'sensor_21'])
test_data = test_data.drop(columns=['sensor_20', 'sensor_21'])



Train data shape: (20631, 26)
Test data shape: (13096, 26)


In [19]:
train_data.head()

Unnamed: 0,Unnamed: 1,engine_id,cycle,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21
1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,21.61,554.36,...,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419,,
1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,21.61,553.75,...,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236,,
1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,21.61,554.26,...,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,,
1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,21.61,554.45,...,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,,
1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,21.61,554.0,...,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044,,


In [20]:
train_data.columns

Index(['engine_id', 'cycle', 'op_setting_1', 'op_setting_2', 'op_setting_3',
       'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6',
       'sensor_7', 'sensor_8', 'sensor_9', 'sensor_10', 'sensor_11',
       'sensor_12', 'sensor_13', 'sensor_14', 'sensor_15', 'sensor_16',
       'sensor_17', 'sensor_18', 'sensor_19', 'sensor_20', 'sensor_21'],
      dtype='object')

In [21]:
test_data.columns

Index(['engine_id', 'cycle', 'op_setting_1', 'op_setting_2', 'op_setting_3',
       'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6',
       'sensor_7', 'sensor_8', 'sensor_9', 'sensor_10', 'sensor_11',
       'sensor_12', 'sensor_13', 'sensor_14', 'sensor_15', 'sensor_16',
       'sensor_17', 'sensor_18', 'sensor_19'],
      dtype='object')

In [22]:
train_data['RUL'] = train_data.groupby('engine_id')['cycle'].transform('max') - train_data['cycle']
test_data['RUL'] = test_data.groupby('engine_id')['cycle'].transform('max') - test_data['cycle']


### DATA PREPROCESSING

In [23]:
def create_sequences(data, window_size=30):
    X, y = [], []
    for engine_id in data['engine_id'].unique():
        # Select the data for each engine
        engine_data = data[data['engine_id'] == engine_id].iloc[:, 2:21].values  # Select sensor columns
        engine_rul = data[data['engine_id'] == engine_id]['RUL'].values  # RUL column
        
        # Create sequences
        for i in range(len(engine_data) - window_size):
            X.append(engine_data[i:i + window_size])
            y.append(engine_rul[i + window_size])  # Predict RUL after window
    return np.array(X), np.array(y)


In [24]:
# Prepare training and testing sequences
window_size = 30  # Set window size (30 time steps)
X_train, y_train = create_sequences(train_data, window_size)
X_test, y_test = create_sequences(test_data, window_size)


In [25]:
print(f"Training data shape: {X_train.shape}, Test data shape: {X_test.shape}")


Training data shape: (17202, 30, 19), Test data shape: (9840, 30, 19)


In [26]:
X_train 

array([[[1.0000e+02, 5.1867e+02, 6.4182e+02, ..., 3.0000e-02,
         3.9200e+02, 2.3880e+03],
        [1.0000e+02, 5.1867e+02, 6.4250e+02, ..., 3.0000e-02,
         3.9100e+02, 2.3880e+03],
        [1.0000e+02, 5.1867e+02, 6.4270e+02, ..., 3.0000e-02,
         3.9400e+02, 2.3880e+03],
        ...,
        [1.0000e+02, 5.1867e+02, 6.4381e+02, ..., 3.0000e-02,
         3.9400e+02, 2.3880e+03],
        [1.0000e+02, 5.1867e+02, 6.4221e+02, ..., 3.0000e-02,
         3.9200e+02, 2.3880e+03],
        [1.0000e+02, 5.1867e+02, 6.4202e+02, ..., 3.0000e-02,
         3.9200e+02, 2.3880e+03]],

       [[1.0000e+02, 5.1867e+02, 6.4250e+02, ..., 3.0000e-02,
         3.9100e+02, 2.3880e+03],
        [1.0000e+02, 5.1867e+02, 6.4270e+02, ..., 3.0000e-02,
         3.9400e+02, 2.3880e+03],
        [1.0000e+02, 5.1867e+02, 6.4248e+02, ..., 3.0000e-02,
         3.9200e+02, 2.3880e+03],
        ...,
        [1.0000e+02, 5.1867e+02, 6.4221e+02, ..., 3.0000e-02,
         3.9200e+02, 2.3880e+03],
        [1.0

### DATA NORMALIZATION

In [27]:
def normalize_data(X_train, X_test):
    scaler = MinMaxScaler(feature_range=(0, 1))
    # Flatten the data to apply MinMaxScaler
    X_train_reshaped = X_train.reshape(-1, X_train.shape[-1])
    X_test_reshaped = X_test.reshape(-1, X_test.shape[-1])
    
    # Fit the scaler to training data
    X_train_scaled = scaler.fit_transform(X_train_reshaped).reshape(X_train.shape)
    # Apply the scaler to test data
    X_test_scaled = scaler.transform(X_test_reshaped).reshape(X_test.shape)
    
    return X_train_scaled, X_test_scaled

# Normalize the sequences
X_train_scaled, X_test_scaled = normalize_data(X_train, X_test)


In [28]:
X_train_scaled

array([[[0.        , 0.        , 0.18373494, ..., 0.        ,
         0.33333333, 0.        ],
        [0.        , 0.        , 0.38855422, ..., 0.        ,
         0.25      , 0.        ],
        [0.        , 0.        , 0.44879518, ..., 0.        ,
         0.5       , 0.        ],
        ...,
        [0.        , 0.        , 0.78313253, ..., 0.        ,
         0.5       , 0.        ],
        [0.        , 0.        , 0.30120482, ..., 0.        ,
         0.33333333, 0.        ],
        [0.        , 0.        , 0.2439759 , ..., 0.        ,
         0.33333333, 0.        ]],

       [[0.        , 0.        , 0.38855422, ..., 0.        ,
         0.25      , 0.        ],
        [0.        , 0.        , 0.44879518, ..., 0.        ,
         0.5       , 0.        ],
        [0.        , 0.        , 0.38253012, ..., 0.        ,
         0.33333333, 0.        ],
        ...,
        [0.        , 0.        , 0.30120482, ..., 0.        ,
         0.33333333, 0.        ],
        [0. 

### LSTM MODEL

In [29]:
def create_lstm_model(X_train):
    model = Sequential()
    
    # Bidirectional LSTM layer
    model.add(Bidirectional(LSTM(100, return_sequences=True), input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(0.3))
    model.add(BatchNormalization())
    
    # Additional LSTM layers for complexity
    model.add(LSTM(50, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(BatchNormalization())
    
    # Final LSTM layer and dropout
    model.add(LSTM(30))
    model.add(Dropout(0.3))
    model.add(BatchNormalization())

    # Dense output layer to predict RUL
    model.add(Dense(1))

    # Compile model
# Recompile the model after loading it
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    
    return model

# Create the LSTM model
model = create_lstm_model(X_train_scaled)

# Print model summary
model.summary()


I0000 00:00:1738037807.194622  104248 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3620 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 6GB Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6
  super().__init__(**kwargs)


### TENSORBOARD VISUALIZATION



In [30]:
def train_model(model, X_train, y_train):
    log_dir = "../logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

    # Train the model
    history = model.fit(
        X_train, y_train,
        epochs=10,
        batch_size=128,
        validation_split=0.2,
        callbacks=[tensorboard_callback]
    )
    
    return model, history

# Train the model
model, history = train_model(model, X_train_scaled, y_train)


2025-01-28 09:47:36.492400: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 31375080 exceeds 10% of free system memory.
2025-01-28 09:47:36.619884: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 31375080 exceeds 10% of free system memory.


Epoch 1/10


I0000 00:00:1738037864.437139  104790 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 82ms/step - loss: 1.2495 - mae: 0.8685 - val_loss: 0.0024 - val_mae: 0.0443
Epoch 2/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 77ms/step - loss: 0.3520 - mae: 0.4669 - val_loss: 0.0016 - val_mae: 0.0336
Epoch 3/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 74ms/step - loss: 0.1963 - mae: 0.3428 - val_loss: 0.0014 - val_mae: 0.0264
Epoch 4/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 89ms/step - loss: 0.1139 - mae: 0.2623 - val_loss: 6.7462e-04 - val_mae: 0.0220
Epoch 5/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 75ms/step - loss: 0.0654 - mae: 0.1993 - val_loss: 2.1155e-04 - val_mae: 0.0105
Epoch 6/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 76ms/step - loss: 0.0367 - mae: 0.1491 - val_loss: 9.7803e-05 - val_mae: 0.0075
Epoch 7/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 71

In [31]:
model.save('../models/best_model.h5')



In [32]:
def evaluate_model(model, X_test):
    y_pred = model.predict(X_test)
    return y_pred

# Evaluate on test data
y_pred = evaluate_model(model, X_test_scaled)

# Print some predictions and actual values
print("Predicted RUL:", y_pred[:5])
print("Actual RUL:", y_test[:5])


2025-01-28 09:50:02.217050: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 22435200 exceeds 10% of free system memory.


[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 22ms/step
Predicted RUL: [[-0.00231721]
 [-0.00294602]
 [-0.00335184]
 [-0.0040621 ]
 [-0.00509058]]
Actual RUL: [0.0002 0.0005 0.0005 0.0009 0.0009]


In [37]:
X_test_scaled

array([[[0.        , 0.        , 0.54518072, ..., 0.        ,
         0.33333333, 0.        ],
        [0.        , 0.        , 0.43373494, ..., 0.        ,
         0.41666667, 0.        ],
        [0.        , 0.        , 0.4186747 , ..., 0.        ,
         0.41666667, 0.        ],
        ...,
        [0.        , 0.        , 0.20180723, ..., 0.        ,
         0.25      , 0.        ],
        [0.        , 0.        , 0.38855422, ..., 0.        ,
         0.25      , 0.        ],
        [0.        , 0.        , 0.43373494, ..., 0.        ,
         0.25      , 0.        ]],

       [[0.        , 0.        , 0.43373494, ..., 0.        ,
         0.41666667, 0.        ],
        [0.        , 0.        , 0.4186747 , ..., 0.        ,
         0.41666667, 0.        ],
        [0.        , 0.        , 0.49096386, ..., 0.        ,
         0.41666667, 0.        ],
        ...,
        [0.        , 0.        , 0.38855422, ..., 0.        ,
         0.25      , 0.        ],
        [0. 