In [79]:
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
from tensorflow.keras.utils import Sequence
import tensorflow as tf
from tensorflow.keras.layers import Input, Bidirectional, LSTM, Dense, Concatenate, Dropout, GlobalAveragePooling1D, GlobalMaxPooling1D, Lambda
from tensorflow.keras.models import Model
import joblib
from sklearn.metrics import mean_squared_error

In [3]:
# Directory containing the CSV files
csv_directory = "D:\MTdataset\DL_dataset\Python_DL\Final_current_vibration_data"

## Load and combine all the current and vibration data

In [5]:
# Load the current and vibration data
current_files = [file for file in os.listdir(csv_directory) if 'current' in file]
vibration_files = [file for file in os.listdir(csv_directory) if 'vibration' in file]

current_dfs = [pd.read_csv(os.path.join(csv_directory, file)) for file in current_files]
vibration_dfs = [pd.read_csv(os.path.join(csv_directory, file)) for file in vibration_files]

# Combine all current data and vibration data into single DataFrames
current_data = pd.concat(current_dfs, ignore_index=True)
vibration_data = pd.concat(vibration_dfs, ignore_index=True)

## Data preprocessing
### Take care of missing values and data normalization

In [6]:
# Ensure there are no missing values
current_data = current_data.dropna()
vibration_data = vibration_data.dropna()

# Normalize the data (excluding the Time column)
scaler_current = MinMaxScaler()
scaler_vibration = MinMaxScaler()

current_data.iloc[:, 1:] = scaler_current.fit_transform(current_data.iloc[:, 1:])
vibration_data.iloc[:, 1:] = scaler_vibration.fit_transform(vibration_data.iloc[:, 1:])

### Data generation - creating data sequences for training

In [7]:
# Function to create sequences
def create_sequences(data, sequence_length):
    sequences = []
    for i in range(len(data) - sequence_length):
        seq = data.iloc[i:i + sequence_length].values
        sequences.append(seq)
    return np.array(sequences)

sequence_length = 100

current_sequences = create_sequences(current_data, sequence_length)
vibration_sequences = create_sequences(vibration_data, sequence_length)

# Assume the RUL is the last value in the Time column for each sequence
rul = current_data['Time'][sequence_length:].values

# Train-validation split
train_current_seq, val_current_seq, train_vibration_seq, val_vibration_seq, train_rul, val_rul = train_test_split(
    current_sequences, vibration_sequences, rul, test_size=0.2, random_state=42)

class DataGenerator(Sequence):
    def __init__(self, current_sequences, vibration_sequences, rul, batch_size):
        self.current_sequences = current_sequences
        self.vibration_sequences = vibration_sequences
        self.rul = rul
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.current_sequences) / self.batch_size))

    def __getitem__(self, index):
        batch_current = self.current_sequences[index * self.batch_size:(index + 1) * self.batch_size]
        batch_vibration = self.vibration_sequences[index * self.batch_size:(index + 1) * self.batch_size]
        batch_rul = self.rul[index * self.batch_size:(index + 1) * self.batch_size]
        return [batch_current, batch_vibration], batch_rul

# Create generators
batch_size = 32
train_generator = DataGenerator(train_current_seq, train_vibration_seq, train_rul, batch_size)
validation_generator = DataGenerator(val_current_seq, val_vibration_seq, val_rul, batch_size)

## Time and Channel Attention layer functions

In [76]:
# Define custom layers for time and channel attention
class TimeAttentionLayer(tf.keras.layers.Layer):
    def __init__(self, hidden_size):
        super(TimeAttentionLayer, self).__init__()
        self.W_h = tf.keras.layers.Dense(hidden_size)
        self.W_t = tf.keras.layers.Dense(hidden_size)
        self.v = tf.keras.layers.Dense(1)

    def call(self, inputs):
        h_t = inputs
        u_t = tf.nn.tanh(self.W_h(h_t) + tf.expand_dims(self.W_t(h_t[:, -1, :]), axis=1))
        a_t = tf.nn.softmax(self.v(u_t), axis=1)
        S_t = tf.reduce_sum(a_t * h_t, axis=1)
        return S_t

    def get_config(self):
        config = super(TimeAttentionLayer, self).get_config()
        config.update({"hidden_size": self.hidden_size})
        return config

class ChannelAttentionLayer(tf.keras.layers.Layer):
    def __init__(self, num_channels):
        super(ChannelAttentionLayer, self).__init__()
        self.W_v = tf.keras.layers.Dense(num_channels)
        self.W_m = tf.keras.layers.Dense(num_channels)
        self.W_n = tf.keras.layers.Dense(num_channels)
        self.W_a = tf.keras.layers.Dense(num_channels, activation='sigmoid')

    def call(self, inputs):
        H = tf.expand_dims(inputs, axis=1)  # Add an extra dimension
        v_j = GlobalAveragePooling1D()(H)
        m_j = GlobalMaxPooling1D()(H)
        n_j = tf.reduce_sum(H, axis=1)
        concatenated = tf.concat([v_j, m_j, n_j], axis=1)
        r = tf.nn.relu(self.W_v(concatenated) + self.W_m(concatenated) + self.W_n(concatenated))
        attention_weights = self.W_a(r)
        attention_weights = tf.expand_dims(attention_weights, axis=1)
        attention_weights = tf.tile(attention_weights, [1, 1, tf.shape(inputs)[1]])
        attention_weights = tf.transpose(attention_weights, perm=[0, 2, 1])
        attention_output = attention_weights * H
        return tf.reduce_sum(attention_output, axis=1)

    def get_config(self):
        config = super(ChannelAttentionLayer, self).get_config()
        config.update({"num_channels": self.num_channels})
        return config

## MCA BiLSTM model function

In [77]:
# Define input shapes
stator_input_shape = (100, 6)  # (sequence_length, number of stator features)
rotor_input_shape = (100, 1)  # (sequence_length, number of rotor features)

# Define the model
def build_mca_bilstm_model(hidden_size, dropout_rate, dense_size):
    # Stator current input
    input_stator = Input(shape=stator_input_shape, name='stator_input')
    lstm_stator = Bidirectional(LSTM(hidden_size, return_sequences=True))(input_stator)
    time_attention_stator = TimeAttentionLayer(hidden_size)(lstm_stator)
    
    print("Shape after time attention (stator):", time_attention_stator.shape)

    # Rotor vibration input
    input_rotor = Input(shape=rotor_input_shape, name='rotor_input')
    lstm_rotor = Bidirectional(LSTM(hidden_size, return_sequences=True))(input_rotor)
    time_attention_rotor = TimeAttentionLayer(hidden_size)(lstm_rotor)
    
    print("Shape after time attention (rotor):", time_attention_rotor.shape)

    # Concatenate outputs from time attention layers
    concatenated = Concatenate()([time_attention_stator, time_attention_rotor])
    print("Shape after concatenation:", concatenated.shape)

    # Channel attention layer
    channel_attention_layer = ChannelAttentionLayer(hidden_size * 2)(concatenated)
    print("Shape after channel attention:", channel_attention_layer.shape)

    # Dense layers with Dropout
    dense1 = Dense(dense_size, activation='relu')(channel_attention_layer)
    dropout1 = Dropout(dropout_rate)(dense1)
    output = Dense(1, name='RUL')(dropout1)

    model = Model(inputs=[input_stator, input_rotor], outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

## Training the model

In [40]:
# Hyperparameters
hidden_size = 64
dropout_rate = 0.3
dense_size = 128

# Build and compile the model
model = build_mca_bilstm_model(hidden_size, dropout_rate, dense_size)

# Print the model summary
model.summary()

# Train the model
model.fit(train_generator, epochs=5, validation_data=validation_generator)

Shape after time attention (stator): (None, 128)
Shape after time attention (rotor): (None, 128)
Shape after concatenation: (None, 256)
Shape after channel attention: (None, 256)
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 stator_input (InputLayer)      [(None, 100, 6)]     0           []                               
                                                                                                  
 rotor_input (InputLayer)       [(None, 100, 1)]     0           []                               
                                                                                                  
 bidirectional_18 (Bidirectiona  (None, 100, 128)    36352       ['stator_input[0][0]']           
 l)                                                                                               
            

<keras.callbacks.History at 0x20860958be0>

In [None]:
predictions = model.predict([val_current_seq, val_vibration_seq])
mse = mean_squared_error(val_rul, predictions)
print('Mean squared error:', mse)

## Save the model

In [78]:
# Save the model
model.save('mca_bilstm_model_minmax.h5')

# Save the scalers
import joblib
joblib.dump(scaler_current, 'scaler_current_minmax.pkl')
joblib.dump(scaler_vibration, 'scaler_vibration_minmax.pkl')


NotImplementedError: 
Layer TimeAttentionLayer has arguments ['hidden_size']
in `__init__` and therefore must override `get_config()`.

Example:

class CustomLayer(keras.layers.Layer):
    def __init__(self, arg1, arg2):
        super().__init__()
        self.arg1 = arg1
        self.arg2 = arg2

    def get_config(self):
        config = super().get_config()
        config.update({
            "arg1": self.arg1,
            "arg2": self.arg2,
        })
        return config