In [32]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Bidirectional, Dense, Concatenate, Multiply, Permute, Lambda, GlobalMaxPooling1D, GlobalAvgPool1D, Reshape, Activation, Dropout, Add
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
import pydot, graphviz

### Load the data from CSV files

In [33]:
# List all of the CSV file paths
stator_csv_files = [r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\01_SCIM_current_allfault_360V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\02_SCIM_current_PTPab_400V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\03_SCIM_current_PTPab_480V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\04_SCIM_current_PTGa_360V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\05_SCIM_current_PTGb_360V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\06_SCIM_current_PTGc_360V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\07_SCIM_current_PTPbc_400V.csv"]

rotor_csv_files = [r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\01_SCIM_vibration_allfault_360V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\02_SCIM_vibration_PTPab_400V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\03_SCIM_vibration_PTPab_480V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\04_SCIM_vibration_PTGa_360V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\05_SCIM_vibration_PTGb_360V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\06_SCIM_vibration_PTGc_360V.csv",
r"D:\MTdataset\DL_dataset\Simulink_data_generation\Final_current_vibration_data\07_SCIM_vibration_PTPbc_400V.csv"]

# Read the CSV files into pandas dataframe and convert to numpy arrays
stator_data = [pd.read_csv(file).values for file in stator_csv_files]
rotor_data = [pd.read_csv(file).values for file in rotor_csv_files]


### Data generator function

In [34]:
def data_generator(stator_data, rotor_data, sequence_length=100, batch_size=32):
    # get the number of samples
    num_samples = stator_data[0].shape[0] - sequence_length     

    # Infinite loop to continuously yield the batches
    while True:
        # Generate batches
        for start_idx in range(0, num_samples, batch_size):
            stator_batch = []
            rotor_batch = []
            batch_y = []

            for i in range(batch_size):
                end_idx = start_idx + i + sequence_length
                if end_idx >= num_samples:
                    break

            # Create sequences of length 'Sequence length'
            stator_seq = [data[start_idx + i:end_idx, 1:] for data in stator_data] # excluding the time column
            rotor_seq = [data[start_idx + i:end_idx, 1] for data in rotor_data] # excluding the time column & vibration is the only remaining column
            
            # Target is the next time step's 'Time' value in stator data (same as rotor data)
            y = stator_data[0][end_idx, 0] # assuming time is the first column in the stator files

            stator_batch.append(np.concatenate(stator_seq, axis=-1))
            rotor_batch.append(np.concatenate(rotor_seq, axis=-1))
            batch_y.append(y)
        
        if len(stator_batch) == batch_size:
            X_stator = np.array(stator_batch)
            X_rotor = np.array(rotor_batch)
            y = np.array(batch_y)

            #concatenating data along the last axis
            yield [X_stator, X_rotor], y 

In [35]:
# Create the data generator

generator = data_generator(stator_data, rotor_data, sequence_length=100, batch_size=32)

## Create the BiLSTM network with Time and Channel attention

In [36]:
# Define the time attention mechanism 
class TimeAttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(TimeAttentionLayer, self).__init__(**kwargs)
        self.W_omega = None
        self.b_omega = None
        self.u_omega = None

    def build(self, input_shape):
        hidden_size = int(input_shape[-1])
        self.W_omega = self.add_weight(name='W_omega', shape=(hidden_size, hidden_size), initializer='uniform', trainable=True)
        self.b_omega = self.add_weight(name='b_omega', shape=(hidden_size,), initializer='uniform', trainable=True)
        self.u_omega = self.add_weight(name='u_omega', shape=(hidden_size,), initializer='uniform', trainable=True)
        super(TimeAttentionLayer, self).build(input_shape)

    def call(self, inputs):
        u_t = tf.nn.tanh(tf.tensordot(inputs, self.W_omega, axes=1) + self.b_omega)
        alpha_t = tf.nn.softmax(tf.reduce_sum(u_t * self.u_omega, axis=2, keepdims=True), axis=1)
        context_vector = tf.reduce_sum(alpha_t * inputs, axis=1)
        return context_vector

class ChannelAttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(ChannelAttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_shape = input_shape
        self.dense1 = Dense(input_shape[-1] // 8, activation='relu')
        self.dense2 = Dense(input_shape[-1], activation='linear')
        self.dense3 = Dense(input_shape[-1], activation='linear')
        self.activation = Activation('hard_sigmoid')
        super(ChannelAttentionLayer, self).build(input_shape)

    def call(self, inputs):
        # GMP and GAP for descriptors m and v
        gmp = GlobalMaxPooling1D()(inputs)
        gap = GlobalAvgPool1D()(inputs)
        
        # Fully Connected Layer for descriptor n
        fcl = self.dense3(inputs)
        
        # Descriptors
        m = self.dense1(gmp)
        v = self.dense1(gap)
        n = self.dense1(fcl)
        
        # Multilayer Perceptrons
        W12 = self.dense2(v)
        W22 = self.dense2(m)
        
        # Combine using element-wise summation and add n
        combined = Add()([W12, W22, n])
        
        # Activation function to get attention weights
        attention_weights = self.activation(combined)
        attention_weights = Reshape((1, self.input_shape[-1]))(attention_weights)
        
        # Apply attention weights
        channel_attention = Multiply()([inputs, attention_weights])
        
        return channel_attention


In [37]:
# Define the model with modified channel attention mechanism and dropout
def create_model(stator_input_shape, rotor_input_shape):
    # Stator current input
    input_stator = Input(shape=stator_input_shape, name='stator_input')
    lstm_stator = Bidirectional(LSTM(64, return_sequences=True))(input_stator)
    time_attention_stator = TimeAttentionLayer()(lstm_stator)
    channel_attention_stator = ChannelAttentionLayer()(tf.expand_dims(time_attention_stator, 1))

    # Rotor vibration input
    input_rotor = Input(shape=rotor_input_shape, name='rotor_input')
    lstm_rotor = Bidirectional(LSTM(64, return_sequences=True))(input_rotor)
    time_attention_rotor = TimeAttentionLayer()(lstm_rotor)
    channel_attention_rotor = ChannelAttentionLayer()(tf.expand_dims(time_attention_rotor, 1))

    # Concatenate and Dense layers with Dropout
    concatenated = Concatenate()([channel_attention_stator, channel_attention_rotor])
    dense1 = Dense(64, activation='relu')(concatenated)
    dropout1 = Dropout(0.5)(dense1)
    output = Dense(1)(dropout1)

    model = Model(inputs=[input_stator, input_rotor], outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

# input shape for stator and rotor data
stator_input_shape = (100, 6) # (sequence_length, number of stator features)
rotor_input_shape = (100, 1) # (sequence_length, number of rotor features)

model = create_model(stator_input_shape, rotor_input_shape)
model.summary

# Fit the model using the data generator
# model.fit(generator, steps_per_epoch=(stator_data[0].shape[0] - 100) // 32, epochs=10)

ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


In [38]:
# Visualization Functions
def visualize_bilstm(input_shape):
    input_layer = Input(shape=input_shape)
    bilstm_layer = Bidirectional(LSTM(64, return_sequences=True))(input_layer)
    model = Model(inputs=input_layer, outputs=bilstm_layer)
    return model

def visualize_time_attention(input_shape):
    input_layer = Input(shape=input_shape)
    attention_output = TimeAttentionLayer()(input_layer)
    model = Model(inputs=input_layer, outputs=attention_output)
    return model

def visualize_channel_attention(input_shape):
    input_layer = Input(shape=input_shape)
    attention_output = ChannelAttentionLayer()(input_layer)
    model = Model(inputs=input_layer, outputs=attention_output)
    return model


bilstm_model = visualize_bilstm((100, 6))  # Adjust input_shape accordingly
tf.keras.utils.plot_model(bilstm_model, to_file='bilstm_model.png', show_shapes=True)

time_attention_model = visualize_time_attention((100, 128))  # Adjust input_shape accordingly
tf.keras.utils.plot_model(time_attention_model, to_file='time_attention_model.png', show_shapes=True)

channel_attention_model = visualize_channel_attention((100, 128))  # Adjust input_shape accordingly
tf.keras.utils.plot_model(channel_attention_model, to_file='channel_attention_model.png', show_shapes=True)

# Visualize the entire model
tf.keras.utils.plot_model(model, to_file='full_model.png', show_shapes=True)

You must install pydot (`pip install pydot`) for `plot_model` to work.
You must install pydot (`pip install pydot`) for `plot_model` to work.


RuntimeError: Exception encountered when calling ChannelAttentionLayer.call().

[1mCould not automatically infer the output shape / dtype of 'channel_attention_layer_3' (of type ChannelAttentionLayer). Either the `ChannelAttentionLayer.call()` method is incorrect, or you need to implement the `ChannelAttentionLayer.compute_output_spec() / compute_output_shape()` method. Error encountered:

Inputs have incompatible shapes. Received shapes (100, 16) and (128,)[0m

Arguments received by ChannelAttentionLayer.call():
  • args=('<KerasTensor shape=(None, 100, 128), dtype=float32, sparse=None, name=keras_tensor_45>',)
  • kwargs=<class 'inspect._empty'>