In [12]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Model

In [13]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")


#########################Load the experiment result file#########################################
# directory of the experiment result files
outdir = '../../EPFLAerosenseData/EPFLAerosenseData/'
#csv of experiments
experiment_result = "_DOE_Aerosense_Dynamic_Experiments_EPFL.xlsx"

attack0 = pd.read_excel(outdir+experiment_result, sheet_name='0_deg_angle_attack')
attack8 = pd.read_excel(outdir+experiment_result, sheet_name='8_deg_angle_attack')

relevant_attribute0 = ["Experiment Number", "Zeroing", "Heaving frequency in [Hz],  from motor excitations", "Wind speed [m/s]", 
                        "Crack length [mm]",  "Concentrated mass [yes = 1, no = 0]" ]

relevant_attribute8 = ["Experiment Number","Zeroing", "Heaving frequency in [Hz],  from motor excitations", "Wind speed [m/s]", 
                        "Crack length [mm]",  "Concentrated mass"  ]

data_attack0 = attack0[relevant_attribute0]
data_attack8 = attack8[relevant_attribute8]


#######################  Load the time series data ###################################################

dict_attack0 = data_attack0.to_dict(orient = "records")

for i in dict_attack0:
    exp_num = i["Experiment Number"]    
    
    filename_pre = "aoa_0deg_Exp_"
    if exp_num < 10:
        filename_num = "00" + str(exp_num)
    elif exp_num < 100:
        filename_num = "0" + str(exp_num)
    else:
        filename_num = str(exp_num)        
    filename_sub = "_aerosense"
    complete_name = filename_pre + filename_num + filename_sub
    complete_path = outdir +"aerosense_aerodynamic_data/" +  "aoa_0deg/" +complete_name + "/" + "1_baros_p.csv" 
    
    csv_data = pd.read_csv(complete_path,header=None,skiprows=2)
    i["csv_data"] = csv_data.iloc[:-1,1:-1] #first column of time is not useful, last row maybe incomplete, las column is nan, drop them
    i["csv_data"] = i["csv_data"].drop(columns=[23,37])    


#######################  Dont use the zeroing experiments  ###################################################

dic_attack0_filtered  = []
dic_attack0_filtered_group_by = {}
for i in dict_attack0:
    if i["Zeroing"] != "zeroing" and i["Wind speed [m/s]"]!= 0: # only the expriments with heaving and wind
        exp_num = i["Experiment Number"]
        #group by every 3 experiments
        #every first 2 used for training and the 3rd for testing
        if exp_num-1 not in dic_attack0_filtered_group_by:
            dic_attack0_filtered_group_by[exp_num] = (exp_num, "training")
        elif exp_num-1 in dic_attack0_filtered_group_by and exp_num-2  not in dic_attack0_filtered_group_by:
            dic_attack0_filtered_group_by[exp_num] = (exp_num - 1,"training")
        elif exp_num-1 in dic_attack0_filtered_group_by and exp_num-2  in dic_attack0_filtered_group_by:
            dic_attack0_filtered_group_by[exp_num] = (exp_num -2, "testing")



############################# Signal Windowing #############################################
signal_windowing = []
for i_key, i_value in dic_attack0_filtered_group_by.items():
    exp_ind = i_key - 1 #to get the experiment  number
    exp_i = dict_attack0[exp_ind] # get the whole dictionary of the corresponding experiment
    df_csv_data = exp_i["csv_data"]
    df_csv_data = df_csv_data.iloc[2000:] # Drop the first 2000 rows    
    num_rows_per_block = 2000# Calculate the number of rows in each of the 6 blocks
    num_blocks = 6  #len(df) // num_rows_per_block
    # Split the DataFrame into 6 blocks of 2000 rows each
    total_number_rows = num_rows_per_block * num_blocks
    start_block = [ i  for i in range(0,10001,1000)] #starting position of each block: [0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000]
    end_block = [i + 2000 for i in start_block]
    start_end_ind = [ (i,j) for i,j in zip(start_block, end_block)]
    blocks = [df_csv_data.iloc[ind[0] : ind[1] ] for ind in start_end_ind]
    for block_ind, block in enumerate(blocks):
        window = {key: value for key, value in exp_i.items() if key != "csv_data"}         
        window["block_ind"] = block_ind
        window["exp_group"] = i_value[0]
        window["training_or_testing"] = i_value[1]
        window["block"] = block        
        signal_windowing.append(window)

windowing_list = []
for i in signal_windowing:
    windowing_list.append({ i_key: i_value for i_key, i_value in i.items() if i_key != "block"})
windowing_df = pd.DataFrame(windowing_list)

time_series_dict = {}
n_total = len(signal_windowing)
for i in range(n_total):
    time_series_dict[i] = signal_windowing[i]["block"].to_dict(orient="series")
time_series_df = pd.DataFrame(time_series_dict).T

#Concentrated mass changed to class 5
for i in range(windowing_df.shape[0]):
    if windowing_df.iloc[i,5] == 1:
        windowing_df.iloc[i,4] = 25


In [14]:
processed_data = []
for index, row in time_series_df.iterrows():
    experiment_data = []
    # Iterate over each cell in the row
    for cell in row:
        # Convert the Pandas Series in each cell to a NumPy array and append to the experiment data
        experiment_data.append(cell.to_numpy())
    # Stack the sensor data for each experiment and append to the processed data
    processed_data.append(np.stack(experiment_data, axis=0))
data = np.array(processed_data)

#normalize the data
data = (data - data.min()) / (data.max() - data.min())

In [15]:
data.shape

(792, 38, 2000)

In [27]:
import tensorflow as tf
from tensorflow.keras import layers, Model

# Encoder with LSTM
latent_dim = 128  # Size of the latent vector

encoder_inputs = layers.Input(shape=(38, 2000))
x = layers.LSTM(256, return_sequences=True)(encoder_inputs)
x = layers.LSTM(128)(x)
latent_vector = layers.Dense(latent_dim, activation='relu')(x)
encoder = Model(encoder_inputs, latent_vector, name="encoder")

# Decoder with LSTM
decoder_inputs = layers.Input(shape=(latent_dim,))
x = layers.RepeatVector(38)(decoder_inputs)  # Adjusting the input shape for LSTM
x = layers.LSTM(128, return_sequences=True)(x)
x = layers.LSTM(256, return_sequences=True)(x)
decoder_outputs = layers.TimeDistributed(layers.Dense(2000, activation="sigmoid"))(x)
decoder = Model(decoder_inputs, decoder_outputs, name="decoder")

# Autoencoder Model
class Autoencoder(Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(Autoencoder, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs):
        encoded = self.encoder(inputs)
        decoded = self.decoder(encoded)
        return decoded

autoencoder = Autoencoder(encoder, decoder)

# Example of compiling the model
autoencoder.compile(optimizer='adam', loss='mean_squared_error')


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_20 (LSTM)              (None, 38, 64)            528640    
                                                                 
 lstm_21 (LSTM)              (None, 64)                33024     
                                                                 
 dense_14 (Dense)            (None, 128)               8320      
                                                                 
Total params: 569984 (2.17 MB)
Trainable params: 569984 (2.17 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [28]:
# Assume `x_train` is your training data
autoencoder.fit(data, data, epochs=10, batch_size=32, verbose = 1)
# To get the latent features for some data
latent_features = encoder.predict(data)




Epoch 1/10


ValueError: in user code:

    File "c:\Users\zheng\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1377, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\zheng\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1360, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\zheng\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1349, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\zheng\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1127, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\Users\zheng\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1185, in compute_loss
        return self.compiled_loss(
    File "c:\Users\zheng\anaconda3\Lib\site-packages\keras\src\engine\compile_utils.py", line 277, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\Users\zheng\anaconda3\Lib\site-packages\keras\src\losses.py", line 143, in __call__
        losses = call_fn(y_true, y_pred)
    File "c:\Users\zheng\anaconda3\Lib\site-packages\keras\src\losses.py", line 270, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\Users\zheng\anaconda3\Lib\site-packages\keras\src\losses.py", line 1706, in mean_squared_error
        return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)

    ValueError: Dimensions must be equal, but are 128 and 2000 for '{{node mean_squared_error/SquaredDifference}} = SquaredDifference[T=DT_FLOAT](sequential_4/dense_14/BiasAdd, IteratorGetNext:1)' with input shapes: [?,128], [?,38,2000].


In [8]:
encoder_model = Model(encoder_inputs, encoded)
latent_features = encoder_model.predict(data)




In [21]:
latent_features.shape

(792, 2000, 512)