In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Model


In [2]:
import warnings
# Ignore the UserWarning messages attributed to the openpyxl module.
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")


# ---------------------- Load the experiment result workbook ----------------------
# Folder that contains the experiment result files (relative path).
outdir = '../../EPFLAerosenseData/EPFLAerosenseData/'
# Excel workbook listing the experiments.
experiment_result = "_DOE_Aerosense_Dynamic_Experiments_EPFL.xlsx"
doe_workbook = outdir + experiment_result

# One sheet per angle of attack (0 deg and 8 deg), read in that order.
attack0, attack8 = (
    pd.read_excel(doe_workbook, sheet_name=sheet)
    for sheet in ('0_deg_angle_attack', '8_deg_angle_attack')
)

# Columns kept from both sheets; only the concentrated-mass column is named
# differently in the two sheets.
shared_attributes = [
    "Experiment Number",
    "Zeroing",
    "Heaving frequency in [Hz],  from motor excitations",
    "Wind speed [m/s]",
    "Crack length [mm]",
]
relevant_attribute0 = shared_attributes + ["Concentrated mass [yes = 1, no = 0]"]
relevant_attribute8 = shared_attributes + ["Concentrated mass"]

data_attack0 = attack0[relevant_attribute0]
data_attack8 = attack8[relevant_attribute8]


#######################  Load the time series data ###################################################

# One plain dict per experiment row of the 0 deg sheet; the recorded time
# series is attached to each record under the "csv_data" key.
dict_attack0 = data_attack0.to_dict(orient="records")

# Column labels (integer labels assigned by header=None) removed from every
# recording.
# NOTE(review): presumably sensors that must be excluded - confirm against the
# dataset documentation.
dropped_columns = [23, 37]

for record in dict_attack0:
    # Folder names carry the experiment number zero-padded to three digits,
    # e.g. "aoa_0deg_Exp_007_aerosense".
    complete_name = f"aoa_0deg_Exp_{int(record['Experiment Number']):03d}_aerosense"
    complete_path = (
        outdir + "aerosense_aerodynamic_data/" + "aoa_0deg/"
        + complete_name + "/" + "1_baros_p.csv"
    )

    csv_data = pd.read_csv(complete_path, header=None, skiprows=2)
    # Discard the first column (time, not useful), the last column (NaN) and
    # the last row (may be incomplete), then remove the excluded columns.
    record["csv_data"] = csv_data.iloc[:-1, 1:-1].drop(columns=dropped_columns)


#######################  Don't use the zeroing or zero-wind experiments  ###################################################

dic_attack0_filtered = []  # never filled in this cell
dic_attack0_filtered_group_by = {}
for record in dict_attack0:
    # Keep only the experiments with heaving and wind: skip the zeroing runs
    # and the runs recorded at zero wind speed.
    if record["Zeroing"] == "zeroing" or record["Wind speed [m/s]"] == 0:
        continue

    number = record["Experiment Number"]
    previous_kept = (number - 1) in dic_attack0_filtered_group_by
    second_previous_kept = (number - 2) in dic_attack0_filtered_group_by

    # Group every three consecutive experiments: the group id is the number of
    # the first experiment; the first two are for training, the third for testing.
    # NOTE(review): this relies on each kept run of consecutive experiment
    # numbers having exactly three members; a fourth consecutive experiment
    # would also be labelled "testing" with group id number - 2.
    if not previous_kept:
        assignment = (number, "training")
    elif not second_previous_kept:
        assignment = (number - 1, "training")
    else:
        assignment = (number - 2, "testing")
    dic_attack0_filtered_group_by[number] = assignment



############################# Signal Windowing #############################################
# Window geometry: after discarding the first 2000 rows of a recording,
# 2000-row windows are taken every 1000 rows (consecutive windows overlap by
# half), which gives 11 windows per experiment.
n_skipped_rows = 2000
num_rows_per_block = 2000
block_stride = 1000
start_block = list(range(0, 10001, block_stride))  # [0, 1000, ..., 10000]

# Look experiments up by their number rather than by list position, so the
# pairing stays correct even if the sheet is not ordered 1..N without gaps.
experiment_by_number = {rec["Experiment Number"]: rec for rec in dict_attack0}

signal_windowing = []
for exp_number, (exp_group, split_label) in dic_attack0_filtered_group_by.items():
    exp_i = experiment_by_number[exp_number]  # full record of this experiment
    df_csv_data = exp_i["csv_data"].iloc[n_skipped_rows:]
    # Everything except the raw recording is copied into each window.
    metadata = {key: value for key, value in exp_i.items() if key != "csv_data"}
    for block_ind, start in enumerate(start_block):
        window = dict(metadata)
        window["block_ind"] = block_ind
        window["exp_group"] = exp_group
        window["training_or_testing"] = split_label
        # A recording shorter than expected yields a shorter (or empty) window here.
        window["block"] = df_csv_data.iloc[start:start + num_rows_per_block]
        signal_windowing.append(window)

# Metadata table: one row per window, every field except the raw samples.
windowing_list = [
    {key: value for key, value in window.items() if key != "block"}
    for window in signal_windowing
]
windowing_df = pd.DataFrame(windowing_list)

# Time series table: one row per window, one column per retained CSV column;
# each cell holds that column's samples for the window as a pandas Series.
n_total = len(signal_windowing)
time_series_dict = {
    idx: signal_windowing[idx]["block"].to_dict(orient="series")
    for idx in range(n_total)
}
time_series_df = pd.DataFrame(time_series_dict).T

# Windows recorded with the concentrated mass get crack length 25, which
# becomes class 5 once the crack length is divided by 5 to build the target.
# Columns are addressed by label instead of by position (formerly 5 and 4).
has_concentrated_mass = windowing_df["Concentrated mass [yes = 1, no = 0]"] == 1
windowing_df.loc[has_concentrated_mass, "Crack length [mm]"] = 25


In [3]:
# Convert the table of Series into one dense array: every row of
# time_series_df is one window, every cell of that row is one column's samples.
# Stacking the cells of a row gives a 2-D array per window.
processed_data = [
    np.stack([cell.to_numpy() for cell in window_row], axis=0)
    for _, window_row in time_series_df.iterrows()
]
data = np.array(processed_data)

# Min-max normalisation using the extrema of the whole array.
data_min, data_max = data.min(), data.max()
data = (data - data_min) / (data_max - data_min)

In [4]:
import tensorflow as tf
from tensorflow.keras import layers, Model

class Sampling(layers.Layer):
    """Draw a latent sample z = z_mean + exp(0.5 * z_log_var) * epsilon.

    `inputs` is the pair (z_mean, z_log_var); epsilon is standard normal noise
    with the same shape and dtype as z_mean.
    """

    def call(self, inputs):
        z_mean, z_log_var = inputs
        # tf.random.normal is used instead of tf.keras.backend.random_normal,
        # which is not exposed by Keras 3; taking shape and dtype from z_mean
        # also avoids assuming a rank-2 input.
        epsilon = tf.random.normal(shape=tf.shape(z_mean), dtype=z_mean.dtype)
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

# Seed Python, NumPy and TensorFlow before any weight is created so that the
# initial weights, the batch shuffling and the latent sampling repeat from run
# to run (GPU kernels may still introduce small numerical differences).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Shape of one input window and size of the latent space.
n_sensors = 38
window_length = 2000
latent_dim = 512

# Encoder: flatten the window, two dense layers, then the two heads that
# parameterise the latent distribution and the sampled latent vector.
encoder_inputs = layers.Input(shape=(n_sensors, window_length))
x = layers.Flatten()(encoder_inputs)
x = layers.Dense(1024, activation="relu")(x)
x = layers.Dense(512, activation="relu")(x)  # Simplified layer
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
encoder = Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")

# Decoder: mirror of the encoder; the sigmoid output matches inputs that were
# scaled to the [0, 1] range.
latent_inputs = layers.Input(shape=(latent_dim,))
x = layers.Dense(512, activation="relu")(latent_inputs)  # Simplified layer
x = layers.Dense(1024, activation="relu")(x)
x = layers.Dense(n_sensors * window_length, activation="sigmoid")(x)
decoder_outputs = layers.Reshape((n_sensors, window_length))(x)
decoder = Model(latent_inputs, decoder_outputs, name="decoder")

# VAE Model
class VAE(Model):
    """Variational autoencoder that wraps an encoder and a decoder model.

    The encoder must return (z_mean, z_log_var, z); the decoder maps z back to
    a reconstruction of the input.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder

    def train_step(self, data):
        """Run one optimisation step on a batch and return the loss values.

        The total loss is the mean squared reconstruction error plus the mean
        of the element-wise KL divergence terms.
        """
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            mse = tf.keras.losses.mean_squared_error(data, reconstruction)
            reconstruction_loss = tf.reduce_mean(mse)
            kl_terms = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(kl_terms)
            total_loss = reconstruction_loss + kl_loss
        weights = self.trainable_weights
        grads = tape.gradient(total_loss, weights)
        self.optimizer.apply_gradients(zip(grads, weights))
        return dict(
            loss=total_loss,
            reconstruction_loss=reconstruction_loss,
            kl_loss=kl_loss,
        )


vae = VAE(encoder, decoder)


In [5]:
# Adam with a learning rate of 5e-4.
opt = tf.keras.optimizers.Adam(learning_rate=5e-4)
vae.compile(optimizer=opt)
# Epochs and batch size can be adjusted as needed.
vae.fit(x=data, batch_size=32, epochs=20)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x2973581fc10>

In [6]:
# The encoder outputs [z_mean, z_log_var, z]; the mean (z_mean) is used as the
# feature representation of each window.
encoder_outputs = encoder.predict(data)
latent_features = encoder_outputs[0]




In [7]:
# Sanity check: one row per window, one column per latent dimension.
latent_features.shape

(792, 512)

In [8]:
# Feature table: the two operating-condition columns followed by the latent
# means (both frames are aligned on the window index).
operating_condition_columns = [
    "Heaving frequency in [Hz],  from motor excitations",
    "Wind speed [m/s]",
]
X_plus = windowing_df[operating_condition_columns]
X_new = pd.DataFrame(latent_features)
X = pd.concat([X_plus, X_new], axis=1)

# Target: crack length divided by 5.
y = windowing_df["Crack length [mm]"].div(5)

In [9]:
# Split features and target with the per-window "training"/"testing" label.
split_label = windowing_df["training_or_testing"]
is_training = split_label == "training"
is_testing = split_label == "testing"

X_train, y_train = X[is_training], y[is_training]
X_test, y_test = X[is_testing], y[is_testing]

In [10]:
# Write the four tables as CSV files; the output folder is created first so
# the export does not fail when it does not exist yet.
output_dir = Path("../data/VAEExtractorOverlapping")
output_dir.mkdir(parents=True, exist_ok=True)

tables_to_export = {
    "X_train": X_train,
    "X_test": X_test,
    "y_train": y_train,
    "y_test": y_test,
}
for table_name, table in tables_to_export.items():
    table.to_csv(output_dir / f"{table_name}.csv", index=False)