In [6]:
from pathlib import Path

import numpy as np
import pandas as pd


from sklearn.feature_selection import SelectKBest, mutual_info_classif, f_classif, chi2
from sktime.transformations.panel.rocket import (
    MiniRocket,
    MiniRocketMultivariate,
    MiniRocketMultivariateVariable,
)


In [7]:
import warnings

# Silence UserWarnings raised from the openpyxl module (the .xlsx workbook is read below).
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")


######################### Load the experiment result file #########################
# Directory of the experiment result files. The trailing slash matters:
# every path in this notebook is built by plain string concatenation.
outdir = '../../EPFLAerosenseData/EPFLAerosenseData/'
# Excel workbook describing the experiments; one sheet per angle of attack.
experiment_result = "_DOE_Aerosense_Dynamic_Experiments_EPFL.xlsx"

_workbook_path = outdir + experiment_result
attack0 = pd.read_excel(_workbook_path, sheet_name='0_deg_angle_attack')
attack8 = pd.read_excel(_workbook_path, sheet_name='8_deg_angle_attack')

# Columns kept from both sheets. Only the header of the concentrated-mass
# column differs between the two sheets, so it is appended separately.
_shared_attributes = [
    "Experiment Number",
    "Zeroing",
    "Heaving frequency in [Hz],  from motor excitations",
    "Wind speed [m/s]",
    "Crack length [mm]",
]
relevant_attribute0 = _shared_attributes + ["Concentrated mass [yes = 1, no = 0]"]
relevant_attribute8 = _shared_attributes + ["Concentrated mass"]

data_attack0 = attack0[relevant_attribute0]
data_attack8 = attack8[relevant_attribute8]


#######################  Load the time series data ###################################################

# One dict per experiment row of the 0-degree sheet. The loop below attaches
# the matching recording to each dict under the "csv_data" key.
dict_attack0 = data_attack0.to_dict(orient = "records")

# Loop-invariant pieces of the per-experiment file path.
aoa0_data_dir = outdir + "aerosense_aerodynamic_data/" + "aoa_0deg/"
filename_pre = "aoa_0deg_Exp_"
filename_sub = "_aerosense"
# Column labels removed from every recording.
# NOTE(review): the reason is not stated here -- presumably faulty channels; confirm.
dropped_columns = [23, 37]

for i in dict_attack0:
    exp_num = i["Experiment Number"]

    # Folder names carry the experiment number zero-padded to three digits
    # (1 -> "001", 42 -> "042", 123 -> "123").
    filename_num = f"{int(exp_num):03d}"
    complete_name = filename_pre + filename_num + filename_sub
    complete_path = aoa0_data_dir + complete_name + "/" + "1_baros_p.csv"

    # The file has no usable header row: skip the first two lines and let
    # pandas label the columns 0..N-1.
    csv_data = pd.read_csv(complete_path, header=None, skiprows=2)
    # The first column (time) is not useful, the last row may be incomplete and
    # the last column is NaN: drop them, then drop the excluded columns by label.
    i["csv_data"] = csv_data.iloc[:-1, 1:-1].drop(columns=dropped_columns)


#######################  Don't use the zeroing experiments  ###################################################

dic_attack0_filtered  = []
# Maps experiment number -> (first experiment number of its group, "training" | "testing").
dic_attack0_filtered_group_by = {}
# Experiments are grouped in runs of consecutive experiment numbers, three per
# group: the first two of each group are used for training, the third for testing.
group_size = 3
for i in dict_attack0:
    # Only the experiments with heaving and wind: skip zeroing runs and zero wind speed.
    if i["Zeroing"] == "zeroing" or i["Wind speed [m/s]"] == 0:
        continue
    exp_num = i["Experiment Number"]

    # Continue the predecessor's group only while that group is not full yet.
    # Tracking the position inside the group (instead of only testing whether
    # exp_num-1 / exp_num-2 were kept) keeps groups at three experiments even
    # when more than three consecutive experiment numbers pass the filter.
    # This relies on dict_attack0 being ordered by ascending experiment number.
    predecessor = dic_attack0_filtered_group_by.get(exp_num - 1)
    if predecessor is None:
        group_start, position = exp_num, 0
    else:
        group_start = predecessor[0]
        position = exp_num - group_start
        if position >= group_size:
            group_start, position = exp_num, 0

    role = "testing" if position == group_size - 1 else "training"
    dic_attack0_filtered_group_by[exp_num] = (group_start, role)



############################# Signal Windowing #############################################
# Look experiments up by their number rather than by list position, so a
# missing or re-ordered spreadsheet row cannot silently pair a window with the
# wrong experiment.
experiments_by_number = {record["Experiment Number"]: record for record in dict_attack0}

rows_to_skip = 2000        # leading rows discarded from every recording
num_rows_per_block = 2000  # rows per window
num_blocks = 6             # windows cut from each recording

signal_windowing = []
for i_key, i_value in dic_attack0_filtered_group_by.items():
    exp_i = experiments_by_number[i_key]  # the whole record of the corresponding experiment
    df_csv_data = exp_i["csv_data"].iloc[rows_to_skip:]  # drop the first rows

    # Every window must have the same length; a short recording would otherwise
    # yield truncated or empty blocks and a ragged panel further down.
    required_rows = num_blocks * num_rows_per_block
    if len(df_csv_data) < required_rows:
        raise ValueError(
            f"Experiment {i_key}: {len(df_csv_data)} rows left after skipping "
            f"{rows_to_skip}, but {required_rows} are needed for "
            f"{num_blocks} blocks of {num_rows_per_block} rows."
        )

    blocks = [
        df_csv_data.iloc[b * num_rows_per_block : (b + 1) * num_rows_per_block]
        for b in range(num_blocks)
    ]

    # Experiment attributes shared by all windows of this recording.
    metadata = {key: value for key, value in exp_i.items() if key != "csv_data"}
    for block_ind, block in enumerate(blocks):
        window = dict(metadata)
        window["block_ind"] = block_ind
        window["exp_group"] = i_value[0]
        window["training_or_testing"] = i_value[1]
        window["block"] = block
        signal_windowing.append(window)

# Tabular view of the window attributes (everything except the signal itself).
windowing_list = [
    {key: value for key, value in window.items() if key != "block"}
    for window in signal_windowing
]
windowing_df = pd.DataFrame(windowing_list)

# Nested frame: one row per window, one column per signal column, and each
# cell holding that column's values as a pandas Series.
n_total = len(signal_windowing)
time_series_dict = {
    idx: signal_windowing[idx]["block"].to_dict(orient="series")
    for idx in range(n_total)
}
time_series_df = pd.DataFrame(time_series_dict).T

In [8]:
crack_col = "Crack length [mm]"
mass_col = "Concentrated mass [yes = 1, no = 0]"
split_col = "training_or_testing"

# Concentrated mass changed to class 5: those windows get crack length 25,
# which maps to label 5 after the division by 5 below.
# Columns are addressed by label so the assignment does not depend on column order.
windowing_df.loc[windowing_df[mass_col] == 1, crack_col] = 25
#pd.set_option('display.min_rows', 400)
#windowing_df

# Row masks for the split, as plain boolean arrays so they can index any of
# the frames below regardless of their row index.
is_train = (windowing_df[split_col] == "training").to_numpy()
is_test = (windowing_df[split_col] == "testing").to_numpy()

# apply ROCKET
# The transformer is fitted on the training windows only and then applied to
# every window, so nothing about the test windows influences the fit.
MRMVV = MiniRocketMultivariate(num_kernels=1600, random_state=1234)
MRMVV.fit(time_series_df[is_train].reset_index(drop=True))
features_extracted = MRMVV.transform(time_series_df)

X = features_extracted
y = windowing_df[crack_col]

# Select the best features out of the many random features.
# The ANOVA F-scores are computed from training rows and training labels only;
# fitting the selector on all rows would leak the test labels into the features.
# f_classif may warn about constant features; that does not stop the fit.
select = SelectKBest(f_classif, k=40)
select.fit(X[is_train], y[is_train])
X_new = pd.DataFrame(select.transform(X))

# Also use heaving frequency and wind speed as explanatory variables
X_plus = windowing_df[["Heaving frequency in [Hz],  from motor excitations","Wind speed [m/s]" ]]
X = pd.concat([X_plus,X_new],axis=1)
y = windowing_df[crack_col]/5 #divide by 5 to map [0 5 10 15 20 25] to [0 1 2 3 4 5]

# training and testing split
X_train = X[is_train]
X_test = X[is_test]
y_train = y[is_train]
y_test = y[is_test]

 1033 1041 1049 1075 1083 1096 1122 1130 1138 1146 1164 1172 1177 1185
 1203 1211 1219 1227 1235 1243 1245 1253 1266 1274 1282 1284 1290 1292
 1308 1314 1316 1324 1332 1340 1342 1345 1347 1353 1355 1363 1365 1371
 1373 1379 1381 1387 1389 1391 1395 1397 1405 1407 1411 1413 1415 1421
 1423 1428 1436 1438 1442 1444 1450 1452 1454 1458 1460 1468 1470 1476
 1478 1482 1484 1486 1488 1492 1494 1500 1502 1504 1508 1510 1513 1515
 1517 1519 1521 1523 1525 1527 1529 1531 1533 1535 1537 1541 1543 1545
 1547 1549 1555 1557 1559 1561 1563 1565 1571 1573 1575 1577 1579 1581
 1583 1585 1587 1589 1591 1593 1595] are constant.
  f = msb / msw


In [9]:
# Persist the train/test split as CSV files (no index column).
# The output directory is created first because to_csv does not create
# missing parent directories.
output_dir = Path("../data/RocketExtractorNoOverlapping")
output_dir.mkdir(parents=True, exist_ok=True)

for csv_name, table in (
    ("X_train.csv", X_train),
    ("X_test.csv", X_test),
    ("y_train.csv", y_train),
    ("y_test.csv", y_test),
):
    table.to_csv(output_dir / csv_name, index=False)