In [None]:
import numpy as np
import pandas as pd
from pickle import load
from tqdm import tqdm
from scipy.stats import spearmanr

from sklearn.preprocessing import MinMaxScaler

#models will be built using Keras
from tensorflow import keras
import tensorflow as tf

import plotly.express as px
import plotly.io as pio
pio.templates.default="simple_white"

from progtools.preprocessing import RawFlightData
from progtools.modelling import gpu_check
from progtools.scoring import result_stats

In [None]:
#check that TensorFlow is able to detect the GPU before any models are loaded
#NOTE(review): gpu_check is a progtools helper whose body is not visible here -
#presumably it reports the GPU devices TensorFlow can see; confirm against progtools
gpu_check()

In [None]:
#master dataframes for dev data
#read the raw flight data in (the path is relative to the notebook's working directory)
filename = "data/N-CMAPSS_DS05.h5"
raw_data_read = RawFlightData(filename=filename)

#create the development dataframes
#NOTE(review): the roles below are taken from how each frame is used later in this
#notebook, not from the progtools source - confirm against RawFlightData
#df_W_dev: inputs that are scaled and fed to the per-sensor models
#df_X_s_dev: measured sensor values, one column per sensor model
#df_A_dev: auxiliary data; its "unit", "cycle" and "hs" columns are joined in later
#df_Y_dev: target data; its "RUL" column is used in the correlation section
df_W_dev = raw_data_read.dev_flight_data()
df_X_s_dev = raw_data_read.dev_sensor_data()
df_A_dev = raw_data_read.dev_aux_data()
df_Y_dev = raw_data_read.dev_RUL_data()

In [None]:
#load the scaler used in model training
#the context manager guarantees the file handle is closed once the scaler is read
#NOTE: pickle can execute arbitrary code on load - only unpickle scaler files from a trusted source
with open("sensor_models/scaler_W.pkl", "rb") as scaler_file:
    scaler_W = load(scaler_file)

#apply the training-time scaling to the development inputs
development_W = scaler_W.transform(np.asarray(df_W_dev))

# Create Predictions

In [None]:
#create predictions for every sensor on the dev set
#each model's output is collected in a dict and the results dataframe is built once
#at the end, instead of re-joining a growing dataframe on every iteration
sensor_predictions = {}

#loop through each sensor (tqdm reports the progress, so no per-sensor print is needed)
for s in tqdm(df_X_s_dev.columns, desc="predicting sensors"):
    #load the specific model
    X_s_model = keras.models.load_model(f"sensor_models/{s}_model.h5")

    #create predictions on the dev set and flatten the single-column output
    #so it can be stored as one dataframe column named after the sensor
    sensor_predictions[s] = np.asarray(X_s_model.predict(development_W)).reshape(-1)

#create a dataframe to store results: one column per sensor, indexed like the inputs
#(a length mismatch, e.g. from a multi-output model, raises here rather than passing silently)
df_pred = pd.DataFrame(sensor_predictions, index=df_W_dev.index)

In [None]:
#pull the raw values out of both frames so the arithmetic below is purely positional
X_s_pred = df_pred.to_numpy()
X_s_dev = df_X_s_dev.to_numpy()

#percentage delta between measurements and predictions, relative to the measurement
X_s_deg = np.divide(X_s_dev - X_s_pred, X_s_dev) * 100

#wrap the deltas back into a dataframe with the original sensor columns and row index
df_X_s_deg = pd.DataFrame(
    X_s_deg,
    index=df_W_dev.index,
    columns=df_X_s_dev.columns,
)

In [None]:
#attach the "unit", "cycle" and "hs" columns of the auxiliary data (index-aligned join)
aux_columns = ["unit", "cycle", "hs"]
df_X_s_deg = df_X_s_deg.join(df_A_dev[aux_columns])

In [None]:
#collapse to one row per (unit, cycle) pair holding the mean of every other column
df_X_s_deg = df_X_s_deg.groupby(["unit", "cycle"], as_index=False).mean()

# Examine Results

In [None]:
#one line plot per sensor: mean predicted-to-measured delta against cycle

#the sensor columns are everything except the identifier / auxiliary columns
sensor_columns = df_X_s_deg.drop(columns=["unit","cycle","hs"]).columns

for sensor_name in sensor_columns:

    #cycle vs. mean percentage delta, one coloured line per unit
    fig1 = px.line(
        df_X_s_deg,
        x="cycle",
        y=sensor_name,
        color="unit",
        title=f"Predicted to Measured Mean Delta by Cycle for sensor: {sensor_name}",
    )

    #label both axes and pin the y axis to a fixed window so the plots are comparable
    fig1.update_xaxes(title="Cycle [-]").update_yaxes(title="Mean Delta [%]", range=[-2.5,2.5])

    fig1.show()

# Mann-Whitney U Test

In [None]:
#Mann-Whitney U test for each unit and sensor, comparing the first and last cycles
#NOTE(review): result_stats is a progtools helper; this description follows the
#notebook's own wording rather than the helper's source - confirm
first_last_cycles = 10
mwu_results = result_stats(df_X_s_deg, cycles=first_last_cycles)

In [None]:
#create boxplot of p-values for Mann-Whitney U test versus sensor
#the title names the Mann-Whitney U test because mwu_results is the data being plotted
fig1 = px.box(mwu_results,
             y="p_value",
             x="sensor",
             title="Boxplot of Mann-Whitney U Test P-Values")

#update x axis label
fig1.update_xaxes(title="Physical Sensor")

#update y axis label and limits (p-values lie between 0 and 1)
fig1.update_yaxes(title="P-Value [-]",
            range=[0,1])

#include a horizontal reference line at 0.05
fig1.add_hline(y=0.05,line_color="black",line_width=2.5)

In [None]:
#summarise the Mann-Whitney U statistic per sensor
#rows are collected in their own list so mwu_table only ever names the final dataframe
mwu_rows = []

#single pass over the results: one group per sensor, in order of first appearance
for sensor, u_values in mwu_results.groupby("sensor", sort=False)["U"]:

    #median and mean Mann-Whitney U value for this sensor
    mwu_rows.append([sensor, u_values.median(), u_values.mean()])

#both statistic columns use a capital U, in line with median_R / mean_R in the Spearman summary
mwu_table = pd.DataFrame(mwu_rows, columns=["sensor", "median_U", "mean_U"])
mwu_table

# Correlation to RUL

In [None]:
#attach the "unit" and "cycle" identifiers to the RUL targets (index-aligned join)
id_columns = ["unit", "cycle"]
df_RUL = df_Y_dev.join(df_A_dev[id_columns])

In [None]:
#reduce to a single (mean) RUL value for every unit and cycle combination
df_RUL_mean = df_RUL.groupby(["unit", "cycle"], as_index=False).mean()

In [None]:
#store Spearman Ranked Correlation results in new dataframe
sr_results = []

#the sensor columns are the same for every unit, so work them out once
sensor_columns = df_X_s_deg.drop(columns=["unit","cycle","hs"]).columns

#loop through each unit
for u in tqdm(df_RUL_mean["unit"].unique()):
    #isolate the RUL for this unit
    rul = df_RUL_mean.loc[df_RUL_mean["unit"] == u, "RUL"]

    #isolate this unit's sensor deltas once, rather than re-filtering for every sensor
    #both frames were grouped by (unit, cycle), so their rows pair up positionally
    unit_deltas = df_X_s_deg[df_X_s_deg["unit"] == u]

    #loop through each sensor
    for s in sensor_columns:
        #determine the Spearman R statistic and p-value for RUL and sensor delta
        corr, p_value = spearmanr(rul, unit_deltas[s])

        #append results to list (both values rounded to 5 decimal places)
        sr_results.append([u, s, round(corr, 5), round(p_value, 5)])

df_sr_results = pd.DataFrame(sr_results, columns=["unit", "sensor", "R", "p_value"])

In [None]:
#summary statistics of the Spearman R value for every sensor

#map each sensor (in order of first appearance) to its R values across all units
r_by_sensor = {
    sensor_name: df_sr_results.loc[df_sr_results["sensor"] == sensor_name, "R"]
    for sensor_name in df_sr_results["sensor"].unique()
}

#one row per sensor: name, median R and mean R
sr_table = pd.DataFrame(
    [[sensor_name, r_values.median(), r_values.mean()]
     for sensor_name, r_values in r_by_sensor.items()],
    columns=["sensor", "median_R", "mean_R"],
)
sr_table

In [None]:
#boxplot of the Spearman p-values for each sensor
fig2 = (
    px.box(
        df_sr_results,
        x="sensor",
        y="p_value",
        title="Boxplot of Spearman Correlation P-Values",
    )
    #x axis label
    .update_xaxes(title="Physical Sensor")
    #y axis label and limits
    .update_yaxes(title="P-Value [-]", range=[0,1])
    #horizontal reference line at 0.05
    .add_hline(y=0.05, line_color="black", line_width=2.5)
)

#display the finished figure
fig2

In [None]:
#boxplot of Spearman R values by sensor, restricted to observations with p-value <= 0.05
significant_results = df_sr_results[df_sr_results["p_value"]<=0.05]

fig3 = px.box(
    significant_results,
    x="sensor",
    y="R",
    title="Boxplot of Spearman Correlation Rho Scores for observations where P-Value ≤ 0.05",
)

#axis labels, with the y axis spanning the full range of a correlation coefficient
fig3.update_xaxes(title="Physical Sensor")
fig3.update_yaxes(title="Rho [-]", range=[-1,1])

#horizontal reference lines at 0.5 and -0.5
for rho_level in (0.5, -0.5):
    fig3.add_hline(y=rho_level, line_color="black", line_width=2.5)

#display the finished figure
fig3