F score

In [None]:
import seaborn as sb
import pandas as pd
import matplotlib.pyplot as plt

import sys
sys.path.append('../../')
from feature import selection as sel

Time domain

Unnormalized vs. Normalized features
- Result found: F score is independent of scaling

In [None]:
# Compare the summary F statistic of the time-domain features before and after normalization.
# NOTE(review): FEATURES_PATH is not defined in any visible cell — confirm it is set before this cell runs.
features = sel.load_td_feat(['az'], all=True, path=FEATURES_PATH)
fscore = sel.calc_f_stat(features, sel.TD_COLUMNS, summary=True)

features_normalized = sel.normalize_features(features)
fscore_norm = sel.calc_f_stat(features_normalized, sel.TD_COLUMNS, summary=True)

# Options shared by both bar charts.
# NOTE(review): figsize is given both to plt.subplots and to each bar plot — confirm which size is intended.
bar_style = dict(
    figsize=(10, 4),
    grid=True,
    xlabel='Feature',
    ylabel='F statistic',
    legend=False,
)

fig, ax = plt.subplots(1, 2, figsize=(20, 5))
ax_raw, ax_norm = ax
fscore.plot.bar(title='Unnormalized', ax=ax_raw, **bar_style)
fscore_norm.plot.bar(title='Normalized', ax=ax_norm, **bar_style)
plt.show()

F statistic between feature on axis and target fault state

In [None]:
# Non-summarized F statistic of the time-domain features, shown as an annotated heatmap.
# NOTE(review): FEATURES_PATH is not defined in any visible cell — confirm it is set before this cell runs.
features = sel.load_td_feat(['az'], all=True, path=FEATURES_PATH)
df = sel.calc_f_stat(features, sel.TD_COLUMNS, summary=False)

fig, ax = plt.subplots(figsize=(10, 5))
sb.heatmap(
    df,
    annot=True,
    fmt=".0f",  # print the annotations without decimals
    cmap="Greens",
    ax=ax,
)
plt.show()

# Also render the underlying table as the cell output.
df

Frequency domain

F score over all windows in all axes against the multiclass fault label

In [None]:
# Frequency-domain feature columns left out of the F-score analysis.
EXCLUDE_COLS = ['inharmonicity', 'hdev', 'negentropy']
# Keep the order of sel.FD_COLUMNS so the bar order is reproducible; a set
# difference yields an arbitrary order that can change between kernel runs.
columns = [col for col in sel.FD_COLUMNS if col not in EXCLUDE_COLS]

# NOTE(review): FEATURES_PATH is not defined in any visible cell — confirm it is set before this cell runs.
features = sel.load_fd_features(['az'], all=True, path=FEATURES_PATH)
features = features.drop(columns=EXCLUDE_COLS)

fscore = sel.calc_f_stat(features, columns, summary=True)
fscore.plot.bar(figsize=(5, 4), grid=True, xlabel='Feature', ylabel='F statistic', legend=False, title='F score in Frequency domain')
plt.show()

# List the categories of the categorical 'fault' column as the cell output.
features['fault'].cat.categories

F score for a chosen subset of faults

In [None]:
# Restrict the frequency-domain features to a handful of fault classes and re-score them.
chosen_faults = ['normal', 'imbalance', 'vertical-misalignment', 'horizontal-misalignment']
is_chosen = features['fault'].isin(chosen_faults)
features_chosen = features[is_chosen]

fscore = sel.calc_f_stat(features_chosen, columns, summary=True)
fscore.plot.bar(
    title='F score in Frequency domain',
    xlabel='Feature',
    ylabel='F statistic',
    figsize=(5, 4),
    grid=True,
    legend=False,
)
plt.show()

F score in all axes against the multiclass fault label (per window size)

In [None]:
# Score the frequency-domain features with the summary F statistic via
# sel.calc_score_in_fft_windows, then plot the result keyed by 'window'.
df = sel.calc_score_in_fft_windows(
    features,
    columns,
    lambda f, c: sel.calc_f_stat(f, c, summary=True),
)
sel.plot_fscore_part(df, 'window')

All faults: best features by ranking over all windows (non-weighted vs. weighted by score)
- lower is better

In [None]:
# Rank plot for `df`, passing 'window' as the key.
# `df` is expected to hold the per-window scores from the preceding cell;
# NOTE(review): a later cell rebinds `df`, so run the notebook top to bottom.
sel.plot_rank(df, 'window')

F score in all axes against the multiclass fault label (per window size), restricted to the chosen faults

In [None]:
# Same scoring as for all faults, but on the subset in `features_chosen`.
df_chosen = sel.calc_score_in_fft_windows(
    features_chosen,
    columns,
    lambda f, c: sel.calc_f_stat(f, c, summary=True),
)
# NOTE(review): the all-faults cell plots with sel.plot_fscore_part(df, 'window');
# confirm plot_fscore_in_fft_win is the intended helper here.
sel.plot_fscore_in_fft_win(df_chosen)

Chosen faults: best features by ranking over all windows (non-weighted vs. weighted by score)

In [None]:
# Rank plot for `df_chosen` (the scores computed for the chosen faults), passing 'window' as the key.
sel.plot_rank(df_chosen, 'window')

Wavelet packet transform

In [None]:
# Load the wavelet-domain features and score them with the summary F statistic.
# Note: this rebinds `features` and `df`, which the frequency-domain cells above
# also use, so re-running those cells after this one would pick up the wavelet data.
signal_names = ['ax', 'ay', 'az', 'bx', 'by', 'bz']
features = sel.load_wavelet_domain_features(signal_names)
df = sel.calc_score_in_wpd_features(
    features,
    lambda f, c: sel.calc_f_stat(f, c, summary=True),
)
# Plot keyed by 'metric'; n=20 presumably caps the number of entries shown — TODO confirm.
sel.plot_fscore_part(df, 'metric', n=20)

WPD features in one layer

In [None]:
# Score the wavelet-domain features, then keep only rows whose index label
# starts with the 'L<level>' prefix and plot them keyed by 'metric'.
level = 3
df = sel.calc_score_in_wpd_features(
    features,
    lambda f, c: sel.calc_f_stat(f, c, summary=True),
)
layer_prefix = f'L{level}'
in_layer = df.index.str.startswith(layer_prefix)
layer = df[in_layer]
sel.plot_fscore_part(layer, 'metric')

In [None]:
# Score the wavelet-domain features, then keep only rows whose index label
# starts with the 'L<level>' prefix and plot them keyed by 'metric'.
level = 4
df = sel.calc_score_in_wpd_features(
    features,
    lambda f, c: sel.calc_f_stat(f, c, summary=True),
)
layer_prefix = f'L{level}'
in_layer = df.index.str.startswith(layer_prefix)
layer = df[in_layer]
sel.plot_fscore_part(layer, 'metric')