## Feature selection

Choose metric
- Correlation
- F statistic
- Mutual information

In [None]:
# Short codes of the supported scoring metrics:
# 'C' = correlation, 'F' = F statistic, 'MI' = mutual information.
METRICS = ['C', 'F', 'MI']
# Metric used by every scoring cell below; change the index to switch metric.
METRIC = METRICS[0]

In [None]:
import seaborn as sb
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_selection import VarianceThreshold

import numpy as np
import sys
sys.path.append('../')
from vibrodiagnostics import selection as sel
from vibrodiagnostics.models import fault_labeling


# Directory passed as `path=` to the sel.load_* feature loaders.
FEATURES_PATH = '../../datasets/features_data/'


# Bind the scoring function and the label shown on the plots for the chosen metric.
if METRIC == 'C':
    calc_func = sel.calc_corr_stat
    title = 'Correlation'
elif METRIC == 'F':
    calc_func = sel.calc_f_stat
    title = 'F statistic'
elif METRIC == 'MI':
    calc_func = sel.calc_mutual_information
    title = 'Mutual information'
else:
    # Fail here instead of with a NameError on calc_func/title in a later cell.
    raise ValueError(f'Unknown METRIC {METRIC!r}; expected one of {METRICS}')

Time domain

Unnormalized vs. Normalized features
- Result found: F score is independent of scaling

In [None]:
# Load the time-domain feature table for the 'az' axis and keep the non-metadata columns.
features = sel.load_td_feat(['az'], all=True, path=FEATURES_PATH)
columns = sel.filter_out_metadata_columns(features).columns

# Score the raw features first, then the normalized copy, with the selected metric.
fscore = calc_func(features, columns)
features_normalized = sel.normalize_features(features, columns)
fscore_norm = calc_func(features_normalized, columns)

# Side-by-side bar charts: left = unnormalized scores, right = normalized scores.
fig, ax = plt.subplots(1, 2, figsize=(20, 5))
panels = (('Unnormalized', fscore), ('Normalized', fscore_norm))
for axis, (panel_title, scores) in zip(ax, panels):
    scores.plot.bar(figsize=(10, 4), grid=True, xlabel='Feature', ylabel=title, legend=False, title=panel_title, ax=axis)
plt.show()

Frequency domain

All windows in all axes against the multiclass fault label

In [None]:
# Load the frequency-domain feature table for the 'az' axis and keep the non-metadata columns.
features = sel.load_fd_feat(['az'], all=True, path=FEATURES_PATH)
columns = sel.filter_out_metadata_columns(features).columns

fscore = calc_func(features, columns)
# The chart title follows the selected metric so it always agrees with the y label.
fscore.plot.bar(figsize=(20, 4), grid=True, xlabel='Feature', ylabel=title, legend=False, title=f'{title} in Frequency domain')
plt.show()

# List the fault classes in the table (the .cat accessor needs a categorical 'fault' column).
print(features['fault'].cat.categories)
features

To some faults

In [None]:
# Keep only the rows whose fault label is one of the four selected classes.
features_chosen = features[features['fault'].isin([
    'normal', 'imbalance', 'vertical-misalignment', 'horizontal-misalignment'
])]
fscore = calc_func(features_chosen, columns)
# The chart title follows the selected metric so it always agrees with the y label.
fscore.plot.bar(figsize=(20, 4), grid=True, xlabel='Feature', ylabel=title, legend=False, title=f'{title} in Frequency domain')
plt.show()

In all axes against the multiclass fault label (for each window size)

In [None]:
# Score the frequency-domain features with the selected metric.
# Presumably one score per feature and FFT window size — confirm in vibrodiagnostics.selection.
df = sel.calc_score_in_fft_windows(features, columns, calc_func)
# Plot the scores indexed by feature name; 'window' is passed as the second argument.
sel.plot_fscore_part(df.set_index('feature'), 'window', title)

All faults: best features by ranking over all windows (non-weighted vs. weighted by score)
- lower rank is better

In [None]:
# Rank plot for the all-fault scores computed in the previous cell (`df`).
sel.plot_rank(df, 'window')

In all axes against the multiclass fault label (for each window size), chosen faults only

In [None]:
# Same scoring as for all faults, but restricted to the rows in `features_chosen`.
df_chosen = sel.calc_score_in_fft_windows(features_chosen, columns, calc_func)
# NOTE(review): the all-faults cell plots with sel.plot_fscore_part(..., 'window', title),
# while this one calls sel.plot_fscore_in_fft_win — confirm both helpers exist and are intended.
sel.plot_fscore_in_fft_win(df_chosen.set_index('feature'), title)

Chosen faults: best features by ranking over all windows (non-weighted vs. weighted by score)

In [None]:
# Rank plot for the chosen-fault scores computed in the previous cell (`df_chosen`).
sel.plot_rank(df_chosen, 'window')

Wavelet packet transform

In [None]:
# Rebind `features` to the wavelet-domain table loaded for the 'ax', 'ay' and 'az' axes.
features = sel.load_wavelet_domain_features(['ax', 'ay', 'az'], path=FEATURES_PATH, all=True)
# Score the WPD features with the selected metric.
df = sel.calc_score_in_wpd_features(features, calc_func)
# n=20 presumably limits the plot to 20 entries — confirm against sel.plot_fscore_part.
sel.plot_fscore_part(df, 'metric', title, n=20)

WPD features in one layer

In [None]:
# Keep only the WPD scores whose index label starts with 'L<level>' and plot them.
level = 3
df = sel.calc_score_in_wpd_features(features, calc_func)
level_prefix = f'L{level}'
layer = df.loc[df.index.str.startswith(level_prefix)]
sel.plot_fscore_part(layer, 'metric', title)

In [None]:
# Keep only the WPD scores whose index label starts with 'L<level>' and plot them.
level = 4
df = sel.calc_score_in_wpd_features(features, calc_func)
level_prefix = f'L{level}'
layer = df.loc[df.index.str.startswith(level_prefix)]
sel.plot_fscore_part(layer, 'metric', title)

In [None]:
def plot_wpd_energy_ratio_per_level(features, wpd_axis):
    """Plot the selected metric of the WPD energy-ratio features, one subplot per level.

    Rows of ``features`` are restricted to the axes in ``wpd_axis`` and to
    ``feature == 'energy_ratio'``. For each level 1..6 the columns whose names
    start with ``L<level>`` are scored with ``calc_func`` and drawn as a bar chart.

    Relies on the notebook globals ``columns``, ``calc_func`` and ``title``.

    Args:
        features: DataFrame with 'axis' and 'feature' columns plus the columns
            named in the global ``columns``.
        wpd_axis: List-like of axis names to keep (passed to ``Series.isin``);
            it is also shown in the figure title.
    """
    features = features[features['axis'].isin(wpd_axis)]
    features_energy_ratio = features[features['feature'] == 'energy_ratio']

    fig, ax = plt.subplots(6, 1, figsize=(15, 20))

    # The column names do not change between levels, so convert them once.
    all_cols = np.array(columns)

    for level in range(1, 7):
        cols = all_cols[np.char.startswith(all_cols, f'L{level}')]
        mi = calc_func(features_energy_ratio, cols)

        o = ax.flatten()[level-1]
        o.bar(mi.index, mi.values.T[0])
        o.grid(True)
        o.set_xlabel('Feature')
        # Use the active metric's label (not a fixed 'MI') so the axis matches calc_func.
        o.set_ylabel(title)

        # Rotate x labels by 45 deg
        o.set_xticks(o.get_xticks())
        o.set_xticklabels(o.get_xticklabels(), rotation=45, ha='right')

    fig.suptitle(f'WPD energy ratio: Axis "{wpd_axis}"', fontsize=16, y=0.9)
    plt.show()

In [None]:
def level_to_frequency_bands(level, fs):
    """Print the frequency band of every node at one decomposition level.

    The range from 0 to ``fs / 2`` is split into ``2 ** level`` equal-width
    bands; one line ``L<level>_<index> = [<low>; <high>] Hz`` is printed per band.

    Args:
        level: Decomposition level; determines the number of bands.
        fs: Sampling frequency in Hz.
    """
    band_total = 2 ** level
    band_span = (fs / 2) / band_total
    lower_edges = [idx * band_span for idx in range(band_total)]
    for idx, low in enumerate(lower_edges):
        high = low + band_span
        print(f'L{level}_{idx} = [{low}; {high}] Hz')


level_to_frequency_bands(level=4, fs=50000)

Features in Wavelets

In [None]:
# Reload the wavelet-domain feature table for the 'ax', 'ay' and 'az' axes.
features = sel.load_wavelet_domain_features(['ax', 'ay', 'az'], path=FEATURES_PATH, all=True)

WPD_AXIS = 'ax'
# More axis at once significantly reduces MI
# .copy() makes the subset an independent frame, so the column assignment below
# does not raise pandas' SettingWithCopyWarning.
features = features[features['axis'] == WPD_AXIS].copy()          # One axis
features['fault'] = features['fault'].astype('category')
#features = features[features['axis'].isin(['ax', 'ay', 'az'])]  # One measurement position

# Every column that is not listed as metadata here is treated as a feature to score.
columns = [col for col in features.columns
           if col not in ('fault', 'severity', 'seq', 'rpm', 'axis', 'feature')]
features.head()

In [None]:
# Score the rows holding the WPD 'energy' feature and chart the first 30 entries.
energy_mask = features['feature'] == 'energy'
features_energy = features[energy_mask]
print(len(features_energy))

mi = calc_func(features_energy, columns)
energy_scores = mi.head(30)
energy_scores.plot.bar(figsize=(20, 4), grid=True, ylabel=title, title='WPD Energy')
plt.show()

In [None]:
# NOTE(review): when cells run top to bottom, `features` was already reduced to
# WPD_AXIS rows in the "Features in Wavelets" cell, so the three-axis list only
# changes the figure title, not the plotted rows — confirm the intended axes.
plot_wpd_energy_ratio_per_level(features, ['ax', 'ay', 'az'])

In [None]:
# Score the rows holding the WPD 'negentropy' feature and chart the first 30 entries.
negentropy_mask = features['feature'] == 'negentropy'
features_entropy = features[negentropy_mask]
print(len(features_entropy))

mi = calc_func(features_entropy, columns)
negentropy_scores = mi.head(30)
negentropy_scores.plot.bar(figsize=(20, 4), grid=True, ylabel=title, title='WPD Negentropy')
plt.show()

In [None]:
# Score the rows holding the WPD 'kurtosis' feature and chart the first 30 entries.
features_kurtosis = features[features['feature'] == 'kurtosis']
print(len(features_kurtosis))

# Score the kurtosis rows selected above, so the chart matches its 'WPD Kurtosis' title.
mi = calc_func(features_kurtosis, columns)
mi.iloc[:30].plot.bar(figsize=(20, 4), grid=True, ylabel=title, title='WPD Kurtosis')
plt.show()