In [None]:
# Identifiers of the scoring metrics this notebook can switch between.
# NOTE(review): presumably 'C' = correlation, 'F' = F score, 'MI' = mutual information — confirm.
METRICS = ['C', 'F', 'MI']
# Active metric: the first entry of METRICS ('C').
METRIC = METRICS[0]
# TODO: fix  (original note; it does not say what needs fixing)

In [None]:
from zipfile import ZipFile
import numpy as np
import pandas as pd
from itertools import pairwise
from typing import List, Tuple, Callable

from numpy.lib.stride_tricks import sliding_window_view
from scipy.stats import skew, kurtosis, entropy
from scipy.signal import welch, windows, find_peaks
from scipy.interpolate import interp1d
from scipy.fft import rfft

from scipy.signal import butter, iirfilter, freqz, lfilter, decimate
import pywt
from tsfel import feature_extraction as ft
import matplotlib.pylab as plt


def filter_out_metadata_columns(df):
    """Return ``df`` restricted to the columns not listed in ``METADATA_COLUMNS_ALL``.

    The remaining columns keep their original order.
    """
    is_metadata = df.columns.isin(METADATA_COLUMNS_ALL)
    feature_columns = df.columns[~is_metadata]
    return df[feature_columns]


def normalize_features(features, columns):
    """Standardize the selected columns of ``features`` and return a new DataFrame.

    The columns named in ``columns`` are run through a ``StandardScaler``; every
    other column is passed through unchanged (``remainder='passthrough'``).

    Args:
        features: DataFrame holding the columns to scale plus any extra columns.
        columns: Labels of the columns to standardize.

    Returns:
        DataFrame built from the transformer output, labelled with the names from
        ``get_feature_names_out()`` (original names, because
        ``verbose_feature_names_out=False``). ColumnTransformer emits the
        transformed columns first and the passthrough columns after them, so the
        column order can differ from ``features``. No index is passed to the
        DataFrame constructor, so with the default array output the result gets a
        fresh default index rather than the index of ``features``.

    NOTE(review): ``Pipeline``, ``StandardScaler`` and ``ColumnTransformer`` are not
    imported in the visible part of this notebook — confirm they are in scope.
    """
    standard_transformer = Pipeline(steps=[('standard', StandardScaler())])
    preprocessor = ColumnTransformer(
        remainder='passthrough',
        transformers=[
            ('std', standard_transformer, columns)
        ],
        verbose_feature_names_out=False
    )
    normalized = preprocessor.fit_transform(features)
    return pd.DataFrame(normalized, columns=preprocessor.get_feature_names_out())


def calc_score_in_wpd_features(src, func):
    """Score the WPD columns separately for every value of ``src['feature']``.

    Args:
        src: DataFrame with a ``'feature'`` column used for grouping.
        func: Callable ``func(group, columns)`` returning a DataFrame of scores for
            the given group and list of column labels.

    Returns:
        The per-group results concatenated in group order, with an added
        categorical ``'metric'`` column holding the group's ``'feature'`` value.
        When ``src`` yields no groups, an empty DataFrame that only has the
        ``'metric'`` column.
    """
    scored = []
    for metric, group in src.groupby(by='feature', observed=True):
        # Keep the frame's own column order (duplicates dropped). Going through a
        # set made the order passed to `func` vary from run to run.
        columns = [col for col in dict.fromkeys(group.columns)
                   if col not in WPD_COLUMNS_EXCLUDE]
        scored.append(func(group, columns).assign(metric=metric))

    if not scored:
        return pd.DataFrame({'metric': pd.Categorical([])})

    # Concatenate once instead of growing a frame inside the loop.
    result = pd.concat(scored)
    result['metric'] = result['metric'].astype('category')
    return result


def plot_fscore_part(df, part, title, n=None):
    """Draw one bar chart per observed value of ``df[part]``, side by side.

    Args:
        df: DataFrame of scores; rows are grouped by the ``part`` column.
        part: Name of the grouping column.
        title: Text used as the y-axis label of every panel.
        n: When given, only the first ``n`` rows of each group are plotted.

    One panel is created per group that actually occurs in ``df`` (declared but
    unused categories get no empty panel). Nothing is drawn when there are no
    groups.
    """
    groups = list(df.groupby(by=part, observed=True))
    if not groups:
        # plt.subplots rejects a zero-column grid, and there is nothing to show.
        return

    # squeeze=False keeps `axes` two-dimensional, so a single group can be
    # indexed the same way as several.
    fig, axes = plt.subplots(1, len(groups), figsize=(20, 4), squeeze=False)

    for panel, (label, group) in zip(axes[0], groups):
        if n is not None:
            group = group.iloc[:n]
        group.plot.bar(grid=True, xlabel='Feature', ylabel=title, legend=False, title=label, ax=panel)

    fig.tight_layout()
    plt.show()


def features_wavelet_domain(zip_file: ZipFile, filename: str, max_level: int = 6,
                            wavelet: str = 'dmey', ordering: str = 'freq') -> pd.DataFrame:
    """Extract wavelet packet decomposition (WPD) features from one recording.

    Every column listed in ``mafaulda.COLUMNS`` is decomposed with a
    ``pywt.WaveletPacket``. For each of the statistics 'energy', 'energy_ratio',
    'kurtosis' and 'negentropy' one row is produced that holds the statistic of
    every node on levels 1..``max_level``.

    Args:
        zip_file: Archive handed to ``mafaulda.csv_import``.
        filename: Recording to load; also parsed by ``mafaulda.parse_filename``
            into fault, severity and sequence number.
        max_level: Deepest decomposition level to include.
        wavelet: Wavelet name passed to ``pywt.WaveletPacket``.
        ordering: Node ordering passed to ``WaveletPacket.get_level``.

    Returns:
        DataFrame with one row per (column, statistic) pair. Columns are 'fault',
        'severity', 'seq', 'rpm' (mean of the recording's 'rpm' column), 'axis',
        'feature', followed by one ``L{level}_{i}`` column per node.
    """
    print(f'Processing: {filename}')

    def _energy_ratios(nodes):
        # Compute each node energy once, then normalise by the level total.
        # A level whose total energy is zero is not special-cased.
        energies = [energy(node.data) for node in nodes]
        total_energy = np.sum(energies)
        return [node_energy / total_energy for node_energy in energies]

    # Statistic name -> function mapping the nodes of one level to a list of values.
    statistics = {
        'energy': lambda nodes: [energy(node.data) for node in nodes],
        'energy_ratio': _energy_ratios,
        'kurtosis': lambda nodes: [kurtosis(node.data) for node in nodes],
        'negentropy': lambda nodes: [negentropy(node.data) for node in nodes],
    }

    ts = mafaulda.csv_import(zip_file, filename)
    fault, severity, seq = mafaulda.parse_filename(filename)
    rpm = ts['rpm'].mean()

    result = []
    for col in mafaulda.COLUMNS:
        wp = pywt.WaveletPacket(data=ts[col], wavelet=wavelet, mode='symmetric')

        # Node lists and column labels depend only on the level, so build them
        # once per signal instead of once per statistic.
        levels = [wp.get_level(level, ordering) for level in range(1, max_level + 1)]
        wpd_header = [
            f'L{level}_{i}'
            for level, nodes in enumerate(levels, start=1)
            for i in range(len(nodes))
        ]

        for feature, statistic in statistics.items():
            feature_vector = [value for nodes in levels for value in statistic(nodes)]
            row = {
                'fault': fault,
                'severity': severity,
                'seq': seq,
                'rpm': rpm,
                'axis': col,
                'feature': feature
            }
            row.update(dict(zip(wpd_header, feature_vector)))
            result.append(row)

    return pd.DataFrame(result).reset_index(drop=True)


def dc_blocker(x: np.ndarray, cutoff: float, order, fs, plot=False):
    """High-pass filter ``x`` to suppress its DC offset.

    Args:
        x: Input samples.
        cutoff: High-pass cutoff frequency, in the same units as ``fs``.
        order: Order of the IIR filter. ``None`` selects a first-order filter,
            the order this function used before it honoured the argument.
        fs: Sampling frequency of ``x``.
        plot: When true, also plot the filter's frequency response.

    Returns:
        The filtered signal as returned by ``scipy.signal.lfilter``.
    """
    # `order` was accepted but ignored in favour of a hard-coded 1.
    filter_order = 1 if order is None else order
    b, a = iirfilter(filter_order, cutoff, btype='highpass', fs=fs)
    if plot:
        plot_filter_response(b, a)
    return lfilter(b, a, x)

def downsample(x: np.ndarray, k, fs_reduced, fs):
    """Decimate ``x`` by an integer factor using scipy's IIR anti-aliasing filter.

    Args:
        x: Input samples.
        k: Decimation factor. When ``None`` it is derived as ``fs // fs_reduced``.
        fs_reduced: Target sampling frequency; only used when ``k`` is ``None``.
        fs: Original sampling frequency; only used when ``k`` is ``None``.

    Returns:
        The decimated signal from ``scipy.signal.decimate(x, k, ftype='iir')``.

    Raises:
        ValueError: If the decimation factor is smaller than 1.
    """
    if k is None:
        # Floor division of float rates yields a float, which decimate rejects.
        k = int(fs // fs_reduced)
    if k < 1:
        raise ValueError(f'decimation factor must be >= 1, got {k}')
    return decimate(x, k, ftype='iir')

def lowpass_filter(x: np.ndarray, cutoff: float, order, fs, plot=False):
    """Low-pass filter ``x`` with a Butterworth filter.

    Args:
        x: Input samples.
        cutoff: Low-pass cutoff frequency, in the same units as ``fs``.
        order: Order of the Butterworth filter. ``None`` selects a second-order
            filter, the order this function used before it honoured the argument.
        fs: Sampling frequency of ``x``.
        plot: When true, also plot the filter's frequency response.

    Returns:
        The filtered signal as returned by ``scipy.signal.lfilter``.
    """
    # `order` was accepted but ignored in favour of a hard-coded 2.
    filter_order = 2 if order is None else order
    b, a = butter(filter_order, cutoff, btype='lowpass', fs=fs)
    if plot:
        plot_filter_response(b, a)
    return lfilter(b, a, x)

def plot_filter_response(b, a, title='', fs=50000):
    """Plot the magnitude response, in dB, of the digital filter ``b`` / ``a``.

    Args:
        b: Numerator coefficients of the filter.
        a: Denominator coefficients of the filter.
        title: Title of the plot.
        fs: Sampling frequency used to scale the frequency axis. Pass the rate the
            filter was designed for; the default of 50000 is the value that was
            previously hard-coded.
    """
    w, h = freqz(b, a, fs=fs)
    fig, ax = plt.subplots()
    ax.plot(w, 20 * np.log10(abs(h)), 'b')
    ax.set_xlabel('Frequency [Hz]')
    ax.set_ylabel('Amplitude [dB]')
    ax.set_title(title)

def find_harmonics(f: np.array, Pxx: np.array) -> (np.array, np.array):
    """Locate the spectral peaks that stand out from the rest of the spectrum.

    A local maximum of ``Pxx`` is kept when its value is at least the mean of
    ``Pxx`` plus two standard deviations.

    Args:
        f: Frequency of each spectral bin.
        Pxx: Spectrum values, aligned with ``f``.

    Returns:
        Tuple ``(indices, frequencies)``: positions of the kept peaks in ``Pxx``
        and the corresponding entries of ``f``.
    """
    min_height = Pxx.mean() + 2 * np.std(Pxx)
    peak_idx, _ = find_peaks(Pxx)

    # Mask over the detected peaks: True where the peak reaches the threshold.
    prominent = Pxx[peak_idx] >= min_height
    return peak_idx[prominent], f[peak_idx][prominent]

# Reference level for decibel conversion; per the original note it stands for 1 um/s^2.
# NOTE(review): under the usual 20*log10(x / ref) definition a value equal to the
# reference is 0 dB, not 1 dB as the original comment said — confirm where DB_REF is used.
DB_REF = 0.000001

def resolution_calc(fs, window):
    """Print the time and frequency resolution of an analysis window.

    Args:
        fs: Sampling frequency in Hz.
        window: Window length in samples.

    The time step is reported in milliseconds (``window / fs * 1000``) and the
    frequency step in Hz (``fs / window``).
    """
    print('Window size:', window)
    print('Heinsenberg box')

    time_step_ms = window / fs * 1000
    print('\tTime step:', time_step_ms, 'ms')

    frequency_step_hz = fs / window
    print('\tFrequency step:', frequency_step_hz, 'Hz')


Time domain

Unnormalized vs. Normalized features
- Result: the F score is independent of feature scaling (normalized and unnormalized features give the same scores).

In [None]:
# Score the time-domain features as loaded and again after standardization,
# then show both results side by side.
features = load_td_feat(['az'], all=True, path=FEATURES_PATH)
columns = filter_out_metadata_columns(features).columns
fscore = calc_func(features, columns)

features_normalized = normalize_features(features, columns)
fscore_norm = calc_func(features_normalized, columns)

# The figure is sized once, here. Passing figsize to .plot.bar together with ax=
# makes pandas resize the whole shared figure, which would override this size.
fig, ax = plt.subplots(1, 2, figsize=(20, 5))
fscore.plot.bar(grid=True, xlabel='Feature', ylabel=title, legend=False, title='Unnormalized', ax=ax[0])
fscore_norm.plot.bar(grid=True, xlabel='Feature', ylabel=title, legend=False, title='Normalized', ax=ax[1])
fig.tight_layout()
plt.show()

Wavelet packet transform

In [None]:
# Load the WPD features for all three axes, score the coefficient columns once per
# value of the 'feature' column, and plot the result grouped by 'metric'.
# NOTE(review): n=20 presumably limits each panel to its first 20 rows, as the
# plot_fscore_part defined in this notebook does — confirm for the `sel` version.
# NOTE(review): this cell calls load_wavelet_domain_features and
# calc_score_in_wpd_features unqualified while later cells use the `sel.` versions —
# confirm both resolve to the same code.
features = load_wavelet_domain_features(['ax', 'ay', 'az'], path=FEATURES_PATH, all=True)
df = calc_score_in_wpd_features(features, calc_func)
sel.plot_fscore_part(df, 'metric', title, n=20)

WPD features within a single decomposition level

In [None]:
# Score the WPD features and keep only the rows whose index label starts with 'L3'
# (presumably the level-3 coefficient columns), then plot them grouped by 'metric'.
# NOTE(review): the scores are recomputed from `features` on every run of this cell.
level = 3
df = sel.calc_score_in_wpd_features(features, calc_func)
layer = df[df.index.str.startswith(f'L{level}')]
sel.plot_fscore_part(layer, 'metric', title)

In [None]:
# Score the WPD features and keep only the rows whose index label starts with 'L4'
# (presumably the level-4 coefficient columns), then plot them grouped by 'metric'.
# NOTE(review): the scores are recomputed from `features` on every run of this cell.
level = 4
df = sel.calc_score_in_wpd_features(features, calc_func)
layer = df[df.index.str.startswith(f'L{level}')]
sel.plot_fscore_part(layer, 'metric', title)

In [None]:
def plot_wpd_energy_ratio_per_level(features, wpd_axis, max_level=6, ylabel='MI'):
    """Plot the score of every WPD energy-ratio column, one subplot per level.

    Args:
        features: WPD feature table with 'axis' and 'feature' columns plus the
            coefficient columns named ``L{level}_{i}``.
        wpd_axis: Collection of axis names to keep (matched against
            ``features['axis']``); also shown in the figure title.
        max_level: Number of decomposition levels to plot, starting at level 1.
        ylabel: Label of the y axis of every subplot.

    The scores come from the notebook-level ``calc_func``, called once per level
    with the energy-ratio rows and that level's column labels.
    """
    selected = features[features['axis'].isin(wpd_axis)]
    features_energy_ratio = selected[selected['feature'] == 'energy_ratio']

    # squeeze=False keeps the axes array two-dimensional even for one level.
    fig, ax = plt.subplots(max_level, 1, figsize=(15, 20), squeeze=False)

    for level, o in enumerate(ax[:, 0], start=1):
        # Take the level's columns from the frame being plotted rather than from
        # a notebook-global `columns`, which may describe a different table.
        # The trailing underscore keeps 'L1' from also matching 'L10_...'.
        cols = np.array(
            [col for col in features_energy_ratio.columns
             if str(col).startswith(f'L{level}_')],
            dtype=str,
        )
        mi = calc_func(features_energy_ratio, cols)

        o.bar(mi.index, mi.values.T[0])
        o.grid(True)
        o.set_xlabel('Feature')
        o.set_ylabel(ylabel)

        # Rotate x labels by 45 deg; fixing the ticks first keeps labels aligned.
        o.set_xticks(o.get_xticks())
        o.set_xticklabels(o.get_xticklabels(), rotation=45, ha='right')

    fig.suptitle(f'WPD energy ratio: Axis "{wpd_axis}"', fontsize=16, y=0.9)
    plt.show()

In [None]:
def level_to_frequency_bands(level, fs):
    """Print the frequency band covered by each node of one WPD level.

    The range from 0 to ``fs / 2`` is split into ``2 ** level`` bands of equal
    width, printed in ascending order as ``L{level}_{index} = [low; high] Hz``.

    Args:
        level: Decomposition level.
        fs: Sampling frequency in Hz.
    """
    n_bands = 2 ** level
    width = (fs / 2) / n_bands
    for index in range(n_bands):
        low = index * width
        high = low + width
        print(f'L{level}_{index} = [{low}; {high}] Hz')

level_to_frequency_bands(level=4, fs=50000)

Features in Wavelets

In [None]:
features = sel.load_wavelet_domain_features(['ax', 'ay', 'az'], path=FEATURES_PATH, all=True)
# df = sel.calc_score_in_wpd_features(features, calc_func)

WPD_AXIS = 'ax'
# Scoring several axes at once significantly reduces MI, so keep a single axis.
# .copy() makes the filtered frame independent of the loaded one, so the column
# assignment below does not raise SettingWithCopyWarning.
features = features[features['axis'] == WPD_AXIS].copy()          # One axis
features['fault'] = features['fault'].astype('category')
# features = features[features['axis'].isin(['ax', 'ay', 'az'])]  # One measurement position

# Every column that is not metadata is treated as a WPD coefficient column.
columns = [col for col in features.columns
           if col not in ('fault', 'severity', 'seq', 'rpm', 'axis', 'feature')]
features.head()

In [None]:
# Score the WPD energy rows and plot the first 30 entries of the result.
features_energy = features.loc[features['feature'] == 'energy']
print(len(features_energy))

mi = calc_func(features_energy, columns)
mi.head(30).plot.bar(figsize=(20, 4), grid=True, ylabel=title, title='WPD Energy')
plt.show()

In [None]:
# `features` was already restricted to WPD_AXIS above, so pass that axis: listing
# all three axes here would only make the figure title claim data that is not plotted.
plot_wpd_energy_ratio_per_level(features, [WPD_AXIS])

In [None]:
# Score the WPD negentropy rows and plot the first 30 entries of the result.
features_entropy = features.loc[features['feature'] == 'negentropy']
print(len(features_entropy))

mi = calc_func(features_entropy, columns)
mi.head(30).plot.bar(figsize=(20, 4), grid=True, ylabel=title, title='WPD Negentropy')
plt.show()

In [None]:
# Score the WPD kurtosis rows and plot the first 30 entries of the result.
features_kurtosis = features[features['feature'] == 'kurtosis']
print(len(features_kurtosis))

# Score the kurtosis rows selected above; this cell previously scored the
# negentropy frame from the preceding cell under the kurtosis title.
mi = calc_func(features_kurtosis, columns)
mi.iloc[:30].plot.bar(figsize=(20, 4), grid=True, ylabel=title, title='WPD Kurtosis')
plt.show()