Feature discovery

In [None]:
from zipfile import ZipFile
import pandas as pd
import numpy as np
import padasip as pa
import matplotlib.pyplot as plt

import pywt
import ewtpy
import warnings

import sys
sys.path.append('../../')
from feature import mafaulda
from feature import discovery as fdiscovery

# Limit processing to the first N files of the archive; None processes every file.
N = None  
# When True, the extracted feature tables are written to CSV files.
SAVE = True
# Relative path to the zipped MAFAULDA dataset.
MAFAULDA_PATH = '../../datasets/MAFAULDA.zip'

Extract metadata about recordings for dataset splitting

In [None]:
# Build the recording index from the archive and store it next to the notebook as CSV.
# NOTE(review): this file is written even when SAVE is False, unlike the feature
# tables saved further below — confirm that this is intended.
file_index = mafaulda.dataset_index(MAFAULDA_PATH)
file_index.to_csv('mafaulda_metadata.csv', index=False)
file_index.head()

Preprocessing
1. DC Blocker
2. Downsampling (with antialiasing filter)
3. Adaptive noise cancellation (not applied: no noise reference signal is available)

In [None]:
def preprocess_filter(ts_source):
    """Remove the DC component of every channel and downsample it.

    Each column named in ``mafaulda.COLUMNS`` is passed through
    ``fdiscovery.dc_blocker`` (``cutoff=1``) and then through
    ``fdiscovery.downsample`` with the notebook-level ``FS_DOWN`` as the
    target sampling rate. Adaptive noise cancellation (a padasip NLMS filter)
    was considered but is not applied because no noise reference is available.

    Parameters
    ----------
    ts_source
        One recording, indexable by column name. It must provide every column
        in ``mafaulda.COLUMNS`` (which has to include ``'ax'``) and a ``'key'``
        column whose entry at label 0 identifies the recording.

    Returns
    -------
    pandas.DataFrame
        The processed channels plus a constant ``'key'`` column, indexed by
        time ``'t'`` measured from the start of the recording.
    """
    channels = {}
    for col in mafaulda.COLUMNS:
        x = fdiscovery.dc_blocker(ts_source[col], cutoff=1)
        channels[col] = fdiscovery.downsample(x, fs_reduced=FS_DOWN)

    # Recording length in seconds, derived from the original sampling rate.
    duration = len(ts_source['ax']) / mafaulda.FS_HZ
    n_resampled = len(channels['ax'])

    # endpoint=False keeps the spacing at duration / n_resampled (one sample
    # period). Including the endpoint would stretch the axis to
    # duration / (n_resampled - 1) and place the last sample at t == duration.
    channels['t'] = np.linspace(0, duration, n_resampled, endpoint=False)
    channels['key'] = ts_source['key'][0]
    return pd.DataFrame(channels).set_index('t')


def spectogram(x):
    """Draw a magnitude spectrogram of ``x`` on a new 20x4 figure.

    The sampling rate is read from the notebook-level ``FS_DOWN``, so ``x``
    is expected to be a signal that has already been downsampled to that rate.
    The function only draws; it returns nothing.
    """
    plt.figure(figsize=(20, 4))
    specgram_options = {
        'NFFT': 256,
        'Fs': FS_DOWN,
        'mode': 'magnitude',
        'scale': 'dB',
    }
    # The values returned by specgram are not needed here.
    plt.specgram(x, **specgram_options)
    plt.xlabel('Time [s]')
    plt.ylabel('Frequency [Hz]')

In [None]:
# Target sampling rate after downsampling; read by preprocess_filter and spectogram.
FS_DOWN = 20000

# Open the archive in a context manager so the file handle is released once the
# first recording has been loaded (assumes csv_import reads the data eagerly —
# TODO confirm).
with ZipFile(MAFAULDA_PATH) as zip_file:
    files = mafaulda.get_mafaulda_files(zip_file)
    df = mafaulda.csv_import(zip_file, files[0])

# Last expression of the cell: display the preprocessed first recording.
preprocess_filter(df)

Time domain features

In [None]:
# Extract time-domain features from the first N files (all files when N is None).
# The archive is opened in a context manager so its handle is closed after the
# import (assumes import_files materialises its result eagerly — TODO confirm).
with ZipFile(MAFAULDA_PATH) as dataset:
    files = mafaulda.get_mafaulda_files(dataset)
    if N is not None:
        files = files[:N]
    td_features = mafaulda.import_files(dataset, files, fdiscovery.features_time_domain)
td_features.info()

In [None]:
# Write the time-domain feature table to CSV (index column omitted) when saving is enabled.
if SAVE:
    td_features.to_csv('all_td_features.csv', index=False)

Frequency domain features
- Read also: https://librosa.org/doc/0.10.1/feature.html

Extract frequency domain features

In [None]:
# Overlap setting and candidate window lengths (2**8, 2**10, ..., 2**16 samples).
OVERLAP = 0.5
WINDOW_SIZES = tuple(2**exponent for exponent in range(8, 17, 2))

# Call the resolution helper for every window length at the dataset's native
# sampling rate, with an empty line after each call.
for w in WINDOW_SIZES:
    mafaulda.resolution_calc(mafaulda.FS_HZ, w)
    print()

In [None]:
# Extract frequency-domain features from the first N files (all files when N is None).
# The archive is opened in a context manager so its handle is closed after the
# import (assumes import_files materialises its result eagerly — TODO confirm).
with ZipFile(MAFAULDA_PATH) as dataset:
    files = mafaulda.get_mafaulda_files(dataset)
    if N is not None:
        files = files[:N]
    fd_features = mafaulda.import_files(dataset, files, fdiscovery.features_frequency_domain)
fd_features.head()

In [None]:
# Inspect the frequency-domain feature table through its info() summary.
fd_features.info()

In [None]:
# Write the frequency-domain feature table to CSV (index column omitted) when saving is enabled.
if SAVE:
    fd_features.to_csv('all_fd_features.csv', index=False)

TSFEL package features

In [None]:
# Extract TSFEL features from the first N files (all files when N is None).
# The archive is opened in a context manager so its handle is closed after the
# import (assumes import_files materialises its result eagerly — TODO confirm).
with ZipFile(MAFAULDA_PATH) as dataset:
    files = mafaulda.get_mafaulda_files(dataset)
    if N is not None:
        files = files[:N]
    tsfel_features = mafaulda.import_files(dataset, files, fdiscovery.tsfel_features_import)
tsfel_features.head()

Time-frequency domain features

Wavelet packet decomposition

In [None]:
# Extract wavelet-domain features from the first N files (all files when N is None).
# The archive is opened in a context manager so its handle is closed after the
# import (assumes import_files materialises its result eagerly — TODO confirm).
with ZipFile(MAFAULDA_PATH) as dataset:
    files = mafaulda.get_mafaulda_files(dataset)
    if N is not None:
        files = files[:N]
    wp_features = mafaulda.import_files(dataset, files, fdiscovery.features_wavelet_domain)
wp_features.head()

In [None]:
# Write the wavelet packet feature table to CSV (index column omitted) when saving is enabled.
if SAVE:
    wp_features.to_csv('all_wpd_features.csv', index=False)

Multilevel 1D Discrete Wavelet Transform
- https://www.mathworks.com/help/wavelet/gs/choose-a-wavelet.html
- https://www.mathworks.com/help/wavelet/gs/introduction-to-the-wavelet-families.html

In [None]:
# Channel and wavelet used for the 3-level discrete wavelet decomposition example.
axis = 'ax'
wavelet = 'dmey'

# Load one recording; the context manager closes the archive handle afterwards
# (assumes csv_import reads the data eagerly — TODO confirm).
with ZipFile(MAFAULDA_PATH) as archive:
    ts = mafaulda.csv_import(archive, 'vertical-misalignment/1.78mm/51.8144.csv')

result = pywt.wavedec(ts[axis], wavelet, mode='symmetric', level=3)

# Signal length, then the length of every coefficient array and the coefficients themselves.
print(len(ts[axis]))
print([len(x) for x in result], '\n', result)

Energy in partitioned regions
- store multiple levels to compare = {3, 6, 9}
- multiple wavelets = {db1, ...}
- https://stackoverflow.com/questions/30808430/how-to-select-columns-from-dataframe-by-regex

In [None]:
# A notebook cell only displays its last expression, so the family and wavelet
# lists have to be printed explicitly to become visible.
print(pywt.families())
print(pywt.wavelist())

# Print the description of the 'dmey' wavelet used in the decomposition example.
w = pywt.Wavelet('dmey')
print(w)

Empirical Wavelet transform

In [None]:
# Silence FutureWarning messages for the remainder of the session.
warnings.simplefilter('ignore', FutureWarning)


def ewt_transform(dataset: pd.DataFrame, axis: str, scales: int):
    """Run ``ewtpy.EWT1D`` on one column of ``dataset``.

    Parameters
    ----------
    dataset : pd.DataFrame
        Recording that contains the column to decompose.
    axis : str
        Name of the column passed to the transform.
    scales : int
        Forwarded to ``ewtpy.EWT1D`` as its ``N`` argument.

    Returns
    -------
    tuple
        The three values returned by ``ewtpy.EWT1D``, in the same order
        (the notebook unpacks them as ``ewt, mfb, boundaries``).
    """
    # Fixed settings of the transform; only the number of scales is exposed.
    ewt_options = dict(
        log=0,
        detect='locmax',
        completion=0,
        reg='average',
        lengthFilter=10,
        sigmaFilter=5,
    )
    modes, filter_bank, bounds = ewtpy.EWT1D(dataset[axis], N=scales, **ewt_options)
    return modes, filter_bank, bounds
# NOTE(review): matplotlib.pyplot is already imported as plt in the notebook's
# first cell, so this re-import is redundant.
import matplotlib.pyplot as plt
# Decompose the 'ax' column of the recording loaded for the wavelet example
# (the notebook-level `ts`) with N=3, then display the first returned value.
ewt, mfb, boundaries = ewt_transform(ts, 'ax', 3)
ewt