In [None]:
import os
import sys
from zipfile import ZipFile

import matplotlib.pyplot as plt
import pandas as pd

# The project's `feature` package is looked up two directories above the notebook.
sys.path.append('../../')
from feature import discovery as fdiscovery
from feature.discovery import plot_frequency_spectrum
from feature.mafaulda import dataset_index, import_files_split
from feature.models import fault_labeling
from feature.selection import TIME_FEATURES_PATH, FREQ_FEATURES_PATH


# Input archive and output directory, given relative to the notebook's working directory.
MAFAULDA_PATH = '../../datasets/MAFAULDA.zip'
FEATURES_PATH = '../../datasets/features_data/'
# CSV holding the per-file metadata index; written by the extraction cell
# and read back by load_dataset_info().
MAFAULDA_METADATA = os.path.join(FEATURES_PATH, 'mafaulda_metadata.csv')

# Master switch for the cells guarded by `if EXTRACT:` (metadata indexing and
# feature export). With False those cells are skipped entirely.
EXTRACT = False

Choose fault types and revolution speeds of interest

In [None]:
# Fault names to keep (keys are matched against the metadata `fault` column)
# mapped to short label codes (values).
FAULT_CLASSES = {
    'normal': 'N',
    'imbalance': 'I',
    'horizontal-misalignment': 'HM',
    'vertical-misalignment': 'VM',
}

# Rotation-speed window: files with rpm inside [RPM - RPM_RANGE, RPM + RPM_RANGE]
# (both ends inclusive) are selected.
RPM = 2900
RPM_RANGE = 500

def load_dataset_info(metadata_path=None, fault_classes=None, rpm=None, rpm_range=None):
    """Load the metadata index and keep only files of the chosen faults and speeds.

    Every argument is optional. An omitted argument falls back to the matching
    notebook-level constant, so ``load_dataset_info()`` selects exactly the same
    rows as before the parameters were introduced.

    Parameters
    ----------
    metadata_path : str or path-like, optional
        CSV file containing at least the columns ``filename``, ``fault`` and
        ``rpm``. Defaults to ``MAFAULDA_METADATA``.
    fault_classes : iterable of str, optional
        Fault names to keep. A dict may be passed; only its keys are used.
        Defaults to ``FAULT_CLASSES``.
    rpm : number, optional
        Centre of the accepted speed window. Defaults to ``RPM``.
    rpm_range : number, optional
        Half-width of the accepted speed window. Defaults to ``RPM_RANGE``.

    Returns
    -------
    pandas.DataFrame
        Copy of the matching metadata rows, indexed by ``filename``.
    """
    # Resolve defaults at call time so later edits to the notebook constants are honoured.
    if metadata_path is None:
        metadata_path = MAFAULDA_METADATA
    if fault_classes is None:
        fault_classes = FAULT_CLASSES
    if rpm is None:
        rpm = RPM
    if rpm_range is None:
        rpm_range = RPM_RANGE

    meta = pd.read_csv(metadata_path, index_col='filename')

    # list() makes the "dict -> keys" conversion explicit.
    wanted_fault = meta['fault'].isin(list(fault_classes))
    # Both window edges are part of the selection.
    wanted_speed = meta['rpm'].between(rpm - rpm_range, rpm + rpm_range, inclusive='both')

    # .copy() detaches the result from `meta` so callers can add columns freely.
    return meta[wanted_fault & wanted_speed].copy()

Extract metadata about the files from the whole dataset

In [None]:

if EXTRACT:
    # Build the per-file metadata table from the archive and cache it as CSV;
    # load_dataset_info() reads this file back.
    file_index = dataset_index(MAFAULDA_PATH)

    # DataFrame.to_csv does not create missing directories, so make sure the
    # target directory exists before writing.
    metadata_dir = os.path.dirname(MAFAULDA_METADATA)
    if metadata_dir:
        os.makedirs(metadata_dir, exist_ok=True)

    # index=False drops the row index. The reader uses index_col='filename', so
    # `filename` is presumably a regular column of file_index (verify against
    # dataset_index).
    file_index.to_csv(MAFAULDA_METADATA, index=False)

Import metadata about Mafaulda

File names selection
- Choose 4 types of faults within a limited RPM range

In [None]:
# Metadata rows for the selected fault classes and rpm window.
files = load_dataset_info()
# Column dtypes and non-null counts, then a preview of the first rows.
files.info()
files.head(n=10)

Frequency spectrum comparison of faults at low and high RPM

In [None]:
def plot_rpm_comparison(files, fault, dB):
    """Overlay the frequency spectra of the slowest and fastest recordings.

    Parameters
    ----------
    files : pandas.DataFrame
        Metadata rows with an ``rpm`` column. The index values are passed to
        ``plot_frequency_spectrum`` as the member names to read from the archive.
    fault : str
        Label used as the figure title.
    dB : bool
        Forwarded unchanged to ``plot_frequency_spectrum`` as its ``dB`` argument.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    rpm = files['rpm']
    # Keep every row that ties for the lowest or the highest speed.
    table = files[(rpm == rpm.min()) | (rpm == rpm.max())]

    # The context manager closes the archive handle even if plotting raises;
    # previously one handle leaked on every call.
    with ZipFile(MAFAULDA_PATH) as dataset:
        fig, ax = plt.subplots(1, 1, figsize=(15, 3))
        ax.set_title(f'{fault}')
        for filename, series in table.iterrows():
            # 'ax' is the third positional argument of plot_frequency_spectrum,
            # presumably the signal column to plot (confirm in feature.discovery).
            plot_frequency_spectrum(dataset, filename, 'ax', ax, dB=dB, label=f'{series["rpm"]:.2f}')

    ax.legend(loc="upper right")
    fig.tight_layout()
    plt.show()

In [None]:
# Attach fault labels to the selection. The meaning of the third positional
# argument (0.6) is defined by fault_labeling. TODO: name it in the config cell.
# NOTE: this rebinds `files` from itself, so re-running the cell feeds the
# already-labeled frame back into fault_labeling.
files = fault_labeling(files, FAULT_CLASSES, 0.6, debug=True)
files.head(n=5)

Scale in m/s²: frequency spectra at the lowest and highest RPM

In [None]:
# One figure per (fault code, severity level) pair, linear amplitude scale.
for fault, level in (('N', 0), ('I', 1), ('VM', 1), ('HM', 1)):
    has_fault = files['fault'] == fault
    has_level = files['severity_level'] == level
    sources = files[has_fault & has_level]
    plot_rpm_comparison(sources, fault, dB=False)

Scale in dB (reference level 1 µm/s²): frequency spectra at the lowest and highest RPM

In [None]:
# Same (fault code, severity level) pairs as the linear-scale cell, drawn in dB.
for fault, level in (('N', 0), ('I', 1), ('VM', 1), ('HM', 1)):
    has_fault = files['fault'] == fault
    has_level = files['severity_level'] == level
    sources = files[has_fault & has_level]
    plot_rpm_comparison(sources, fault, dB=True)

1. Feature extraction

In [None]:
# Show the file selection whose index supplies the file names for feature export.
files

Export features for chosen files

In [None]:
# Names of the selected recordings, taken from the metadata index.
filenames = list(files.index)
# Archive handle shared by the extraction cells that follow. It is opened even
# when EXTRACT is False and is not closed anywhere in this section.
dataset = ZipFile(MAFAULDA_PATH)

Time domain features

In [None]:
if EXTRACT:
    # Compute time-domain features for the selected files.
    # parts=5 is forwarded to import_files_split, presumably the number of
    # segments each recording is split into (confirm in feature.mafaulda).
    features = import_files_split(dataset, filenames, fdiscovery.features_time_domain, parts=5)
    features.to_csv(FEATURES_PATH + TIME_FEATURES_PATH, index=False)
    # Jupyter only auto-renders the last top-level expression of a cell; an
    # expression nested inside `if` is silently discarded, so display explicitly.
    display(features.head(10))

Frequency domain features

In [None]:
if EXTRACT:
    # Compute frequency-domain features for the selected files.
    # parts=5 is forwarded to import_files_split, presumably the number of
    # segments each recording is split into (confirm in feature.mafaulda).
    features = import_files_split(dataset, filenames, fdiscovery.features_frequency_domain, parts=5)
    features.to_csv(FEATURES_PATH + FREQ_FEATURES_PATH, index=False)
    # Jupyter only auto-renders the last top-level expression of a cell; an
    # expression nested inside `if` is silently discarded, so display explicitly.
    display(features.head(10))

TSFEL package features

In [None]:
if EXTRACT:
    # Compute the TSFEL feature set for the selected files.
    # parts=5 is forwarded to import_files_split, presumably the number of
    # segments each recording is split into (confirm in feature.mafaulda).
    features = import_files_split(dataset, filenames, fdiscovery.tsfel_features_import, parts=5)
    features.to_csv(FEATURES_PATH + 'tsfel_features.csv', index=False)
    # Jupyter only auto-renders the last top-level expression of a cell; an
    # expression nested inside `if` is silently discarded, so display explicitly.
    display(features.head(10))

Wavelet packet features (Meyer wavelet)

In [None]:
if EXTRACT:
    # Compute wavelet-packet features for the selected files.
    # parts=5 is forwarded to import_files_split, presumably the number of
    # segments each recording is split into (confirm in feature.mafaulda).
    features = import_files_split(dataset, filenames, fdiscovery.features_wavelet_domain, parts=5)
    # Write to a dedicated file: this cell used to write 'tsfel_features.csv'
    # and thereby overwrote the TSFEL export produced by the previous cell.
    # NOTE(review): the file name is chosen here; align it with whatever the
    # downstream loaders expect.
    features.to_csv(FEATURES_PATH + 'wavelet_features.csv', index=False)
    # Jupyter only auto-renders the last top-level expression of a cell; an
    # expression nested inside `if` is silently discarded, so display explicitly.
    display(features.head(10))