In [None]:
import pandas as pd
import os
import sys
sys.path.append('../')
from vibrodiagnostics import selection, models

# Directory that holds the dataset metadata file and the extracted feature tables.
# (Defined once; it was previously assigned twice with the same value.)
FEATURES_PATH = '../../datasets/features_data/'

# Per placement ('A' / 'B', the `place` argument of the loaders in this notebook):
# maps the raw `fault` values found in the metadata to the class label used in
# the analysis. Both misalignment directions collapse into one 'misalignment'
# class; 'A' lists the underhang bearing faults, 'B' the overhang ones.
FAULT_CLASSES = {
    'A': {
        'normal': 'normal',
        'imbalance': 'imbalance',
        'horizontal-misalignment': 'misalignment',
        'vertical-misalignment': 'misalignment',
        'underhang-outer_race': 'outer race fault',
        'underhang-cage_fault': 'cage fault',
        'underhang-ball_fault': 'ball fault',
    },
    'B': {
        'normal': 'normal',
        'imbalance': 'imbalance',
        'horizontal-misalignment': 'misalignment',
        'vertical-misalignment': 'misalignment',
        'overhang-cage_fault': 'cage fault',
        'overhang-ball_fault': 'ball fault',
        'overhang-outer_race': 'outer race fault',
    },
}


# Location of the MAFAULDA zip archive.
MAFAULDA_PATH = '../../datasets/MAFAULDA.zip'
# Metadata table read by the dataset loaders (indexed there by 'filename').
MAFAULDA_METADATA = os.path.join(FEATURES_PATH, selection.MAFAULDA_METADATA)

# Feature tables; file names come from the `selection` module.
TD_FEATURES = os.path.join(FEATURES_PATH, selection.TIME_FEATURES_PATH)
FD_FEATURES = os.path.join(FEATURES_PATH, selection.FREQ_FEATURES_PATH)
TD_FD_FEATURES = os.path.join(FEATURES_PATH, selection.TIME_AND_FREQ_FEATURES_PATH)

In [None]:
# Centre of the `rpm` window applied by load_dataset_rpm_limited.
RPM = 2500
# Half-width of that window: rows with RPM - RPM_RANGE <= rpm <= RPM + RPM_RANGE are kept.
RPM_RANGE = 500

def load_dataset_all(place='A', anomaly=None):
    """Load the metadata rows for one placement and label them.

    Reads the metadata CSV (indexed by 'filename'), keeps the rows whose
    `fault` value is a key of ``FAULT_CLASSES[place]`` and passes them to
    ``models.fault_labeling``.

    Args:
        place: Key into ``FAULT_CLASSES`` ('A' or 'B').
        anomaly: Optional value forwarded as ``anomaly_severity``. When it is
            ``None`` the argument is not passed at all, so
            ``models.fault_labeling`` applies its own default behaviour.

    Returns:
        Whatever ``models.fault_labeling`` returns for the selected rows.

    Raises:
        KeyError: If ``place`` is not a key of ``FAULT_CLASSES``.
    """
    meta = pd.read_csv(MAFAULDA_METADATA, index_col='filename')
    classes = FAULT_CLASSES[place]
    # Hand an independent copy to the labeling step, as the RPM-limited loader
    # does, so any column assignment there does not act on a slice of `meta`.
    files = meta[meta['fault'].isin(tuple(classes))].copy()
    # Forward anomaly_severity only when a value was supplied.
    labeling_kwargs = {} if anomaly is None else {'anomaly_severity': anomaly}
    return models.fault_labeling(files, classes, **labeling_kwargs)

def load_dataset_rpm_limited(place='A', anomaly=None):
    """Load and label the metadata rows of one placement inside the RPM window.

    Same selection as the unrestricted loader (rows whose `fault` is a key of
    ``FAULT_CLASSES[place]``), further restricted to rows whose `rpm` lies in
    ``[RPM - RPM_RANGE, RPM + RPM_RANGE]`` (both bounds included).

    Args:
        place: Key into ``FAULT_CLASSES`` ('A' or 'B').
        anomaly: Optional value forwarded as ``anomaly_severity``; omitted
            from the call when ``None``.

    Returns:
        Whatever ``models.fault_labeling`` returns for the selected rows.
    """
    metadata = pd.read_csv(MAFAULDA_METADATA, index_col='filename')
    classes = FAULT_CLASSES[place]
    known_faults = metadata[metadata['fault'].isin(tuple(classes))]

    lowest, highest = RPM - RPM_RANGE, RPM + RPM_RANGE
    in_window = known_faults['rpm'].between(lowest, highest, inclusive='both')
    selected = known_faults[in_window].copy()

    if anomaly is None:
        return models.fault_labeling(selected, classes)
    return models.fault_labeling(selected, classes, anomaly_severity=anomaly)

Fault class counts

In [None]:
# Count of files per fault class, absolute and as a percentage of the
# dataset, for both placements, with and without the RPM window.
summary = pd.DataFrame()
for place in ('A', 'B'):
    files_all = load_dataset_all(place)
    filenames = load_dataset_rpm_limited(place)
    for suffix, frame in (('rpm_nolimit', files_all), ('rpm_limit', filenames)):
        counts = frame['fault'].value_counts()
        summary[f'{place}_{suffix}'] = counts
        summary[f'{place}_{suffix}_percent'] = counts / len(frame) * 100

summary

Anomaly label counts (anomaly severity 0.6)

In [None]:
# Threshold forwarded to models.fault_labeling as `anomaly_severity`.
anomaly_severity = 0.6

# Count of files per `anomaly` label, absolute and as a percentage of the
# dataset, for both placements, with and without the RPM window.
summary = pd.DataFrame()
for place in ('A', 'B'):
    files_all = load_dataset_all(place, anomaly=anomaly_severity)
    # Fix: the RPM-limited subset is labeled with the same threshold as the
    # unrestricted one. It was previously loaded without `anomaly`, so the
    # *_rpm_limit columns did not reflect the threshold of this cell.
    # A separate name keeps the `filenames` frame bound by the fault-count
    # cell untouched.
    files_limited = load_dataset_rpm_limited(place, anomaly=anomaly_severity)
    for suffix, frame in (('rpm_nolimit', files_all), ('rpm_limit', files_limited)):
        counts = frame['anomaly'].value_counts()
        summary[f'{place}_{suffix}'] = counts
        summary[f'{place}_{suffix}_percent'] = counts / len(frame) * 100

summary

Anomaly label counts (anomaly severity 0.9)

In [None]:
# Threshold forwarded to models.fault_labeling as `anomaly_severity`.
anomaly_severity = 0.9

# Count of files per `anomaly` label, absolute and as a percentage of the
# dataset, for both placements, with and without the RPM window.
summary = pd.DataFrame()
for place in ('A', 'B'):
    files_all = load_dataset_all(place, anomaly=anomaly_severity)
    # Fix: the RPM-limited subset is labeled with the same threshold as the
    # unrestricted one. It was previously loaded without `anomaly`, so the
    # *_rpm_limit columns did not reflect the threshold of this cell.
    # A separate name keeps the `filenames` frame bound by the fault-count
    # cell untouched.
    files_limited = load_dataset_rpm_limited(place, anomaly=anomaly_severity)
    for suffix, frame in (('rpm_nolimit', files_all), ('rpm_limit', files_limited)):
        counts = frame['anomaly'].value_counts()
        summary[f'{place}_{suffix}'] = counts
        summary[f'{place}_{suffix}_percent'] = counts / len(frame) * 100

summary

RPM distribution per fault class (RPM-limited dataset)

In [None]:
# Load the RPM-limited subset for placement 'B' explicitly instead of relying
# on whichever `filenames` frame an earlier cell happened to leave in the
# kernel (the last one bound there was also the placement 'B' subset).
rpm_limited_b = load_dataset_rpm_limited('B')
# Histogram of the `rpm` column for each `fault` group.
rpm_limited_b.groupby(by='fault')['rpm'].plot.hist(bins=100)

In [None]:
# Load the time-domain feature table and display it for inspection.
features = pd.read_csv(TD_FEATURES)
features

In [None]:
# Frequency-domain feature table, labeled with the placement 'A' fault classes.
features = models.fault_labeling(pd.read_csv(FD_FEATURES), FAULT_CLASSES['A'])
features

In [None]:
# Keep every column that is not listed as metadata, plus `rpm` regardless.
feature_columns = features.columns
metadata_mask = feature_columns.isin(selection.METADATA_COLUMNS_ALL)
rpm_mask = feature_columns.isin(('rpm',))
x = features[feature_columns[~metadata_mask | rpm_mask]]

# Absolute correlation of each retained column with `rpm`, strongest first
# (the `rpm` row itself is part of the result).
rpm_correlations = x.corr()['rpm']
st = rpm_correlations.abs().sort_values(ascending=False)
st.describe()