### MaFaulDa
- Description: https://www02.smt.ufrj.br/~offshore/mfs/page_01.html

In [None]:
from zipfile import ZipFile
from pprint import pprint

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

from scipy.signal import find_peaks, welch
from scipy.stats import skew, kurtosis, kstest
import statsmodels.api as sm

import sys
sys.path.append('../')
from vibrodiagnostics import mafaulda

MAFAULDA_PATH = '../../datasets/MAFAULDA.zip'

View some file names in the machinery fault database and display their count.

In [None]:
# Open the archive only for as long as the listing takes, so the file handle
# is released instead of staying open for the lifetime of the kernel.
with ZipFile(MAFAULDA_PATH) as archive:
    files = mafaulda.get_mafaulda_files(archive)

print(f'Measurements: {len(files)}')
pprint(files[:10])

Show details of one measurement

In [None]:
# Read one misalignment recording; the archive handle is closed as soon as the
# frame has been loaded.
with ZipFile(MAFAULDA_PATH) as archive:
    misalign = mafaulda.csv_import(archive, 'horizontal-misalignment/1.5mm/16.7936.csv')

# Only the last expression of a cell is rendered automatically, so the summary
# statistics have to be displayed explicitly or they are silently dropped.
display(misalign.describe())
misalign

Plot tachometer pulses and calculate revolutions per minute (rpm)

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))

# .loc makes the label-based slice explicit (index labels up to 0.5; the axis
# label below indicates the index is time in seconds).
misalign['tachometer'].loc[:0.5].plot(ax=ax[0], title='Tachometer', xlabel='Time [s]', ylabel='Impulse')
misalign['rpm'].plot(ax=ax[1], title='RPM', xlabel='Time [s]', ylabel='RPM')

# Keep the y-label of the right panel from overlapping the left panel.
fig.tight_layout()
plt.show()

Plot histograms for each column to get a sense of the distributions

In [None]:
# One 50-bin histogram per column of the recording.
misalign.hist(bins=50, figsize=(15, 10))
plt.show()

Plot short segment of time domain signal

In [None]:
# x is the first sample of the segment, l the number of samples shown.
x = 1024
l = 4096
g = (
    misalign[['ax', 'ay', 'az']]
    .iloc[x:x + l]
    .plot(subplots=True, grid=True, figsize=(15, 5))
)
plt.show()

Plot subsampled time domain signal with subsampling factor

In [None]:
# Keep every factor-th row to thin out the signal before plotting.
factor = 100
(
    misalign[['ax', 'ay', 'az']]
    .iloc[::factor]
    .plot(subplots=True, grid=True, figsize=(15, 5))
)
plt.show()

Plot magnitudes of each acceleration vector

In [None]:
# One panel per magnitude column ('mag_a' and 'mag_b').
(
    misalign[['mag_a', 'mag_b']]
    .plot(subplots=True, grid=True, figsize=(10, 5))
)
plt.show()

In [None]:
def normality_tests(ts, columns=None):
    """Print a Kolmogorov-Smirnov p-value and draw a Q-Q plot for each column.

    Each column is compared against a normal distribution that has the
    column's own sample mean and standard deviation. Passing only ``'norm'``
    to ``kstest`` would test against the *standard* normal N(0, 1), which
    rejects any signal that is not already zero-mean / unit-variance. The
    Q-Q plot is standardized the same way (``fit=True``) so that the 45 degree
    reference line is meaningful.

    Note: estimating the parameters from the same sample makes the reported
    p-values conservative; treat them as indicative only.

    Args:
        ts: DataFrame holding the columns to test.
        columns: Column names to test; defaults to the six accelerometer
            columns 'ax', 'ay', 'az', 'bx', 'by', 'bz'.
    """
    columns = columns or ['ax', 'ay', 'az', 'bx', 'by', 'bz']

    # Size the grid from the number of columns instead of assuming exactly six.
    n_cols = 3
    n_rows = -(-len(columns) // n_cols)  # ceiling division
    figure, axes = plt.subplots(n_rows, n_cols, figsize=(10, 2.5 * n_rows), squeeze=False)

    for i, col in enumerate(columns):
        values = ts[col]
        panel = axes[i // n_cols, i % n_cols]

        result = kstest(values, 'norm', args=(values.mean(), values.std()))
        print('Normality test p-value: ', result.pvalue, '(<0.05 is not normal)')

        sm.qqplot(values, line='45', fit=True, ax=panel)
        panel.set_title(col)

    # Hide grid cells that did not receive a plot.
    for spare in axes.flat[len(columns):]:
        spare.set_visible(False)


# Run the normality checks on the misalignment recording and render the figure.
normality_tests(ts=misalign)
plt.tight_layout()
plt.show()

In [None]:
def axis_spectrograms(df):
    """Draw stacked magnitude spectrograms of the 'ax', 'ay' and 'az' columns.

    All panels share one colour scale so that the single colorbar is valid
    for every panel. Without that, each image is auto-scaled on its own and a
    colorbar built from the last image misrepresents the others.

    Args:
        df: DataFrame with the columns 'ax', 'ay' and 'az', sampled at
            ``mafaulda.FS_HZ``.
    """
    columns = ['ax', 'ay', 'az']
    fig, ax = plt.subplots(len(columns), 1, figsize=(20, 8))

    RESOLUTION = 8
    WINDOW = mafaulda.FS_HZ // RESOLUTION
    mafaulda.resolution_calc(mafaulda.FS_HZ, WINDOW)

    # The taper is identical for every panel, so build it once.
    taper = np.hamming(WINDOW)

    images = []
    for panel, col in zip(ax, columns):
        _, _, _, image = panel.specgram(
            df[col],
            Fs=mafaulda.FS_HZ,
            mode='magnitude',
            window=taper,
            NFFT=WINDOW,
            noverlap=WINDOW // 2
        )
        panel.set_title(col)
        panel.set_ylabel('Frequency [Hz]')
        images.append(image)
    ax[-1].set_xlabel('Time [s]')

    # Put every image on the union of the individual colour ranges.
    vmin = min(image.get_clim()[0] for image in images)
    vmax = max(image.get_clim()[1] for image in images)
    for image in images:
        image.set_clim(vmin, vmax)

    plt.colorbar(images[-1], ax=ax)


# Spectrograms of the misalignment recording.
axis_spectrograms(df=misalign)

Do not use magnitude - it is not oscillatory. The axes have different responses, but the signal in each axis is **stationary**

In [None]:
def rms_orbitals(ts, n=100):
    """Scatter the rolling ``mafaulda.rms`` of the axes against each other.

    Three panels are drawn: x vs. y, x vs. z and y vs. z.

    Args:
        ts: DataFrame with the columns 'ax', 'ay' and 'az'.
        n: Rolling window length in rows.
    """
    _, panels = plt.subplots(1, 3, figsize=(20, 5))

    # Rolling value of every axis, keyed by the label used on the plots.
    source_columns = {'x': 'ax', 'y': 'ay', 'z': 'az'}
    rolling = {
        label: ts[column].rolling(n).apply(mafaulda.rms)
        for label, column in source_columns.items()
    }

    pairs = [('x', 'y'), ('x', 'z'), ('y', 'z')]
    for panel, (horizontal, vertical) in zip(panels, pairs):
        panel.set_xlabel(horizontal)
        panel.set_ylabel(vertical)
        panel.scatter(rolling[horizontal], rolling[vertical], s=1)

# Thin the recording to every subsample-th row before computing the rolling values.
subsample = 50
rms_orbitals(ts=misalign.iloc[::subsample])

Load normal conditions

In [None]:
# NOTE: zip_file and filenames are reused by later cells, so the archive is
# deliberately left open here.
zip_file = ZipFile(MAFAULDA_PATH)
filenames = mafaulda.get_mafaulda_files(zip_file)

# Archive members whose path starts with 'normal'.
normal_files = [path for path in filenames if path.startswith('normal')]

normal_cond = mafaulda.import_files(zip_file, normal_files, mafaulda.csv_import)
normal_cond.head()

Calculate time domain features on normal conditions in one accelerometer axis

In [None]:
def time_domain_features(ts, col):
    """Compute per-'key' time-domain statistics of one column.

    Args:
        ts: DataFrame containing a 'key' column and the column ``col``.
        col: Name of the column to summarise.

    Returns:
        DataFrame indexed by 'key' with the columns 'mean', 'std', 'skew',
        'kurtosis' and 'rms' (the latter computed by ``mafaulda.rms``).
    """
    per_key = ts.groupby(by='key')[col]

    statistics = {
        'mean': per_key.mean(),
        'std': per_key.std(),
        'skew': per_key.apply(skew),
        'kurtosis': per_key.apply(kurtosis),
        'rms': per_key.apply(mafaulda.rms),
    }

    return pd.concat(
        [values.rename(label) for label, values in statistics.items()],
        axis=1
    )


# Features of the 'ax' column for the normal-condition recordings.
td_featues = time_domain_features(ts=normal_cond, col='ax')
td_featues.head(10)

In [None]:
# Pairwise scatter plots of the time-domain features.
sb.pairplot(data=td_featues)
plt.show()

In [None]:
# Correlation matrix of the time-domain features, annotated with its values.
sb.heatmap(data=td_featues.corr(), annot=True)
plt.show()

In [None]:
def csv_import_td_features(zip_file, filename, col='ax'):
    """Import one measurement and summarise one column with time-domain features.

    Args:
        zip_file: Archive handle passed through to ``mafaulda.csv_import``.
        filename: Archive member path. Its second path component is parsed as
            the load (integer, with surrounding ' ' / 'g' characters stripped)
            and its third component is stored as 'no'.
        col: Column to summarise.

    Returns:
        DataFrame with the columns 'load', 'no', 'mean', 'std', 'skew',
        'kurtosis', 'rms' and 'amplitude' (largest absolute value).
    """
    info = filename.split('/')
    frame = mafaulda.csv_import(zip_file, filename).assign(
        load=int(info[1].strip(' g')),
        no=info[2]
    )

    def largest_absolute(values):
        return max(abs(values.max()), abs(values.min()))

    grouped = frame.groupby(by=['load', 'no'])[col]
    features = [
        grouped.mean().rename('mean'),
        grouped.std().rename('std'),
        grouped.apply(skew).rename('skew'),
        grouped.apply(kurtosis).rename('kurtosis'),
        grouped.apply(mafaulda.rms).rename('rms'),
        grouped.apply(largest_absolute).rename('amplitude'),
    ]
    return pd.concat(features, axis=1).reset_index()


# csv_import_td_features already returns a frame with a flat index, so a
# second reset_index would only add a meaningless 'index' column.
imbalance = csv_import_td_features(zip_file, 'imbalance/10g/56.9344.csv')
imbalance

In [None]:
from multiprocessing.pool import ThreadPool

def fft_csv_import(zip_file, filename, window=4096, overlap=0.5, fs=50000, is_welch=False):
    """Import one measurement and return spectra of its 'ax' column.

    Args:
        zip_file: Archive handle passed through to ``mafaulda.csv_import``.
        filename: Archive member path. Its second path component is parsed as
            the load (integer, with surrounding ' ' / 'g' characters stripped)
            and its third component is used as the measurement label 'no'.
        window: Segment length in samples.
        overlap: Fraction of ``window`` shared by consecutive segments,
            ``0 <= overlap < 1``.
        fs: Sampling frequency in Hz.
        is_welch: If true, return a single Welch-averaged spectrum
            (Hann window, ``scaling='spectrum'``); otherwise return one
            Hamming-windowed FFT magnitude spectrum per segment.

    Returns:
        DataFrame indexed by ('load', 'no') with one row per spectrum. Column
        labels are the bin frequencies truncated to ``int``.

    Raises:
        ValueError: If ``overlap`` is outside ``[0, 1)``.
    """
    if not 0 <= overlap < 1:
        raise ValueError(f'overlap must be in [0, 1), got {overlap}')

    col = 'ax'
    info = filename.split('/')
    load = int(info[1].strip(' g'))

    frame = mafaulda.csv_import(zip_file, filename)
    v = frame[col].to_numpy()

    if not is_welch:
        # Hop between segment starts: the part of the window that is NOT shared.
        step = max(1, int(window * (1 - overlap)))
        taper = np.hamming(window)
        spectra = [
            np.abs(np.fft.rfft(v[i:i + window] * taper))
            # "+ 1" keeps the last full window when it ends exactly at len(v).
            for i in range(0, len(v) - window + 1, step)
        ]
        freqs = np.arange(window // 2 + 1) * (fs / window)

    else:
        # SciPy shortens nperseg to len(v) for short inputs; derive noverlap
        # from the effective length so it always stays below nperseg.
        noverlap = int(min(window, len(v)) * overlap)
        freqs, spectrum = welch(
            v, fs, 'hann',
            nperseg=window, noverlap=noverlap,
            scaling='spectrum', average='mean'
        )
        spectra = [spectrum]

    return (
        pd.DataFrame(data=spectra, columns=freqs.astype(int))
        .assign(load=load, no=info[2])
        .set_index(['load', 'no'])
    )


# Thread pool reused by the later import cells.
pool = ThreadPool(processes=4)

# Archive members whose path starts with 'imbalance'.
imbalance_files = [path for path in filenames if path.startswith('imbalance')]

WINDOW = 2**14
mafaulda.resolution_calc(mafaulda.FS_HZ, WINDOW)

# Welch spectrum of a single imbalance recording.
spectra = fft_csv_import(
    zip_file,
    'imbalance/10g/56.9344.csv',
    window=WINDOW,
    overlap=0.5,
    fs=mafaulda.FS_HZ,
    is_welch=True,
)
spectra.head(10)

In [None]:
# First spectrum, restricted to column labels up to 500.
(
    spectra
    .iloc[0]
    .loc[:500]
    .plot(legend=False)
)

In [None]:
# Natural log of the first spectrum; only its first 200 bins are drawn.
(
    spectra
    .iloc[0]
    .pipe(np.log)
    .iloc[:200]
    .plot(grid=True, legend=False)
)

In [None]:
WINDOW = 2**13
from tqdm.notebook import tqdm

zip_file = ZipFile(MAFAULDA_PATH)

# NOTE(review): every task is waited for before the next one is submitted
# (pool.apply is apply_async(...).get()), so the files are processed one at a
# time despite the thread pool.
imbalancePSD = pd.concat([
    pool.apply(
        fft_csv_import,
        (zip_file, name, WINDOW, 0.5, mafaulda.FS_HZ, True)
    )
    for name in tqdm(imbalance_files[:10])
])

In [None]:
# Show the concatenated spectra of the imported imbalance files
# (index: load and file name, columns: frequency labels).
imbalancePSD

In [None]:
# Spectrum of one file (load 10, '21.7088.csv'), column labels up to 500.
(
    imbalancePSD
    .T[(10, '21.7088.csv')]
    .loc[:500]
    .plot()
)

In [None]:
def extract_peaks(psd, max_freq=1000):
    """Return the two strongest spectral peaks for every load.

    Peaks (prominence >= 0.02) are searched in each row of ``psd`` among the
    bins whose frequency label is at most ``max_freq``. The peaks of all rows
    that share a load are pooled and ranked by height (ties broken by lower
    frequency); the highest becomes ``*_f0`` and the second highest ``*_f1``.

    Args:
        psd: DataFrame with one spectrum per row. The row index entries are
            ``(load, no)`` tuples and the column labels are ascending
            frequencies.
        max_freq: Upper frequency limit, in the units of the column labels.

    Returns:
        DataFrame with one row per load and the columns 'load', 'load_f0',
        'no_f0', 'f_f0', 'y_f0', 'load_f1', 'no_f1', 'f_f1', 'y_f1'. The
        ``*_f1`` values are NaN for loads with a single peak. Loads without
        any peak are omitted.
    """
    columns = ['load', 'no', 'f', 'y']
    frames = []

    for index, bins in psd.iterrows():
        # Label-based slice: a plain bins[:max_freq] would cut by *position*
        # and keep the first max_freq bins regardless of their frequency.
        band = bins.loc[:max_freq]
        values = band.to_numpy()
        peaks, _ = find_peaks(values, prominence=0.02)
        if len(peaks) == 0:
            continue

        frames.append(pd.DataFrame({
            'load': index[0],
            'no': index[1],
            'f': band.index[peaks].to_numpy(),
            'y': values[peaks],
        }))

    if not frames:
        suffixed = [f'{name}_{rank}' for rank in ('f0', 'f1') for name in columns]
        return pd.DataFrame(columns=['load'] + suffixed)

    harmonics = (
        pd.concat(frames, ignore_index=True)
          .sort_values(by=['load', 'y', 'f'], ascending=[True, False, True])
    )

    # Rank the peaks inside each load and pair rank 0 with rank 1 on the load
    # itself; this does not depend on how groupby.nth indexes its result.
    rank = harmonics.groupby('load').cumcount()
    f0 = harmonics[rank == 0].set_index('load', drop=False)
    f1 = harmonics[rank == 1].set_index('load', drop=False)

    peak_features = f0.join(f1, lsuffix='_f0', rsuffix='_f1').reset_index()
    return peak_features


peak_features = extract_peaks(psd=imbalancePSD)

# Height of the strongest peak against the height of the second strongest.
sb.lmplot(
    data=peak_features,
    x='y_f0',
    y='y_f1',
    hue='load_f0',
    fit_reg=False,
    legend=False,
)
plt.legend(loc='lower right')
plt.show()

In [None]:
# Frequency of the strongest peak against the frequency of the second strongest.
sb.lmplot(
    data=peak_features,
    x='f_f0',
    y='f_f1',
    hue='load_f0',
    fit_reg=False,
    legend=False,
)
plt.legend(loc='lower right')
plt.show()

In [None]:
# Spectral statistics
from scipy.stats import skew, kurtosis


def rms(x):
    """Root mean square of the values in x."""
    return np.sqrt((x ** 2).mean())


def spectral_centroid(x):
    """Average of x's index labels, weighted by x's values."""
    return np.average(x.index, weights=x)


# One feature row per spectrum (row of imbalancePSD).
fd_features = pd.concat(
    [
        imbalancePSD.mean(axis=1).rename('mean'),
        imbalancePSD.std(axis=1).rename('std'),
        imbalancePSD.T.apply(skew).rename('skew'),
        imbalancePSD.T.apply(kurtosis).rename('kurtosis'),
        imbalancePSD.T.apply(rms).rename('rms'),
        imbalancePSD.T.apply(spectral_centroid).rename('centroid'),
    ],
    axis=1
)
fd_features.head(10)

In [None]:
# Pairwise scatter plots of the spectral features.
sb.pairplot(data=fd_features)
plt.show()

In [None]:
# Move the index levels into ordinary columns so 'load' can colour the points.
fd_plain = fd_features.reset_index()
sb.lmplot(
    data=fd_plain,
    x='rms',
    y='kurtosis',
    hue='load',
    fit_reg=False,
    legend=False,
)
plt.legend(loc='lower right')
plt.show()

In [None]:
# 3-D scatter of three spectral features.
fig = plt.figure(figsize=(5, 5))
ax = fig.add_subplot(projection='3d')
ax.scatter(
    xs=fd_plain['kurtosis'],
    ys=fd_plain['centroid'],
    zs=fd_plain['rms'],
)

ax.set_xlabel('Kurtosis')
ax.set_ylabel('Centroid')
ax.set_zlabel('RMS')

PSD of severe faults by axis (Welch) - up to 2 kHz (120 000 RPM)

In [None]:

def fft_csv_import_by_axis(zip_file, filename, axis='ax', window=4096, overlap=0.5, fs=50000, is_welch=False):
    """Import one measurement and return spectra of one of its columns.

    Args:
        zip_file: Archive handle passed through to ``mafaulda.csv_import``.
        filename: Archive member path; also used as the row label.
        axis: Name of the column to transform.
        window: Segment length in samples.
        overlap: Fraction of ``window`` shared by consecutive segments,
            ``0 <= overlap < 1``.
        fs: Sampling frequency in Hz.
        is_welch: If true, return a single Welch-averaged spectrum
            (Hann window, ``scaling='spectrum'``); otherwise return one
            Hamming-windowed FFT magnitude spectrum per segment.

    Returns:
        DataFrame indexed by 'name' with one row per spectrum. Column labels
        are the bin frequencies truncated to ``int``, plus an 'rpm' column
        holding the median of the measurement's 'rpm' column.

    Raises:
        ValueError: If ``overlap`` is outside ``[0, 1)``.
    """
    if not 0 <= overlap < 1:
        raise ValueError(f'overlap must be in [0, 1), got {overlap}')

    frame = mafaulda.csv_import(zip_file, filename)
    v = frame[axis].to_numpy()

    if not is_welch:
        # Hop between segment starts: the part of the window that is NOT shared.
        step = max(1, int(window * (1 - overlap)))
        taper = np.hamming(window)
        spectra = [
            np.abs(np.fft.rfft(v[i:i + window] * taper))
            # "+ 1" keeps the last full window when it ends exactly at len(v).
            for i in range(0, len(v) - window + 1, step)
        ]
        freqs = np.arange(window // 2 + 1) * (fs / window)

    else:
        # SciPy shortens nperseg to len(v) for short inputs; derive noverlap
        # from the effective length so it always stays below nperseg.
        noverlap = int(min(window, len(v)) * overlap)
        freqs, spectrum = welch(
            v, fs, 'hann',
            nperseg=window, noverlap=noverlap,
            scaling='spectrum', average='mean'
        )
        spectra = [spectrum]

    return (
        pd.DataFrame(data=spectra, columns=freqs.astype(int))
        .assign(name=filename, rpm=frame['rpm'].median())
        .set_index(['name'])
    )


# NOTE(review): FS is not referenced by the visible cells, which use
# mafaulda.FS_HZ instead; kept in case other code relies on it.
FS = 50000
WINDOW = 2**13

# One recording per condition.
fault_files = [
    'horizontal-misalignment/2.0mm/60.8256.csv',
    'vertical-misalignment/1.90mm/61.44.csv',
    'imbalance/35g/56.7296.csv',
    'normal/61.44.csv',
    'overhang/ball_fault/35g/32.1536.csv',
    'overhang/cage_fault/35g/54.0672.csv',
    'overhang/outer_race/35g/53.4528.csv',
    'underhang/ball_fault/35g/50.7904.csv',
    'underhang/cage_fault/35g/56.5248.csv',
    'underhang/outer_race/35g/58.9824.csv'
]

# Smoke test on a single file; the archive handle is closed right after the
# import instead of being left open.
with ZipFile(MAFAULDA_PATH) as archive:
    normal_az_psd = fft_csv_import_by_axis(archive, 'normal/61.44.csv', 'az', WINDOW, 0.5, mafaulda.FS_HZ, True)
normal_az_psd

Measurement place A - import worst faults and compare each axis's PSD

In [None]:
zip_file = ZipFile(MAFAULDA_PATH)

# NOTE(review): pool.apply waits for each task before the next one is
# submitted, so the files are still processed one at a time.

# Spectra of the 'ax' column for every fault file.
faultPSD_X = pd.concat([
    pool.apply(
        fft_csv_import_by_axis,
        (zip_file, name, 'ax', WINDOW, 0.5, mafaulda.FS_HZ, True)
    )
    for name in tqdm(fault_files)
])

# Spectra of the 'ay' column for every fault file.
faultPSD_Y = pd.concat([
    pool.apply(
        fft_csv_import_by_axis,
        (zip_file, name, 'ay', WINDOW, 0.5, mafaulda.FS_HZ, True)
    )
    for name in tqdm(fault_files)
])

# Spectra of the 'az' column for every fault file.
faultPSD_Z = pd.concat([
    pool.apply(
        fft_csv_import_by_axis,
        (zip_file, name, 'az', WINDOW, 0.5, mafaulda.FS_HZ, True)
    )
    for name in tqdm(fault_files)
])

In [None]:
# Only frequency labels below this value are plotted.
f_cutoff = 2000

# Drop the 'rpm' column so that only spectral bins remain, then keep the bins
# below the cutoff with one column per fault file.
faultPSD_X_v = faultPSD_X.drop(columns='rpm')
x_psd = faultPSD_X_v.T.loc[faultPSD_X_v.columns < f_cutoff]

axis = x_psd.plot(
    subplots=True,
    xlabel='Frequency [Hz]',
    ylabel='Amplitude',
    figsize=(20, 15)
)

# Graph RPM
for ax, rpm  in zip(axis, faultPSD_X['rpm']):
    # Revolutions per minute -> rotation frequency in Hz.
    f0 = rpm / 60
    ax.axvline(x=f0, color='red')
    # Multiples 2 .. n - 1 of the rotation frequency are marked in orange.
    n = 6
    for i in range(2, n):
        ax.axvline(x=i * f0, color='orange')

In [None]:
# Same view as the X-axis figure: spectral bins below f_cutoff, one panel per
# fault file, with labelled axes.
faultPSD_Y_v = faultPSD_Y.drop('rpm', axis=1)
y_psd = faultPSD_Y_v.T[faultPSD_Y_v.T.index < f_cutoff]
p = y_psd.plot(
    subplots=True,
    figsize=(20, 15),
    xlabel='Frequency [Hz]',
    ylabel='Amplitude'
)

In [None]:
# Same view as the X-axis figure: spectral bins below f_cutoff, one panel per
# fault file, with labelled axes.
faultPSD_Z_v = faultPSD_Z.drop('rpm', axis=1)
z_psd = faultPSD_Z_v.T[faultPSD_Z_v.T.index < f_cutoff]
p = z_psd.plot(
    subplots=True,
    figsize=(20, 15),
    xlabel='Frequency [Hz]',
    ylabel='Amplitude'
)