# MaFaulDa
https://www02.smt.ufrj.br/~offshore/mfs/page_01.html

In [1]:
from zipfile import ZipFile
from pprint import pprint

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import mafaulda as src

# Relative path to the MAFAULDA zip archive; the cells below open it with ZipFile(MAFAULDA_PATH).
MAFAULDA_PATH = '../../datasets/MAFAULDA.zip'

View some file names in machinery database and display their count.

In [None]:
# List the measurement files stored in the archive. The context manager closes
# the zip handle as soon as the listing has been read, so no open file leaks.
with ZipFile(MAFAULDA_PATH) as archive:
    files = src.get_mafaulda_files(archive)
print(f'Measurements: {len(files)}')
pprint(files[:10])

Show details of one measurement

In [3]:
# Import a single recording; the archive handle is closed once the frame is loaded.
with ZipFile(MAFAULDA_PATH) as archive:
    misalign = src.csv_import(archive, 'horizontal-misalignment/1.5mm/16.7936.csv')

# Only the last expression of a cell is rendered automatically, so the summary
# statistics have to be displayed explicitly or they are silently discarded.
# `display` is the built-in provided by the IPython/Jupyter kernel.
display(misalign.describe())
misalign

Unnamed: 0_level_0,tachometer,ax,ay,az,bx,by,bz,mic,mag_a,mag_b,rev,rpm,key
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0.00000,4.46390,1.27330,1.297185,0.033427,0.052611,0.057577,0.023704,-0.021927,1.297185,0.057577,False,977.835724,horizontal-misalignment/1.5mm/16.7936.csv
0.00002,4.46570,-0.74724,0.852230,-0.061118,-0.021684,0.032023,-0.077300,0.077566,0.852230,0.032023,False,977.835724,horizontal-misalignment/1.5mm/16.7936.csv
0.00004,4.45350,1.10910,1.197030,-0.014861,0.056175,0.062646,-0.009375,-0.115730,1.197030,0.062646,False,977.835724,horizontal-misalignment/1.5mm/16.7936.csv
0.00006,4.46870,-0.43897,0.622929,-0.091402,-0.016462,0.032701,-0.011475,0.142450,0.622929,0.032701,False,977.835724,horizontal-misalignment/1.5mm/16.7936.csv
0.00008,4.45640,0.49322,0.597385,0.014023,0.022246,0.037437,0.078617,-0.138580,0.597385,0.037437,False,977.835724,horizontal-misalignment/1.5mm/16.7936.csv
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4.74358,-0.87862,-1.98680,2.053727,-0.083187,-1.568500,1.569004,-0.459080,0.361660,2.053727,1.569004,False,977.517107,horizontal-misalignment/1.5mm/16.7936.csv
4.74360,-0.79904,0.85936,0.892822,0.043119,-1.441900,1.442412,-0.368830,-0.231960,0.892822,1.442412,False,977.517107,horizontal-misalignment/1.5mm/16.7936.csv
4.74362,-0.72662,-1.44100,1.524046,-0.038522,-1.514100,1.514641,-0.429730,0.440660,1.524046,1.514641,False,977.517107,horizontal-misalignment/1.5mm/16.7936.csv
4.74364,-0.67869,-0.21970,0.287777,0.068971,-1.451600,1.452055,-0.365110,-0.182140,0.287777,1.452055,False,977.517107,horizontal-misalignment/1.5mm/16.7936.csv


Plot tachometer pulses and calculate revolutions per minute (rpm)

In [None]:
# Left panel: tachometer signal up to index label 0.5; right panel: the rpm column.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))
tacho_ax, rpm_ax = ax
tacho_head = misalign['tachometer'][:0.5]
g = tacho_head.plot(ax=tacho_ax, title='Tachometer', xlabel='Time [s]', ylabel='Impulse')
g = misalign['rpm'].plot(ax=rpm_ax, title='RPM', xlabel='Time [s]', ylabel='RPM')

Plot histograms for each column to get sense of the distributions

In [None]:
# Histogram grid for the recording's columns, 50 bins per histogram.
g = misalign.hist(
    bins=50,
    figsize=(15, 10),
)

Plot short segment of time domain signal

In [None]:
# Positional window to show: begins at row `start` and spans `length` rows.
start = 1024
length = 4096
segment = misalign[['ax', 'ay', 'az']].iloc[start:start + length]
g = segment.plot(subplots=True, figsize=(15, 5), grid=True)

Plot subsampled time domain signal with subsampling factor

In [None]:
factor = 100  # keep every `factor`-th row
thinned = misalign[['ax', 'ay', 'az']].iloc[::factor, :]
g = thinned.plot(subplots=True, figsize=(15, 5), grid=True)

Plot magnitudes of each acceleration vector

In [None]:
# One subplot per magnitude column.
magnitudes = misalign[['mag_a', 'mag_b']]
g = magnitudes.plot(subplots=True, figsize=(10, 5), grid=True)

In [None]:
# Run the project's normality_tests helper on the misalignment recording.
# NOTE(review): what the helper reports is defined in the mafaulda module, not in this notebook.
src.normality_tests(misalign)

In [None]:
# Run the project's axis_spectrograms helper on the misalignment recording
# (presumably one spectrogram per axis - confirm in the mafaulda module).
src.axis_spectrograms(misalign)

Do not use the magnitude - it is not oscillatory. The individual axes have different responses, but the signal in each axis is **stationary**

In [None]:
subsample = 50  # pass only every `subsample`-th row to the helper
misalign_thinned = misalign.iloc[::subsample, :]
src.rms_orbitals(misalign_thinned)

Load normal conditions

In [None]:
# The archive stays open under `zip_file` because later cells read from it again.
zip_file = ZipFile(MAFAULDA_PATH)
filenames = src.get_mafaulda_files(zip_file)

# Keep only the recordings whose path starts with 'normal'.
normal_files = list(filter(lambda name: name.startswith('normal'), filenames))

normal_cond = src.import_files(zip_file, normal_files, src.csv_import)
normal_cond.head()

Calculate time domain features on normal conditions in one accelerometer axis

In [None]:
# Features returned by the project's time_domain_features helper for the 'ax'
# column of the normal-condition recordings.
# (The spelling of `td_featues` is kept because later cells reference this name.)
td_featues = src.time_domain_features(normal_cond, 'ax')
td_featues.head(10)

In [None]:
# Disabled pair plot of the time-domain features; uncomment the next line to render it.
#sns.pairplot(td_featues)

In [None]:
# Annotated heatmap of the pairwise correlations between the time-domain features.
td_correlations = td_featues.corr()
sns.heatmap(td_correlations, annot=True)

In [None]:
# Features returned by csv_import_td_features for one imbalance recording,
# with the index moved into ordinary columns.
#imbalance.to_csv('imbalance_features.csv')
imbalance = src.csv_import_td_features(zip_file, 'imbalance/10g/56.9344.csv').reset_index()

In [None]:
# 3D scatter of the 'mean', 'std' and 'rms' columns of the imbalance features.
fig, ax = plt.subplots(figsize=(5, 5), subplot_kw={'projection': '3d'})
ax.scatter(imbalance['mean'], imbalance['std'], imbalance['rms'])

ax.set_xlabel('Mean')
ax.set_ylabel('Standard deviation')
ax.set_zlabel('Amplitude')

plt.show()

In [None]:
from multiprocessing.pool import ThreadPool

# Thread pool that the later import cells submit their work to.
pool = ThreadPool(processes=4)

# Recordings whose path starts with 'imbalance'.
imbalance_files = list(filter(lambda name: name.startswith('imbalance'), filenames))

WINDOW = 2**14
src.resolution_calc(src.FS_HZ, WINDOW)

spectra = src.fft_csv_import(
    zip_file,
    'imbalance/10g/56.9344.csv',
    fs=src.FS_HZ,
    window=WINDOW,
    overlap=0.5,
    is_welch=True,
)
spectra.head(10)

In [None]:
# First row of the spectra table, restricted to index labels up to 500.
first_spectrum = spectra.iloc[0]
first_spectrum.loc[:500].plot(legend=False)

In [None]:
# Natural logarithm of the first row of the spectra table; the first 200 values are plotted.
log_spectrum = np.log(spectra.iloc[0])
log_spectrum.iloc[:200].plot(legend=False, grid=True)

In [None]:
WINDOW = 2**13
from tqdm.notebook import tqdm

zip_file = ZipFile(MAFAULDA_PATH)

# Keyword names follow the earlier direct call to fft_csv_import, so the
# arguments cannot be bound to the wrong parameters by position.
psd_options = dict(window=WINDOW, overlap=0.5, fs=src.FS_HZ, is_welch=True)

# Queue every file first and only then wait for the results. Calling .get()
# right after each apply_async() blocks until that one task has finished, so
# the pool would process a single file at a time.
# NOTE(review): the workers now read from the shared archive concurrently -
# confirm fft_csv_import is safe to run in parallel on one ZipFile.
pending = [
    pool.apply_async(src.fft_csv_import, (zip_file, name), psd_options)
    for name in imbalance_files
]

# Results are gathered in submission order, so the rows of the concatenated
# frame are ordered exactly as with sequential processing.
imbalancePSD = pd.concat([task.get() for task in tqdm(pending)])

In [None]:
# Inspect the table concatenated from the fft_csv_import results of all imbalance recordings.
imbalancePSD

In [None]:
# Values of the row labelled (10, '56.9344.csv'), restricted to labels up to 500.
selected_psd = imbalancePSD.T[(10, '56.9344.csv')]
selected_psd.loc[:500].plot()

In [None]:
# find_peaks lives in scipy.signal and no earlier cell imports it; without this
# import the cell fails with a NameError on a fresh kernel.
from scipy.signal import find_peaks

bins = imbalancePSD.T[(10, '56.9344.csv')]
peaks, properties = find_peaks(bins, prominence=0.01)
plt.plot(bins.index, bins)
# `peaks` holds positional indices, so the peak heights are selected by position.
plt.scatter(bins.index[peaks], bins.iloc[peaks], color='r')
plt.xlim(0, 1000)
plt.xlabel('Frequency [Hz]')
plt.ylabel('Amplitude')

In [None]:
# Scatter of the 'y_f0' and 'y_f1' peak columns, coloured by 'load_f0'.
peak_features = src.extract_peaks(imbalancePSD)
sns.lmplot(
    data=peak_features,
    x='y_f0',
    y='y_f1',
    hue='load_f0',
    fit_reg=False,
    legend=False,
)
plt.legend(loc='lower right')
plt.show()

In [None]:
# Scatter of the 'f_f0' and 'f_f1' peak columns, coloured by 'load_f0'.
sns.lmplot(
    data=peak_features,
    x='f_f0',
    y='f_f1',
    hue='load_f0',
    fit_reg=False,
    legend=False,
)
plt.legend(loc='lower right')
plt.show()

In [None]:
# Spectral statistics
from scipy.stats import skew, kurtosis


def rms(x):
    """Return the root mean square of the values in x."""
    return np.sqrt((x ** 2).mean())


def spectral_centroid(x):
    """Return the average of x's index labels weighted by x's values."""
    return np.average(x.index, weights=x)


# apply() on the transposed frame hands each original row to the function as a Series.
psd_transposed = imbalancePSD.T

fd_features = pd.concat(
    [
        imbalancePSD.mean(axis=1).rename('mean'),
        imbalancePSD.std(axis=1).rename('std'),
        psd_transposed.apply(skew).rename('skew'),
        psd_transposed.apply(kurtosis).rename('kurtosis'),
        psd_transposed.apply(rms).rename('rms'),
        psd_transposed.apply(spectral_centroid).rename('centroid'),
    ],
    axis=1,
)
fd_features.head(10)

In [None]:
# Disabled pair plot of the frequency-domain features; uncomment the next line to render it.
# sns.pairplot(fd_features)

In [None]:
# Flatten the index into columns so 'load_f0' can be used as the hue.
fd_plain = fd_features.reset_index()
sns.lmplot(
    data=fd_plain,
    x='rms',
    y='kurtosis',
    hue='load_f0',
    fit_reg=False,
    legend=False,
)
plt.legend(loc='lower right')
plt.show()

In [None]:
# 3D scatter of the 'kurtosis', 'centroid' and 'rms' frequency-domain features.
fig, ax = plt.subplots(figsize=(5, 5), subplot_kw={'projection': '3d'})
ax.scatter(fd_plain['kurtosis'], fd_plain['centroid'], fd_plain['rms'])

ax.set_xlabel('Kurtosis')
ax.set_ylabel('Centroid')
ax.set_zlabel('RMS')

In [None]:
# Compare different faults

### PSD of severe faults by axis (Welch) - up to 2 kHz (120 000 RPM)

In [None]:
# NOTE(review): FS is not referenced by any cell shown here; the imports pass src.FS_HZ instead.
FS = 50000
WINDOW = 2**13

# One recording per condition, used for the comparison in the cells below.
fault_files = [
    'horizontal-misalignment/2.0mm/60.8256.csv',
    'vertical-misalignment/1.90mm/61.44.csv',
    'imbalance/35g/56.7296.csv',
    'normal/61.44.csv',
    'overhang/ball_fault/35g/32.1536.csv',
    'overhang/cage_fault/35g/54.0672.csv',
    'overhang/outer_race/35g/53.4528.csv',
    'underhang/ball_fault/35g/50.7904.csv',
    'underhang/cage_fault/35g/56.5248.csv',
    'underhang/outer_race/35g/58.9824.csv'
]

# Open the archive in a context manager so the handle is closed after the
# import instead of leaking an anonymous ZipFile.
with ZipFile(MAFAULDA_PATH) as archive:
    normal_az_psd = src.fft_csv_import_by_axis(archive, 'normal/61.44.csv', 'az', WINDOW, 0.5, src.FS_HZ, True)
normal_az_psd

### Measurement place A - import worst faults and compare each axis's PSD

In [None]:
zip_file = ZipFile(MAFAULDA_PATH)


def _import_fault_psd(axis_name):
    """Return the concatenated fft_csv_import_by_axis results of all fault files for one axis.

    Every file is queued on the pool before any result is awaited. Calling
    .get() directly after each apply_async() waits for that single task, which
    makes the pool process the files one at a time.
    NOTE(review): the workers read from the shared archive concurrently -
    confirm fft_csv_import_by_axis is safe for that.
    """
    pending = [
        pool.apply_async(
            src.fft_csv_import_by_axis,
            (zip_file, name, axis_name, WINDOW, 0.5, src.FS_HZ, True),
        )
        for name in fault_files
    ]
    # Gathering in submission order keeps the rows in the order of fault_files.
    return pd.concat([task.get() for task in tqdm(pending, desc=axis_name)])


faultPSD_X = _import_fault_psd('ax')
faultPSD_Y = _import_fault_psd('ay')
faultPSD_Z = _import_fault_psd('az')

In [None]:
f_cutoff = 2000

# Drop the rpm column and keep only the rows of the transposed frame whose label is below the cutoff.
faultPSD_X_v = faultPSD_X.drop(columns='rpm')
x_transposed = faultPSD_X_v.T
x_psd = x_transposed[x_transposed.index < f_cutoff]
axis = x_psd.plot(
    subplots=True,
    figsize=(20, 15),
    xlabel='Frequency [Hz]',
    ylabel='Amplitude',
)

# Graph RPM
# n bounds the multiples of the rotating frequency that are drawn: range(2, n) yields 2 .. n - 1
n = 6
for ax, rpm in zip(axis, faultPSD_X['rpm']):
    f0 = rpm / 60
    ax.axvline(x=f0, color='red')
    for multiple in range(2, n):
        ax.axvline(x=f0 * multiple, color='orange')

In [None]:
# Same selection as for the X axis: drop rpm, transpose, keep labels below f_cutoff.
faultPSD_Y_v = faultPSD_Y.drop(columns='rpm')
y_transposed = faultPSD_Y_v.T
y_psd = y_transposed[y_transposed.index < f_cutoff]
p = y_psd.plot(subplots=True, figsize=(20, 15))

In [None]:
# Same selection as for the X axis: drop rpm, transpose, keep labels below f_cutoff.
faultPSD_Z_v = faultPSD_Z.drop(columns='rpm')
z_transposed = faultPSD_Z_v.T
z_psd = z_transposed[z_transposed.index < f_cutoff]
p = z_psd.plot(subplots=True, figsize=(20, 15))