In [None]:
import os
import os.path
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt

from scipy.fft import fft, ifft, rfft, irfft
from ssqueezepy import ssq_cwt, ssq_stft
from scipy.stats import skew, kurtosis


# Folders to read recordings from; the commented-out entries are disabled.
directories = (
    '../../datasets/NASA-Bearing-Dataset/No.1',
    #'NASA-Bearing-Dataset/No.2',
    #'NASA-Bearing-Dataset/No.3'
)
# Channel names assigned to the tab-separated columns of each recording file.
columns = ['b1_x', 'b1_y', 'b2_x', 'b2_y', 'b3_x', 'b3_y', 'b4_x', 'b4_y']
blocks = []

for directory in directories:
    # os.listdir() returns names in arbitrary order, so sort first: otherwise
    # the "every 8th file" subsample differs between machines and runs.
    # File names are zero-padded timestamps, so lexical order is also
    # chronological order.
    # NOTE(review): cells further down pick recordings by exact timestamp;
    # confirm the recordings they need are part of this subsample.
    for record in sorted(os.listdir(directory))[::8]:
        filename = os.path.join(directory, record)
        df = pd.read_csv(filename, sep='\t', header=None, names=columns)
        # Time axis within the recording: row number scaled by 1/20000
        # (the spectral cells below use FS = 20000 as the sampling rate).
        df['t'] = df.index * (1 / 20000)
        # The file name itself encodes when the recording was taken.
        df['timestamp'] = datetime.strptime(record, '%Y.%m.%d.%H.%M.%S')
        blocks.append(df)


# One long frame: all recordings stacked, each keeping its own 0..n-1 row index.
bearings = pd.concat(blocks, join='inner')
bearings

In [None]:
# Average of every column within each recording (one row per timestamp).
bearing_mean = bearings.groupby('timestamp').mean()

# Plot only the vibration channels; the frame shown below also carries 't'.
mean_plot_options = dict(
    figsize=(20, 6),
    grid=True,
    xlabel='Date',
    ylabel='Mean [g]',
)
bearing_mean[columns].plot(**mean_plot_options)
bearing_mean

In [None]:
# Root-mean-square of each channel within each recording.
#
# The numeric columns are selected explicitly instead of squaring whatever
# groupby.apply() hands to a lambda: the group frame can include the datetime
# 'timestamp' column, which cannot be squared, so the lambda version only
# worked through pandas' retry-without-grouping-column path. Squaring once and
# using the built-in grouped mean is also much faster than a Python callback
# per recording.
rms_inputs = columns + ['t']  # 't' kept so the resulting frame has the same columns as before
squared = bearings[rms_inputs].pow(2).assign(
    # .to_numpy() sidesteps index alignment: `bearings` has a repeated row index.
    timestamp=bearings['timestamp'].to_numpy()
)
bearing_rms = np.sqrt(squared.groupby('timestamp').mean())
bearing_rms[columns].plot(figsize=(20, 6), grid=True, ylim=(0.1, 0.3), xlabel='Date', ylabel='RMS [g]')
bearing_rms

In [None]:
# Sample standard deviation (ddof=1, the pandas default) of each channel
# within each recording.
#
# Uses the built-in grouped std on an explicit column selection. The previous
# apply(lambda x: x.std()) could receive the datetime grouping column inside
# each group, so its result depended on how the installed pandas version
# treats non-numeric columns in DataFrame.std().
bearing_std = bearings.groupby('timestamp')[columns + ['t']].std()
bearing_std[columns].plot(figsize=(20, 6), grid=True, ylim=(0, 0.3), xlabel='Date', ylabel='Standard deviation [g]')
bearing_std

In [None]:
# Peak-to-peak value (max - min) of each channel within each recording.
#
# Computed from the built-in grouped max/min on an explicit column selection,
# so the datetime grouping column never takes part in the subtraction.
per_recording = bearings.groupby('timestamp')[columns + ['t']]
bearing_pp = per_recording.max() - per_recording.min()

# Plot the vibration channels only, like the mean / RMS / std cells do; the
# span of the time axis 't' does not belong on a "Peak-to-peak [g]" axis.
bearing_pp[columns].plot(figsize=(20, 6), grid=True, xlabel='Date', ylabel='Peak-to-peak [g]')
bearing_pp

In [None]:
# Cut-off date used to split the per-recording statistics into two groups;
# the variable names call the earlier group "good" and the later one "bad".
tm = datetime(2003, 11, 9)

bearing_good_mean = bearing_mean.loc[bearing_mean.index <= tm]
bearing_bad_mean = bearing_mean.loc[bearing_mean.index > tm]

bearing_good_std = bearing_std.loc[bearing_std.index <= tm]
bearing_bad_std = bearing_std.loc[bearing_std.index > tm]

# Channel b3_x: standard deviation on the x axis against mean on the y axis,
# one point per recording, one colour per group (earlier group drawn first).
groups = (
    (bearing_good_std, bearing_good_mean),
    (bearing_bad_std, bearing_bad_mean),
)
for spread, centre in groups:
    plt.scatter(spread['b3_x'], centre['b3_x'], s=4)
plt.grid()
plt.xlim(0, 0.3)
plt.ylim(-0.2, 0.3)

In [None]:
def _nearest_recording(frame, when):
    """Return the rows of ``frame`` whose 'timestamp' is closest to ``when``.

    The loading cell keeps only every 8th file, so a recording with exactly
    the requested timestamp may not be loaded. An equality filter would then
    silently yield an empty frame; this picks the closest loaded recording
    instead, and returns the exact one whenever it is present.

    Raises ValueError when ``frame`` contains no rows.
    """
    stamps = pd.Series(frame['timestamp'].unique())
    distance = (stamps - pd.Timestamp(when)).abs().to_numpy()
    closest = stamps.iloc[distance.argmin()]
    return frame[frame['timestamp'] == closest]


def _rolling_abs_peak(record):
    """Largest absolute b3_x / b3_y value over a sliding 64-sample window.

    Same values as rolling(64).apply(lambda x: x.abs().max()), but computed by
    the built-in rolling max instead of one Python call per window.
    """
    return record[['b3_x', 'b3_y']].abs().rolling(window=64).max()


profile = _nearest_recording(bearings, datetime.fromisoformat('2003-10-22T12:06:24'))
m = _rolling_abs_peak(profile)
# The label shows which recording was actually used for this cloud.
plt.scatter(m['b3_x'], m['b3_y'], s=2, label=str(profile['timestamp'].iloc[0]))

fault = _nearest_recording(bearings, datetime.fromisoformat('2003-11-25T23:39:56'))
n = _rolling_abs_peak(fault)
plt.scatter(n['b3_x'], n['b3_y'], s=2, label=str(fault['timestamp'].iloc[0]))
plt.legend()

In [None]:
#m[(m['b1_x'] > 0.5) & (m['b1_y'] > 0.5)]

In [None]:
fig, ax = plt.subplots(2, 1, figsize=(20, 8))

FS = 20000       # sampling rate passed to specgram as Fs
WINDOW = 2**14   # FFT length; also the length of the Hamming window

# Same spectrogram settings for both recordings: magnitude of the b3_x/b3_y
# pair (np.hypot), Hamming window, 50 % overlap.
images = []
for axis, (title, record) in zip(ax, (('profile', profile), ('fault', fault))):
    pxx, freq, t, cax = axis.specgram(
        np.hypot(record['b3_x'], record['b3_y']),
        Fs=FS,
        mode='magnitude',
        window=np.hamming(WINDOW),
        NFFT=WINDOW,
        noverlap=WINDOW//2
    )
    axis.set_title(title)
    axis.set_xlabel('Time [s]')
    axis.set_ylabel('Frequency [Hz]')
    images.append(cax)

# A single colorbar is attached to both axes, so both images must share one
# colour range; otherwise the bar would only describe the last image drawn.
shared_low = min(image.get_clim()[0] for image in images)
shared_high = max(image.get_clim()[1] for image in images)
for image in images:
    image.set_clim(shared_low, shared_high)

plt.colorbar(cax, ax=ax)

In [None]:
WINDOW = 2 ** 12
OVERLAP = 0.5
# Hop between consecutive windows. Overlap is the shared fraction, so the hop
# is the remaining fraction; WINDOW * OVERLAP is only correct at exactly 0.5.
STEP = WINDOW * (1 - OVERLAP)

v = np.hypot(fault['b3_x'], fault['b3_y']).to_numpy()

# Frequency spectrum per window.
# Column labels are the bin frequencies truncated to whole Hz.
freqs = [int(i * (FS/WINDOW)) for i in range(WINDOW//2+1)]
taper = np.hamming(WINDOW)  # identical for every window, so build it once
matrix = [
    np.abs(
        rfft(v[i:i+WINDOW] * taper)
    )
    # "+ 1" keeps the last full window when it ends exactly at the end of the
    # signal; without it that window was dropped.
    for i in range(0, len(v) - WINDOW + 1, int(STEP))
]
tf_spectrum = pd.DataFrame(data=matrix, columns=freqs)
# Row label = start time of the window in seconds.
tf_spectrum.index = (tf_spectrum.index * STEP) / FS
tf_spectrum

In [None]:
# Third row of tf_spectrum (one analysis window), drawn on a log amplitude axis.
slicetf = tf_spectrum.iloc[2]
slice_plot_options = {
    'figsize': (20, 6),
    'xlabel': 'Frequency [Hz]',
    'ylabel': 'Amplitude',
    'logy': True,
}
slicetf.plot(**slice_plot_options)

In [None]:
src = tf_spectrum.iloc[2]

fig, ax = plt.subplots(2, 1, figsize=(20, 10))

# Moving average over 9 bins. mode='same' keeps the result the same length as
# the spectrum and centred on each bin; mode='full' returned 8 extra points
# and shifted every feature 4 bins to the right.
kernel_width = 9
smoothed = np.convolve(src, np.ones(kernel_width) / kernel_width, mode='same')
ax[0].plot(src.index, smoothed)
ax[0].set_yscale('log')
ax[0].set_xlabel('Frequency [Hz]')
ax[0].set_ylabel('Amplitude')
ax[0].grid()

# Magnitude of the inverse real FFT of the log-scaled spectrum.
# NOTE(review): np.log is the natural logarithm, so 20 * log(...) is not in
# decibels; kept as is because the plot limits were chosen for this scale.
y = np.abs(irfft(20 * np.log(src.to_numpy() / 100)))
ax[1].plot(y)
ax[1].set_ylim(0, 1)

In [None]:
# Smooth the spectrum using its Fourier transform


In [None]:
def viz(x, Tx, Wx):
    """Show the magnitudes of two coefficient matrices as separate images.

    ``Wx`` is drawn first with automatic colour limits, then ``Tx`` with the
    colour range fixed to 0..0.2. Both use the 'turbo' colormap and a
    stretched ('auto') aspect. ``x`` is accepted but not used.
    """
    panels = (
        (Wx, {}),
        (Tx, {'vmin': 0, 'vmax': .2}),
    )
    for coefficients, colour_limits in panels:
        plt.imshow(np.abs(coefficients), aspect='auto', cmap='turbo', **colour_limits)
        plt.show()
    
# Magnitude of the b3_x / b3_y pair of the `fault` recording as a plain array.
xo = np.hypot(fault['b3_x'], fault['b3_y']).to_numpy()

# Only the first two return values of each transform are used; the rest are
# discarded.
Twxo, Wxo = ssq_cwt(xo)[:2]
viz(xo, Twxo, Wxo)

# The STFT-based matrices are flipped upside down before being displayed.
Tsxo, Sxo = ssq_stft(xo)[:2]
viz(xo, np.flipud(Tsxo), np.flipud(Sxo))

In [None]:
FS = 20000
WINDOW = 2 ** 14
OVERLAP = 0.5
# Hop between consecutive windows. Overlap is the shared fraction, so the hop
# is the remaining fraction; WINDOW * OVERLAP is only correct at exactly 0.5.
STEP = WINDOW * (1 - OVERLAP)

taper = np.hamming(WINDOW)  # identical for every window, so build it once
spectra = []
# Groups come back in timestamp order, so rows of `psd` are chronological.
for index, measure in bearings.groupby(by='timestamp'):
    v = np.hypot(measure['b3_x'], measure['b3_y']).to_numpy()
    s = [
        np.abs(rfft(v[i:i+WINDOW] * taper))
        # "+ 1" keeps a last full window that ends exactly at the end of the
        # recording; without it that window was dropped.
        for i in range(0, len(v) - WINDOW + 1, int(STEP))
    ]
    spectra.extend(s)

# Column labels are the bin frequencies truncated to whole Hz.
freqs = [int(i * (FS/WINDOW)) for i in range(WINDOW//2+1)]
# Despite the name, `psd` holds magnitude spectra (|rfft|), one row per
# analysis window; rows carry no timestamp label.
psd = pd.DataFrame(data=spectra, columns=freqs)
psd

In [None]:
# First row of `psd`, drawn on a log amplitude axis.
src = psd.iloc[0]
first_spectrum_options = {
    'figsize': (20, 6),
    'xlabel': 'Frequency [Hz]',
    'ylabel': 'Amplitude',
    'logy': True,
}
src.plot(**first_spectrum_options)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(20, 5))

# Moving average of `src` over 15 bins. mode='same' keeps the result the same
# length as the spectrum and centred on each bin; mode='full' returned 14
# extra points and shifted every feature 7 bins to the right.
kernel_width = 15
smoothed = np.convolve(src, np.ones(kernel_width) / kernel_width, mode='same')
ax.plot(src.index, smoothed)
ax.set_yscale('log')
ax.set_ylim(2, 30)
ax.set_xlabel('Frequency [Hz]')
ax.set_ylabel('Amplitude')
ax.grid()

In [None]:
# Strongest frequency bin of every `psd` row, ignoring the first two bins.
# argmax() runs on a slice that starts at column 2, so the offset has to be
# added back to get the real bin number; without it every reported frequency
# was two bins too low.
skipped_bins = 2
max_freq_index = psd.iloc[:, skipped_bins:].to_numpy().argmax(axis=1) + skipped_bins
max_freqs = max_freq_index * (FS / WINDOW)
# TODO: compute the second most significant frequency - within a row, the
# frequencies sorted by their amplitude
# TODO: then compute spectral flatness the same way
plt.xlabel('psd row')
plt.ylabel('Strongest frequency [Hz]')
plt.plot(max_freqs, marker='o', markersize=3, linestyle='None')

In [None]:
from scipy.signal import find_peaks

# Find the most significant frequencies, ordered from the largest amplitude,
# in a single frequency spectrum (row 2 of `psd`).
# NOTE(review): np.log is the natural logarithm, so these levels are not
# decibels; the prominence threshold below is tuned to this scale.
x = 20 * np.log(psd.iloc[2].to_numpy() / 100)
peaks, properties = find_peaks(x, prominence=50, width=1, distance=20)

# Log-scaled spectrum with the detected peaks marked on top.
plt.plot(x)
plt.plot(peaks, x[peaks], 'o', markersize=3)

# Table of peak frequency and level, strongest peak first.
peak_table = pd.DataFrame({
    'f': peaks * (FS / WINDOW),
    'amp': x[peaks],
})
peak_table.sort_values(by='amp', ascending=False)

In [None]:
# Find the most significant peak frequencies of every spectrum.
# One output row per `psd` row: (f1, f2), strongest peak first. (The original
# note asked for three frequencies; the code keeps two, which is what the
# scatter plot below uses.)
# NOTE(review): np.log is the natural logarithm, so these levels are not
# decibels; the prominence threshold is tuned to this scale.
dbPSD = 20 * np.log(psd / 100)
freqs = []
for index, bins in dbPSD.iterrows():
    # Work on a plain array: the Series is labelled with integer frequencies,
    # so indexing it with bin positions would look up labels instead.
    levels = bins.to_numpy()
    peaks, properties = find_peaks(levels, prominence=50, width=1, distance=20)
    # Rank the peaks by the level of *this* spectrum. The earlier version used
    # x[peaks], i.e. the levels of a single spectrum left over from the
    # previous cell, for every row.
    f_max = pd.DataFrame(
        data={'f': peaks * (FS / WINDOW), 'amp': levels[peaks]}
    ).sort_values(by='amp', ascending=False)

    # Always emit exactly two values; pad with NaN when fewer peaks were found
    # so the rows stack into a regular 2-column array.
    strongest = np.full(2, np.nan)
    found = f_max['f'].to_numpy()[:2]
    strongest[:len(found)] = found
    freqs.append(strongest)

# reshape keeps the two-column layout even when `psd` has no rows.
freqs = np.array(freqs).reshape(-1, 2)

# Strongest peak frequency against second strongest, one point per spectrum.
plt.scatter(freqs.T[0], freqs.T[1], s=3)

In [None]:
from scipy.stats.mstats import gmean

# Spectral flatness of every `psd` row: geometric mean divided by
# arithmetic mean of the row's bins.
spectral_flatness = [
    gmean(row_bins) / np.mean(row_bins)
    for _row_label, row_bins in psd.iterrows()
]

plt.grid()
plt.plot(spectral_flatness)

# Change detection on these quantities (algorithm)

In [None]:
# TODO: statistics over windows in time (rolling)

# Per-row summary statistics of `psd`, all drawn on one set of axes.
row_statistics = {
    'mean': np.mean(psd, axis=1),
    'std': np.std(psd, axis=1),
    'skew': skew(psd, axis=1),
}
for statistic_name, values in row_statistics.items():
    plt.plot(values, label=statistic_name)
# Disabled alternatives kept from the original cell:
#   plt.plot(kurtosis(psd, axis=1), label='kurtosis')
#   plt.plot(psd.std(axis=1), label='std')
plt.legend()
plt.grid()