In [None]:
import pandas as pd
import os
from typing import List, Tuple
from IPython.display import Markdown
import matplotlib.pylab as plt
import numpy as np
from scipy.signal import windows
from datetime import datetime, date
import sys
sys.path.append('../')
from vibrodiagnostics import (
    datasets,
    discovery
)

In [None]:
# Sampling frequency handed to the dataset loader and to every spectral
# computation in this notebook (presumably in Hz, since spectra are divided by
# 1000 and labelled kHz -- TODO confirm). The trailing 26667 is an alternative
# value left by the original author; TODO confirm which one is correct.
Fs = 26866  # 26667
# Relative path to the directory holding the recordings.
path = '../../datasets/Pumps-and-Compressors/'
list_of_files = datasets.steval_files(path)
# Iterated by the cells below as (name, ts) pairs.
DATASET = datasets.steval_dataset(list_of_files, Fs)
# Number of loaded items, shown as the cell output.
len(DATASET)

#### Side-by-side Welch spectra from the same places on different days

In [None]:
# Each recording name is a path whose last five components are
# <place>/<ISO date>/<device>/<position>/<seq>.<extension>.
# `waveforms` keeps the parsed fields together with the time series under 'ts';
# `all_metadata` keeps the same fields without the signal.
waveforms = []
all_metadata = []
for name, ts in DATASET:
    parts = name.split(os.path.sep)
    info = dict(
        place=parts[-5],
        date=datetime.fromisoformat(parts[-4]).date(),
        device=parts[-3],
        position=parts[-2],
        seq=int(parts[-1].split('.')[0]),
    )
    all_metadata.append(info)
    waveforms.append({**info, 'ts': ts})

# From here on `all_metadata` is a DataFrame with one row per recording.
all_metadata = pd.DataFrame.from_records(all_metadata)

In [None]:
def side_by_side(
        waveforms: list,
        location: str,
        plot_type: str = 'psd', # psd, psd_db, hist
        axis: str = 'x',
        filt_pos: str | None = None,
        figsize: tuple | None = None,
        ylim: tuple | None = None):
    machines = filter(lambda m: m['place'] == location, waveforms)

    if filt_pos is not None:
        machines = filter(lambda m: filt_pos in m['position'], machines)
        if 'noise' not in filt_pos:
            machines = filter(lambda m: '-noise' not in m['position'], machines)

    machines = list(machines)
    columns = set([(c['device'], c['position']) for c in machines])
    rows = len(machines) // len(columns)

    if figsize is None:
        figsize = (15, 20)
    fig, ax = plt.subplots(rows, len(columns), figsize=figsize, sharey=True)
    if rows == 1:
        ax = [ax]

    for j, col in enumerate(sorted(list(columns))):
        device, position = col
        placements = list(filter(lambda m: m['device'] == device and m['position'] == position, machines))

        ax[-1][j].set_xlabel('Frequency [kHz]')
        
        for i, row in enumerate(sorted(placements, key=lambda m: (m['date'], m['seq']))):
            date, seq = row['date'], row['seq']
            # y = ts['x'].iloc[int(T_WAVEFORM*Fs):int(T_WAVEFORM*Fs+Fs*TIME)]
            # subplot.plot(y.index, y)
            ts = row['ts']
            subplot = ax[i][j]
            if plot_type == 'psd':
                freqs, pxx = discovery.spectral_transform(ts, axis, 2**14, Fs)
                subplot.plot(freqs / 1000, pxx, color='darkblue', linewidth=0.5)
    
            elif plot_type == 'psd_db':
                freqs, pxx = discovery.spectral_transform(ts, axis, 2**14, Fs)
                pxx = dB = 20 * np.log10(pxx / 0.000001) # 1 dB = 1 um/s^2   
                subplot.plot(freqs / 1000, pxx, color='darkblue', linewidth=0.5)

            elif plot_type == 'hist':
                subplot.hist(ts[axis], color='darkblue', bins=100, edgecolor='black', linewidth=0.5)
            
            if ylim is not None:
                subplot.set_ylim(0, ylim)

            subplot.grid(True)
            subplot.set_title(f'{device}, {position} | {date}, {seq}.')

            if plot_type == 'psd':
                subplot.set_xlim(0, 5)
                ax[i][0].set_ylabel('Amplitude [m/s\u00B2]')
    
            elif plot_type == 'psd_db':
                subplot.set_xlim(0, 5)
                ax[i][0].set_ylabel('Amplitude [dB]')
            

    plt.tight_layout()
    plt.show()

# rows: (date - seq) / columns: (device - position)

#### Plot one spectrum in all axes

In [None]:
# Overview of every recording, ordered by place, device, position, date and
# sequence number, with a fresh 0..n-1 index.
(
    all_metadata
    .sort_values(by=['place', 'device', 'position', 'date', 'seq'])
    .reset_index(drop=True)
)

In [None]:
# Metadata of the single recording to inspect; every field must match exactly.
query = {
    'place': 'SHC3',
    'date': date(2024, 2, 20),
    'device': 'K3',
    'position': '001',
    'seq': 1
}
# Upper bound of the plotted frequency range.
F_LIMIT_KHZ = 2

matches = [
    w for w in waveforms
    if all(w[key] == value for key, value in query.items())
]
if not matches:
    # Fail with the offending query instead of a bare IndexError.
    raise LookupError(f'No recording matches {query}')
ts = matches[0]['ts']

fig, ax = plt.subplots(3, 1, figsize=(20, 10))

for i, axis in enumerate(('x', 'y', 'z')):
    freqs, pxx = discovery.spectral_transform(ts, axis, 2**14, Fs)
    # Rescale into a new array so the helper's return value is not mutated
    # in place (an in-place divide would also fail on integer arrays).
    freqs = freqs / 1000
    ax[i].plot(freqs, pxx, color='darkblue')
    ax[i].fill_between(freqs, pxx, color='lightblue', alpha=0.3)
    ax[i].grid(True)
    ax[i].set_xlim(0, F_LIMIT_KHZ)
    ax[i].set_title(f'Axis {axis}')
    ax[i].set_xlabel('Frequency [kHz]')
    ax[i].set_ylabel('Amplitude [m/s\u00B2]')

plt.tight_layout()
plt.show()

### Choose measurement direction

In [None]:
# Axis name passed as `axis` to every side_by_side() call in the sections below.
AXIS = 'x'          # x, y, z

### Noise

In [None]:
# Noise recordings at BVS: linear spectrum, y axis capped at 0.5.
side_by_side(
    waveforms,
    location='BVS',
    plot_type='psd',
    axis=AXIS,
    filt_pos='noise',
    figsize=(15, 4),
    ylim=0.5,
)

In [None]:
# Noise recordings at BVS: decibel spectrum.
side_by_side(
    waveforms,
    location='BVS',
    plot_type='psd_db',
    axis=AXIS,
    filt_pos='noise',
    figsize=(15, 4),
)

In [None]:
# Noise recordings at BVS: time-domain histogram.
side_by_side(
    waveforms,
    location='BVS',
    plot_type='hist',
    axis=AXIS,
    filt_pos='noise',
    figsize=(15, 4),
)

### Frequency spectra

In [None]:
# All SHC3 recordings: linear spectrum, y axis capped at 1.
side_by_side(
    waveforms,
    location='SHC3',
    plot_type='psd',
    axis=AXIS,
    figsize=(15, 20),
    ylim=1,
)

In [None]:
# BVS positions containing 'MTR': linear spectrum, y axis capped at 0.3.
side_by_side(
    waveforms,
    location='BVS',
    plot_type='psd',
    axis=AXIS,
    filt_pos='MTR',
    figsize=(15, 10),
    ylim=0.3,
)

In [None]:
# BVS positions containing 'PMP': linear spectrum, y axis capped at 0.3.
side_by_side(
    waveforms,
    location='BVS',
    plot_type='psd',
    axis=AXIS,
    filt_pos='PMP',
    figsize=(15, 10),
    ylim=0.3,
)

### Decibel frequency spectra

In [None]:
# All SHC3 recordings: decibel spectrum.
side_by_side(
    waveforms,
    location='SHC3',
    plot_type='psd_db',
    axis=AXIS,
    figsize=(15, 20),
)

In [None]:
# BVS positions containing 'MTR': decibel spectrum.
side_by_side(
    waveforms,
    location='BVS',
    plot_type='psd_db',
    axis=AXIS,
    filt_pos='MTR',
    figsize=(15, 10),
)

In [None]:
# BVS positions containing 'PMP': decibel spectrum.
side_by_side(
    waveforms,
    location='BVS',
    plot_type='psd_db',
    axis=AXIS,
    filt_pos='PMP',
    figsize=(15, 10),
)

### Histograms of time domain

In [None]:
# All SHC3 recordings: time-domain histogram.
side_by_side(
    waveforms,
    location='SHC3',
    plot_type='hist',
    axis=AXIS,
    figsize=(15, 20),
)

In [None]:
# BVS positions containing 'MTR': time-domain histogram.
side_by_side(
    waveforms,
    location='BVS',
    plot_type='hist',
    axis=AXIS,
    filt_pos='MTR',
    figsize=(15, 10),
)

In [None]:
# BVS positions containing 'PMP': time-domain histogram.
side_by_side(
    waveforms,
    location='BVS',
    plot_type='hist',
    axis=AXIS,
    filt_pos='PMP',
    figsize=(15, 10),
)

### Plot each waveform

#### Histograms

In [None]:
# One figure per recording: 100-bin histograms of every column, laid out in a
# single row of three panels, preceded by the recording name in bold.
for name, ts in DATASET:
    display(Markdown(f'**{name}**'))
    axis = ts.columns
    ax = ts[axis].hist(
        bins=100,
        layout=(1, 3),
        figsize=(15, 3),
        grid=True,
        edgecolor='black',
        linewidth=0.5,
    )
    plt.show()

#### Waveform in full length (60 s)

In [None]:
# One figure per recording: the complete time series, one subplot per column.
for name, ts in DATASET:
    display(Markdown(f'**{name}**'))
    axis = ts.columns

    ax = ts[axis].plot(subplots=True, grid=True, figsize=(20, 8))
    for panel, axname in zip(ax, axis):
        panel.set_xlabel('Time [s]')
        panel.set_ylabel(f'Amplitude ({axname}) [m/s^2]')
    plt.show()

#### Waveform in detail: starting at T_WAVEFORM s, over an interval of TIME s

In [None]:
# Start of the excerpt and its duration; both are converted to sample
# positions by multiplying with Fs.
T_WAVEFORM = 10
TIME = 0.1

for name, ts in DATASET:
    axis = ts.columns
    display(Markdown(f'**{name}**'))

    first = int(T_WAVEFORM*Fs)
    last = int(T_WAVEFORM*Fs+Fs*TIME)
    excerpt = ts[axis].iloc[first:last]
    ax = excerpt.plot(figsize=(20, 10), grid=True, subplots=True)

    for panel, axname in zip(ax, axis):
        panel.set_xlabel('Time [s]')
        panel.set_ylabel(f'Amplitude ({axname}) [m/s^2]')
    plt.show()

#### Frequency spectrum of one window

In [None]:
# Window length in samples: compare_limited_specrograms() slices NFFT/Fs index
# units of signal, and spectogram() passes the value as NFFT to plt.specgram.
NFFT = 2**14
# Start of the analysed window, passed as `t` to compare_limited_specrograms()
# (presumably seconds, matching the time index of the recordings -- confirm).
T_SEC = 10
def compare_limited_specrograms(spectrograms: list, axis: str, t: float, dB=False):
    """Plot the spectrum of one short window of every recording, stacked vertically.

    For each (name, ts) pair the slice ts[axis].loc[t : t + NFFT/Fs] is
    multiplied by a Hann window, transformed with a real FFT and normalised by
    the number of samples in the slice.

    Args:
        spectrograms: Iterable with a length, yielding (name, ts) pairs, where
            ts is indexable by `axis` and supports label slicing with `.loc`.
        axis: Column of the time series to analyse.
        t: Start of the window, in the units of the time-series index.
        dB: When truthy, plot 20*log10(amplitude / 1e-6) instead of the
            linear amplitude.

    The figure is left open; the caller is expected to call plt.show().
    """
    # squeeze=False keeps `ax` two-dimensional even for a single recording.
    fig, ax = plt.subplots(len(spectrograms), 1, figsize=(20, 80), squeeze=False)
    y_label = 'Amplitude [dB]' if dB else 'Amplitude [m/s\u00B2]'

    for panel, (name, ts) in zip(ax[:, 0], spectrograms):
        signal = ts[axis].loc[t:t+NFFT/Fs].to_numpy()
        n = len(signal)
        pxx = np.abs(np.fft.rfft(signal * windows.hann(n)) / n)
        if dB:
            # 0 dB corresponds to 1e-6, i.e. 1 um/s^2 for input in m/s^2.
            # Empty bins map to -inf without emitting a warning.
            with np.errstate(divide='ignore'):
                pxx = 20 * np.log10(pxx / 0.000001)
        # rfftfreq matches the rfft output bin for bin. Slicing fftfreq instead
        # would give a negative Nyquist frequency as the last bin for even n.
        freqs = np.fft.rfftfreq(n, d=1/Fs)

        panel.plot(freqs, pxx, color='darkblue')
        panel.fill_between(freqs, pxx, color='lightblue', alpha=0.3)
        panel.grid(True)
        panel.set_xlabel('Frequency [Hz]')
        panel.set_ylabel(y_label)
        panel.set_title(name)

# Single-window spectra of the x axis for every recording.
compare_limited_specrograms(DATASET, axis='x', t=T_SEC)
plt.tight_layout()
plt.show()

In [None]:
# Single-window spectra of the y axis for every recording.
compare_limited_specrograms(DATASET, axis='y', t=T_SEC)
plt.tight_layout()
plt.show()

In [None]:
# Single-window spectra of the z axis for every recording.
compare_limited_specrograms(DATASET, axis='z', t=T_SEC)
plt.tight_layout()
plt.show()

In [None]:
def spectogram(x):
    """Draw a dB-scaled magnitude spectrogram of `x` on a new 15x4 figure.

    Uses the module-level NFFT and Fs, mean detrending and the 'inferno'
    colour map with the colour scale floored at -60.

    Returns:
        (freqs, pxx) as produced by plt.specgram. Note the order is swapped
        relative to plt.specgram's own return value.
    """
    figure, axes = plt.subplots(figsize=(15, 4))
    colour_map = plt.get_cmap('inferno')
    spectrum, frequencies, _times, image = plt.specgram(
        x,
        NFFT=NFFT,
        Fs=Fs,
        detrend='mean',
        mode='magnitude',
        scale='dB',
        cmap=colour_map,
        vmin=-60,
    )
    figure.colorbar(image, aspect=20, pad=0.04)
    axes.set_xlabel('Time [s]')
    axes.set_ylabel('Frequency [Hz]')
    return frequencies, spectrum

def get_spectrograms(DATASET: List[Tuple[str, pd.DataFrame]], axis: str) -> list:
    """Render a spectrogram for every recording and collect the results.

    Args:
        DATASET: (name, ts) pairs; ts[axis] is the signal to analyse.
        axis: Column of the time series to analyse.

    Returns:
        One (name, freqs, pxx) tuple per recording, in input order, where
        freqs and pxx are the values returned by spectogram().
    """
    spectrograms = []

    for name, ts in DATASET:
        # Show the recording name above its figure.
        display(Markdown(f'**{name}**'))
        freqs, pxx = spectogram(ts[axis])
        spectrograms.append((name, freqs, pxx))
        plt.show()

    return spectrograms

In [None]:
# Spectrograms of the x axis for every recording.
x_spectra = get_spectrograms(DATASET, axis='x')

#### Calculate Welch spectrum (window average over the whole signal)

In [None]:

def welch_spectra(spectrograms: list, axis: str):
    """Plot the spectrum of every recording, stacked vertically.

    Each spectrum comes from discovery.spectral_transform(ts, axis, 2**14, Fs)
    (presumably a Welch average over the whole signal, per the section
    heading -- confirm against the helper).

    Args:
        spectrograms: Iterable with a length, yielding (name, ts) pairs.
        axis: Column of the time series to analyse.

    Returns:
        List of the amplitude arrays, one per recording, in input order.
        The figure is left open; the caller is expected to call plt.show().
    """
    # squeeze=False keeps `ax` two-dimensional even for a single recording.
    fig, ax = plt.subplots(len(spectrograms), 1, figsize=(20, 80), squeeze=False)
    amplitudes = []
    for panel, (name, ts) in zip(ax[:, 0], spectrograms):
        freqs, pxx = discovery.spectral_transform(ts, axis, 2**14, Fs)
        amplitudes.append(pxx)
        panel.plot(freqs, pxx, color='darkblue')
        panel.fill_between(freqs, pxx, color='lightblue', alpha=0.3)
        panel.grid(True)
        panel.set_xlabel('Frequency [Hz]')
        panel.set_ylabel('Amplitude [m/s\u00B2]')
        panel.set_title(name)
    return amplitudes

In [None]:
# Whole-signal spectra of the x axis; the amplitudes are kept in amp_X.
amp_X = welch_spectra(DATASET, axis='x')
plt.tight_layout()
plt.show()

In [None]:
# Whole-signal spectra of the y axis.
amp = welch_spectra(DATASET, axis='y')
plt.tight_layout()
plt.show()

In [None]:
# Whole-signal spectra of the z axis.
amp = welch_spectra(DATASET, axis='z')
plt.tight_layout()
plt.show()