# KSB Cloud for BVS pumps

## Recommendations for data collection
1. **Download RMS velocity for entire 5 years** to calculate total hours each pump was on / off
    - According to last year: ksb#1 is more damaged than ksb#7 
    - Indicators (ksb#1 / ksb#7):
        - Ratios of hours in service (0.57 / 0.40)
        - Average RMS vibration velocity in ON state: x (/), y (/), z (/)
    - Risks: damage may be caused by factors other than hours in service
2. **Download spectra** from second pump for last year or last 5 years

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from tsfel import feature_extraction as ft
from scipy.stats import entropy
import seaborn as sb
import sys
sys.path.append('../')
from vibrodiagnostics import discovery

In [None]:
# RMS velocity in mm/s over year for two pumps: ksb#1 and ksb#7
def input_dataset(
    filename: str,
    path: str = '../../inspections/pump-station/rms-vibrations',
) -> pd.DataFrame:
    """Load one pump's RMS vibration export into a time-indexed table.

    The file is read as a semicolon-delimited CSV with decimal commas, and
    the 'Dátum' column is parsed as a day-first date.

    Args:
        filename: Name of the CSV file inside ``path``.
        path: Directory holding the exports. Defaults to the location that
            was previously hard-coded, so existing calls are unaffected.

    Returns:
        DataFrame indexed by ``timestamp``; the 'RMS X/Y/Z [mm/s]' columns
        are renamed to 'x', 'y' and 'z'. Any other column is kept as is.
    """
    table = pd.read_csv(
        os.path.join(path, filename),
        parse_dates=['Dátum'], dayfirst=True,
        delimiter=';', decimal=',',
    )
    # Short names keep later column arithmetic (f'y-{pump}') readable.
    table = table.rename(columns={
        'Dátum': 'timestamp',
        'RMS X [mm/s]': 'x',
        'RMS Y [mm/s]': 'y',
        'RMS Z [mm/s]': 'z',
    })
    return table.set_index('timestamp')

# Load both exports in a fixed order: index 0 is ksb1.csv, index 1 is ksb7.csv
pumps = list(map(input_dataset, ('ksb1.csv', 'ksb7.csv')))
pumps

In [None]:
# RMS velocity over time for the first loaded pump (ksb1.csv)
pumps[0].plot(
    figsize=(20, 5),
    grid=True,
    xlabel='Date',
    ylabel='RMS velocity [mm/s]',
)

In [None]:
# RMS velocity over time for the second loaded pump (ksb7.csv)
pumps[1].plot(
    figsize=(20, 5),
    grid=True,
    xlabel='Date',
    ylabel='RMS velocity [mm/s]',
)

In [None]:
# Put both pumps on one shared time axis. The outer join keeps every
# timestamp of either pump; gaps are back-filled from the next known sample
# and rows that still contain NaN are dropped.
station = (
    pumps[0]
    .join(pumps[1], how='outer', lsuffix='-p1', rsuffix='-p7')
    .bfill()
    .dropna()
)

# NOTE(review): this empty frame is not read in this cell; `operations` is
# rebuilt from `station` before its next use.
operations = pd.DataFrame(index=station.index)

# A pump counts as ON (1) when its y velocity exceeds that pump's own mean
# y velocity; the mean is a heuristic threshold, not a calibrated limit.
for col in ('p1', 'p7'):
    y_velocity = station[f'y-{col}']
    station[col] = (y_velocity > y_velocity.mean()).astype(int)
station[['p1', 'p7']]

In [None]:
# Display the merged table: suffixed x/y/z velocities of both pumps plus the p1/p7 ON flags
station

In [None]:
# Summary statistics for every column of the merged table
station.describe()

In [None]:
# ON/OFF timeline of both pumps as filled areas, with one tick per month.
# NOTE(review): pandas area plots are stacked by default, so with
# ylim=(0, 1) the p7 band is pushed out of view wherever both pumps are ON
# at the same time - confirm this is intended.
on_off = station[['p1', 'p7']]
ax = on_off.plot.area(
    figsize=(20, 5),
    xlabel='Date',
    ylabel='On',
    color=['#FFD23F', '#EE4266'],
    ylim=(0, 1),
)
time_axis = ax.xaxis
time_axis.set_major_locator(mdates.MonthLocator(bymonth=range(1, 13)))
time_axis.set_major_formatter(mdates.DateFormatter('%Y-%b'))

In [None]:
# Duration of every uninterrupted ON section, per pump.
# 'switch' increases by one each time the ON/OFF flag changes, so all rows of
# one continuous section share the same switch value.
intervals = {}
operations = station[['p1', 'p7']]
for col in operations.columns:
    status = operations[col]
    toggled = status.diff().abs() >= 1
    section = pd.DataFrame({
        'status': status,
        'switch': toggled.astype(int).cumsum(),
    })
    running = section[section['status'] == 1]
    # Duration is last minus first timestamp of the section, so a section
    # holding a single sample contributes a zero-length interval.
    intervals[col] = pd.Series([
        group.index.max() - group.index.min()
        for pos, group in running.groupby(by='switch')
    ])
intervals

In [None]:
# Per-pump statistics of the ON sections:
#   min / max / avg - shortest, longest and mean continuous ON duration
#   sum             - total time spent ON
#   ratio_on_state  - total ON time divided by the whole observed time span
observed_span = operations.index.max() - operations.index.min()
stats = pd.DataFrame.from_records([
    {
        'pump': pump,
        'min': period.min(),
        'max': period.max(),
        'avg': period.mean(),
        'sum': period.sum(),
        'ratio_on_state': period.sum() / observed_span,
    }
    for pump, period in intervals.items()
]).set_index('pump')
stats

In [None]:
# Average RMS velocity in ON state (last year)
# https://stackoverflow.com/questions/70025048/pandas-average-if

# Per pump, keep only the samples flagged ON and collect the mean and the
# standard deviation of each axis.
axis_means = []
axis_stds = []
for name in ('p1', 'p7'):
    # The ON flag is stored as int 0/1, so compare against 1 like the other
    # cells do instead of against the boolean True.
    on_samples = station.loc[
        station[name] == 1,
        [f'x-{name}', f'y-{name}', f'z-{name}']
    ]
    axis_means.append(on_samples.mean())
    axis_stds.append(on_samples.std())

# One row per '<axis>-<pump>' label; the single value column lines up with
# the error-bar frame passed as yerr.
average_velocity = pd.concat(axis_means).to_frame()
std_velocity = pd.concat(axis_stds).to_frame()

ax = average_velocity.plot.bar(
    legend=False,
    ylabel='Velocity [mm/s]',
    xlabel='Axis and Pump',
    title='Average vibration RMS velocity in ON state',
    grid=True,
    yerr=std_velocity
)
plt.show()

In [None]:
# Average velocity per on state (time section)
def sliding_velocity_mean(station: pd.DataFrame, column: str) -> pd.DataFrame:
    """Average the x/y/z velocity of one pump over each continuous ON section.

    Args:
        station: Table holding, for the given pump, the ON flag ``column``,
            the velocity columns ``x-<column>``, ``y-<column>``,
            ``z-<column>`` and the section counter ``switch-<column>``.
        column: Pump label used as the ON flag name and as column suffix.

    Returns:
        DataFrame indexed by ``timestamp`` with columns 'x', 'y', 'z'. Every
        ON section yields two rows carrying the same means: one at the
        section's first timestamp and one at its last, so a line plot draws
        a flat segment per section. When the pump has no ON rows the result
        is an empty frame with the same columns instead of a KeyError.
    """
    axes = ('x', 'y', 'z')
    switch_column = f'switch-{column}'
    value_columns = [f'{axis}-{column}' for axis in axes]
    on_samples = station.loc[
        station[column] == 1,
        value_columns + [switch_column]
    ]

    rows = []
    for _, group in on_samples.groupby(by=switch_column):
        means = {axis: group[f'{axis}-{column}'].mean() for axis in axes}
        rows.append({'timestamp': group.index.min(), **means})
        rows.append({'timestamp': group.index.max(), **means})

    # Naming the columns explicitly keeps 'timestamp' available for
    # set_index even when no section was found.
    return pd.DataFrame(rows, columns=['timestamp', *axes]).set_index('timestamp')

# Number the ON/OFF sections of each pump: the counter grows by one whenever
# the flag changes, which is the grouping key sliding_velocity_mean expects.
operations = station[['p1', 'p7']]
for col in operations.columns:
    toggled = operations[col].diff().abs() >= 1
    station[f'switch-{col}'] = toggled.astype(int).cumsum()

# Section means of both pumps on a shared time axis; back-filling closes the
# gaps the outer join leaves where only one pump has a section boundary.
p1_means = sliding_velocity_mean(station, 'p1')
p7_means = sliding_velocity_mean(station, 'p7')
section_means = p1_means.join(
    p7_means,
    how='outer',
    lsuffix='-p1',
    rsuffix='-p7',
)
section_means.bfill().plot(figsize=(20, 5), grid=True, marker='s')
plt.show()

In [None]:
# Load every spectrum file in the folder into one table keyed by month.
fa_path = '../../inspections/pump-station/monthly-frequency-ksb-1'
spectrum_columns = {
    'Frequency [Hertz]': 'frequency',
    'AmplitudeX [mm/s]': 'x',
    'AmplitudeY [mm/s]': 'y',
    'AmplitudeZ [mm/s]': 'z',
}

samples = {}
for filename in os.listdir(fa_path):
    freqs = pd.read_csv(
        os.path.join(fa_path, filename),
        delimiter=';', decimal=','
    )
    freqs = freqs.rename(columns=spectrum_columns).set_index('frequency')
    # The month key is the integer between the first and second '-' of the
    # file name.
    month = int(filename.split('-')[1])
    samples[month] = freqs

# Rows are indexed by (month, frequency), sorted.
observations = pd.concat(samples).sort_index()
observations

In [None]:
# X-axis amplitudes as a wide table: one row per month, one column per frequency
observations['x'].unstack()

In [None]:
# One figure per axis, overlaying the spectra of all months
for axis in ('x', 'y', 'z'):
    # Transposed so that frequency runs along the x axis and each month
    # becomes its own line.
    by_frequency = observations[axis].unstack().T
    by_frequency.plot(
        figsize=(20, 4),
        grid=True,
        xlabel='Frequency [Hz]',
        ylabel=f'Amplitude {axis.upper()} [mm/s]',
    )
    plt.show()

In [None]:
# Month-to-month correlation of the spectra, one heatmap per axis
fig, ax = plt.subplots(1, 3, figsize=(20, 5))
for panel, name in zip(ax, ('x', 'y', 'z')):
    monthly_spectra = observations[name].unstack().T
    sb.heatmap(monthly_spectra.corr(), annot=True, ax=panel)
plt.show()

In [None]:
# Spectra of a single month, one panel per axis, with basic spectrum facts
month = 1
fig, ax = plt.subplots(3, 1, figsize=(20, 10))
for i, axis in enumerate(('x', 'y', 'z')):
    pxx = observations[axis].unstack().T[month]
    axis_label = axis.upper()

    print(f'{axis_label}:')
    peak = pxx.idxmax()
    print(f'\tMax.frequency {peak} Hz (RPM: {peak * 60})')
    # Mean spacing between neighbouring frequency bins
    deltaF = pxx.index.diff().dropna().to_numpy().mean()
    # Sampling rate taken as twice the highest frequency in the spectrum
    fs = 2 * pxx.index.max()
    print(f'\tResolution: {deltaF} Hz (RPM: {deltaF * 60})')
    print(f'\tWindow length: {int(fs / deltaF)}')

    pxx.plot(
        ax=ax[i],
        color='darkblue',
        grid=True,
        xlabel='Frequency [Hz]',
        ylabel=f'Amplitude {axis_label} [mm/s]',
    )
plt.tight_layout()
plt.show()

In [None]:
# Calculate features
def calc_features(df: pd.DataFrame, axis: str) -> pd.DataFrame:
    """Compute one row of spectral descriptors per month for a single axis.

    Args:
        df: Spectra indexed by (month, frequency) with one column per axis.
        axis: Name of the column to analyse.

    Returns:
        DataFrame indexed by ``month`` with the columns centroid, std,
        skewness, kurtosis, roll_on, roll_off, noisiness, flux, energy,
        entropy and negentropy.
    """
    records = []
    previous = np.array([])

    spectra = df[axis].unstack()
    for month, spectrum in spectra.iterrows():
        f = np.array(spectrum.index)
        amplitudes = np.array(spectrum)
        envelope_spectrum = discovery.envelope_signal(f, amplitudes)

        row = {'month': month}
        # Amplitude-weighted mean frequency
        row['centroid'] = np.average(f, weights=amplitudes)
        row['std'] = ft.calc_std(amplitudes)
        row['skewness'] = ft.skewness(amplitudes)
        row['kurtosis'] = ft.kurtosis(amplitudes)
        # Same helper at two thresholds (0.05 and 0.85); presumably a
        # fraction of spectral energy - TODO confirm against `discovery`
        row['roll_on'] = discovery.spectral_roll_off_frequency(f, amplitudes, 0.05)
        row['roll_off'] = discovery.spectral_roll_off_frequency(f, amplitudes, 0.85)
        row['noisiness'] = discovery.signal_to_noise(amplitudes)
        # Flux is 1 - correlation with the previous month's spectrum. It is
        # NaN for the first month and whenever the lengths differ.
        if len(previous) == len(amplitudes):
            row['flux'] = (1 - np.corrcoef(previous, amplitudes))[0, 1]
        else:
            row['flux'] = np.nan
        row['energy'] = discovery.energy(amplitudes)
        row['entropy'] = entropy(amplitudes / np.sum(amplitudes))
        row['negentropy'] = discovery.negentropy(envelope_spectrum)

        records.append(row)
        previous = amplitudes

    return pd.DataFrame.from_records(records).set_index('month')

# Feature table for all three axes, indexed by (axis, month)
features = pd.concat({
    axis_name: calc_features(observations, axis_name)
    for axis_name in ('x', 'y', 'z')
})
features


In [None]:
# Combine the three axes into one Euclidean magnitude per feature and month
v = features.unstack().T  # rows: (feature, month), columns: axis
squared_sum = np.sum(v ** 2, axis=1)
featues_mag = (squared_sum ** (1 / 2)).unstack().T
featues_mag

In [None]:
#sb.pairplot(features)

In [None]:
# Rebuild a time-domain signal per month with the inverse real FFT.
# The sampling rate is taken as twice the highest frequency in the spectrum
# (max f = 1 kHz means fs = 2 kHz).
# NOTE(review): the input columns look like amplitudes only; if no phase is
# stored, the result is not the originally measured waveform - confirm.
months = {}
for month, group in observations.groupby(level=0):
    pxx = group.droplevel(0)
    fs = pxx.index.max() * 2
    sample_period = 1 / fs

    signal = np.fft.irfft(pxx, axis=0)
    ts = pd.DataFrame(signal, columns=['x', 'y', 'z'])
    # Turn the sample number into seconds
    ts.index = ts.index * sample_period
    months[month] = ts

waveforms = pd.concat(months)
waveforms

In [None]:
# Reconstructed waveform of month 1, one subplot per axis
first_month = waveforms.loc[1]
first_month.plot(
    subplots=True,
    figsize=(10, 7),
    grid=True,
    xlabel='Time [s]',
    ylabel='Amplitude',
    ylim=(-0.03, 0.03),
)
plt.show()