# KSB Guard Monitoring for BVS pumps

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sb

# Larger default font for every figure drawn below.
plt.rcParams.update({'font.size': 15})
# Root directory of the dataset; the loaders below build their paths from it.
PATH = '../datasets/ksb-cloud/'
# Directory whose files are read as frequency spectra by the spectra cells.
fa_path = os.path.join(PATH, 'fft-pumps')

RMS velocity in mm/s over a period of one year for two pumps: ksb#1 and ksb#7

In [None]:
def input_dataset(filename: str) -> pd.DataFrame:
    """Load one RMS vibration export and index it by timestamp.

    The file is looked up in the ``rms-vibrations`` directory under ``PATH``
    and parsed as a semicolon-separated CSV with decimal commas. The
    ``Dátum`` column is parsed as day-first dates and becomes the index,
    renamed to ``timestamp``; the three RMS columns are shortened to ``x``,
    ``y`` and ``z``.

    Args:
        filename: Name of the CSV file inside the ``rms-vibrations`` directory.

    Returns:
        The parsed table indexed by ``timestamp``.
    """
    column_names = {
        'Dátum': 'timestamp',
        'RMS X [mm/s]': 'x',
        'RMS Y [mm/s]': 'y',
        'RMS Z [mm/s]': 'z',
    }
    source = os.path.join(PATH, 'rms-vibrations', filename)
    raw = pd.read_csv(
        source,
        delimiter=';',
        decimal=',',
        parse_dates=['Dátum'],
        dayfirst=True,
    )
    return raw.rename(columns=column_names).set_index('timestamp')

# Load the RMS exports of both pumps: index 0 is ksb1.csv, index 1 is ksb7.csv.
pumps = [input_dataset(name) for name in ('ksb1.csv', 'ksb7.csv')]
# Bare expression so the notebook shows the loaded tables as the cell output.
pumps

Vibration severity levels for class III machines (large machines on a rigid foundation), with velocity thresholds in mm/s

In [None]:
# Vibration severity zones, ordered by rising velocity threshold.
# 'velocity' is the lower bound of a zone in mm/s; the plotting cells take the
# next entry's 'velocity' as the upper bound, 'color' as the band colour and
# 'name' as the band label.
levels = [
    {
        'name': 'good (A)',
        'velocity': 0,
        'color': 'green'
    },
    {
        'name': 'satisfactory (B)',
        'velocity': 1.8,
        'color': 'yellow'
    },
    {
        'name': 'unsatisfactory (C)',
        'velocity': 4.5,
        'color': 'orange'
    },
    {
        'name': 'unacceptable (D)',
        'velocity': 11.2,
        'color': 'red'
    }
]

# RMS velocity of the first pump (pumps[0]) drawn over the severity zones.
ax = pumps[0].plot(figsize=(12, 6), grid=True, xlabel='Date', ylabel='RMS velocity [mm/s]', lw=1)
ax.set_ylim(0, 5)
ax.get_legend().remove()
# Each zone spans from its own threshold up to the threshold of the next zone,
# so the last level only closes the band below it and gets no band of its own.
for zone, upper in zip(levels, levels[1:]):
    ax.axhspan(zone['velocity'], upper['velocity'], color=zone['color'], label=zone['name'], alpha=0.2)
# Draw the legend so the zone labels passed to axhspan are actually shown,
# as in the plot of the second pump.
ax.legend()
plt.show()

In [None]:
# RMS velocity of the second pump (pumps[1]) drawn over the severity zones.
ax = pumps[1].plot(figsize=(12, 6), grid=True, xlabel='Date', ylabel='RMS velocity [mm/s]', lw=1)
ax.set_ylim(0, 5)
ax.get_legend().remove()
# Each zone spans from its own threshold up to the threshold of the next zone,
# so the last level only closes the band below it and gets no band of its own.
for zone, upper in zip(levels, levels[1:]):
    ax.axhspan(zone['velocity'], upper['velocity'], color=zone['color'], label=zone['name'], alpha=0.2)
ax.legend()
# Render explicitly, like the other plotting cells, instead of leaving the
# Legend object as the cell's echoed value.
plt.show()

Merge and plot the on/off states of both pumps throughout the year. A pump is considered ON when its y-axis RMS velocity (in mm/s) is greater than its mean.

In [None]:
# Put both pumps on one time axis; their columns get the suffixes -p1 / -p7.
station = pumps[0].join(pumps[1], how='outer', lsuffix='-p1', rsuffix='-p7')
# Fill the gaps left by the outer join with the next available reading and
# drop the rows that still contain missing values.
station = station.bfill().dropna()

# A pump is flagged ON (1) while its y-axis RMS velocity is above that
# column's mean over the whole record, otherwise OFF (0).
for col in ('p1', 'p7'):
    y_velocity = station[f'y-{col}']
    station[col] = (y_velocity > y_velocity.mean()).astype(int)
station[['p1', 'p7']]

In [None]:
# Show the merged table of both pumps as the cell output.
station

In [None]:
# Summary statistics (pandas describe) of the merged table.
station.describe()

In [None]:
# Area plot of the ON flags of both pumps over time.
on_flags = station[['p1', 'p7']]
ax = on_flags.plot.area(
    color=['#FFD23F', '#EE4266'],
    figsize=(20, 5),
    ylim=(0, 1),
    xlabel='Date',
    ylabel='On',
)
# One major tick per calendar month, labelled with year and abbreviated month.
month_ticks = mdates.MonthLocator(bymonth=range(1, 13))
ax.xaxis.set_major_locator(month_ticks)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%b'))

In [None]:
# Duration of every uninterrupted ON run, collected per pump.
intervals = {}
operations = station[['p1', 'p7']]
for col in operations.columns:
    status = operations[col]
    # The counter grows by one at every 0/1 flip, so all samples of one run
    # share the same id.
    run_id = (status.diff().abs() >= 1).astype(int).cumsum()
    is_on = status == 1
    durations = []
    for _, run in status[is_on].groupby(run_id[is_on]):
        # Run length measured from its first to its last timestamp.
        durations.append(run.index.max() - run.index.min())
    intervals[col] = pd.Series(durations)
intervals

Statistics of individual pumps
- how many consecutive hours (avg, min, max) each pump stays in service
- total hours in service and the proportion of the whole observed period they represent

In [None]:
# Summarise the ON runs of each pump: shortest, longest, mean and total
# duration, plus the total divided by the time span of the whole record.
record_span = operations.index.max() - operations.index.min()
stats = pd.DataFrame.from_records([
    {
        'pump': pump,
        'min': period.min(),
        'max': period.max(),
        'avg': period.mean(),
        'sum': period.sum(),
        'ratio_on_state': period.sum() / record_span,
    }
    for pump, period in intervals.items()
]).set_index('pump')
stats

Average RMS velocity in ON state (last year)

In [None]:
# Mean and standard deviation of the RMS velocity per axis and pump, computed
# from the samples where the pump is flagged ON.
average_velocity = []
std_velocity = []
for name in ('p1', 'p7'):
    # The state column holds 0/1 integers, so it is compared with 1 (as the
    # other cells do) rather than with the boolean True.
    v = station.loc[station[name] == 1, [f'x-{name}', f'y-{name}', f'z-{name}']]
    average_velocity.append(v.mean())
    std_velocity.append(v.std())

# One row per axis/pump combination (x-p1, y-p1, ..., z-p7).
average_velocity = pd.concat(average_velocity).to_frame()
std_velocity = pd.concat(std_velocity).to_frame()

# Bars show the means; the standard deviations are drawn as error bars.
ax = average_velocity.plot.bar(
    legend=False,
    ylabel='Velocity [mm/s]',
    xlabel='Axis and Pump',
    title='Average vibration RMS velocity in ON state',
    grid=True,
    yerr=std_velocity
)
plt.show()

Average velocity per ON state (time section)

In [None]:
def sliding_velocity_mean(station: pd.DataFrame, column: str) -> pd.DataFrame:
    """Return the mean x/y/z velocity of every ON run of one pump.

    Rows where ``station[column] == 1`` are grouped by the run id stored in
    ``switch-{column}``. Every run contributes two rows carrying the same
    means, one at the run's first timestamp and one at its last, so that a
    line plot draws the run as a flat segment.

    Args:
        station: Table indexed by timestamp that holds the columns
            ``{column}``, ``switch-{column}`` and ``x-``/``y-``/``z-{column}``.
        column: Pump identifier used as the column name and suffix.

    Returns:
        A frame indexed by ``timestamp`` with the columns ``x``, ``y`` and
        ``z``. It is empty, but keeps that layout, when the pump has no ON
        rows.
    """
    axes = ('x', 'y', 'z')
    running = station.loc[station[column] == 1]

    rows = []
    for _, group in running.groupby(by=f'switch-{column}'):
        means = {axis: group[f'{axis}-{column}'].mean() for axis in axes}
        rows.append({'timestamp': group.index.min(), **means})
        rows.append({'timestamp': group.index.max(), **means})

    # Naming the columns explicitly keeps set_index from raising a KeyError
    # when there is no ON run and ``rows`` is empty.
    return pd.DataFrame(rows, columns=['timestamp', *axes]).set_index('timestamp')

# Number the consecutive ON/OFF runs of each pump: the counter grows by one
# at every state change.
operations = station[['p1', 'p7']]
for col in operations.columns:
    flipped = operations[col].diff().abs() >= 1
    station[f'switch-{col}'] = flipped.astype(int).cumsum()

# Per-run means of both pumps on a shared time axis, with gaps back-filled.
run_means = sliding_velocity_mean(station, 'p1').join(
    sliding_velocity_mean(station, 'p7'),
    how='outer',
    lsuffix='-p1',
    rsuffix='-p7',
).bfill()
run_means.plot(figsize=(20, 5), grid=True, marker='s')
plt.show()

Import monthly frequency spectra

In [None]:
# Read every file in fa_path as a spectrum table and key it by the number
# taken from its file name.
column_names = {
    'Frequency [Hertz]': 'frequency',
    'AmplitudeX [mm/s]': 'x',
    'AmplitudeY [mm/s]': 'y',
    'AmplitudeZ [mm/s]': 'z',
}
samples = {}
for filename in os.listdir(fa_path):
    freqs = pd.read_csv(os.path.join(fa_path, filename), delimiter=';', decimal=',')
    freqs = freqs.rename(columns=column_names).set_index('frequency')
    # The second dash-separated field of the file name is used as the month.
    month = int(filename.split('-')[1])
    samples[month] = freqs

# Stack the tables into one frame indexed by (month, frequency), sorted.
observations = pd.concat(samples).sort_index()
observations

In [None]:
# Wide view of the x amplitudes: the inner index level (frequency) is moved
# into the columns, leaving one row per outer key.
observations['x'].unstack()

Graph of spectra for all months

In [None]:
# One figure per axis with the spectra of all months overlaid.
for axis in ('x', 'y', 'z'):
    # After the transpose the rows are frequencies and each column is a month.
    spectra = observations[axis].unstack().T
    spectra.plot(
        xlabel='Frequency [Hz]',
        ylabel=f'Amplitude {axis.upper()} [mm/s]',
        grid=True,
        figsize=(20, 4),
    )
    plt.show()

Correlation between spectra from different dates along the same axis

In [None]:
# Pairwise correlation between the monthly spectra, one heatmap per axis.
fig, ax = plt.subplots(1, 3, figsize=(20, 5))
for panel, axis_name in zip(ax, ('x', 'y', 'z')):
    # Columns are months after the transpose, so corr() compares months.
    spectra = observations[axis_name].unstack().T
    sb.heatmap(spectra.corr(), annot=True, ax=panel)

fig.tight_layout()
plt.show()

Plot spectra for one month

In [None]:
# Spectrum of a single month, one panel per axis, with a few derived figures
# printed for each axis.
month = 1
fig, ax = plt.subplots(3, 1, figsize=(20, 10))
for panel, axis in zip(ax, ('x', 'y', 'z')):
    pxx = observations[axis].unstack().T[month]
    # Frequency at which the amplitude is largest.
    peak = pxx.idxmax()

    print(f'{axis.upper()}:')
    print(f'\tMax.frequency {peak} Hz (RPM: {peak * 60})')
    # Mean spacing between neighbouring frequency bins.
    resolution = pxx.index.diff().dropna().to_numpy().mean()
    # Sampling rate taken as twice the highest frequency in the spectrum.
    fs = 2 * pxx.index.max()
    print(f'\tResolution: {resolution} Hz (RPM: {resolution * 60})')
    print(f'\tWindow length: {int(fs / resolution)}')

    pxx.plot(
        ax=panel,
        color='darkblue',
        grid=True,
        xlabel='Frequency [Hz]',
        ylabel=f'Amplitude {axis.upper()} [mm/s]',
    )
plt.tight_layout()
plt.show()

Attempt to reconstruct the time-domain signal with an inverse Fourier transform
- If the maximum frequency is 1 kHz, the sampling frequency could have been 2 kHz

In [None]:
# Inverse real FFT of every monthly spectrum, one waveform table per month.
months = {}
for month, group in observations.groupby(level=0):
    pxx = group.droplevel(0)
    # Sampling rate taken as twice the highest frequency in the spectrum.
    fs = pxx.index.max() * 2

    ts = pd.DataFrame(np.fft.irfft(pxx, axis=0), columns=['x', 'y', 'z'])
    # Turn sample numbers into seconds.
    ts.index = ts.index * (1 / fs)
    months[month] = ts

# Stack the tables into one frame indexed by (month, time).
waveforms = pd.concat(months)
waveforms

In [None]:
# Reconstructed waveform stored under key 1, one subplot per axis.
first_month = waveforms.loc[1]
first_month.plot(
    subplots=True,
    figsize=(10, 7),
    grid=True,
    ylim=(-0.03, 0.03),
    xlabel='Time [s]',
    ylabel='Amplitude',
)
plt.show()

Frequency spectra subplots

In [None]:
# Read every file in fa_path as a spectrum table, this time keyed by the
# file name itself.
column_names = {
    'Frequency [Hertz]': 'frequency',
    'AmplitudeX [mm/s]': 'x',
    'AmplitudeY [mm/s]': 'y',
    'AmplitudeZ [mm/s]': 'z',
}
samples = {}

for filename in os.listdir(fa_path):
    freqs = pd.read_csv(os.path.join(fa_path, filename), delimiter=';', decimal=',')
    freqs = freqs.rename(columns=column_names).set_index('frequency')
    samples[filename] = freqs

# Stack the tables into one frame indexed by (file name, frequency).
observations = pd.concat(samples)
observations

Plot spectra for all months

In [None]:
# One figure per axis with the spectra of all loaded files overlaid.
for axis in ('x', 'y', 'z'):
    # After the transpose the rows are frequencies and each column is a file.
    spectra = observations[axis].unstack().T
    spectra.plot(
        xlabel='Frequency [Hz]',
        ylabel=f'Amplitude {axis.upper()} [mm/s]',
        grid=True,
        figsize=(20, 4),
    )
    plt.show()