In [1]:
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt

In [3]:
# Root folder of the dataset. Defaults to the original local location, but can
# be redirected with the CODIT_DATA_ROOT environment variable so the notebook
# also runs on machines where the data lives elsewhere.
DATA_ROOT = os.environ.get('CODIT_DATA_ROOT', 'C:/Users/gulce/codit_use_case')

# Example clip used throughout the notebook (trailing separators on the root
# are dropped so the joined path never contains a doubled slash).
audio_path_abnormal = DATA_ROOT.rstrip('/\\') + '/0_dB_pump/abnormal/00000000.wav'

In [4]:
# Read the example recording into a waveform `y` and its sample rate `sr`.
# sr=22050 is the loader's default target rate; it is spelled out here so the
# rate the rest of the notebook relies on is visible at the call site.
y, sr = librosa.load(audio_path_abnormal, sr=22050)
# Echo both values as the cell output
(y, sr)

(array([-0.00352098, -0.00223476, -0.00142802, ..., -0.00242872,
         0.00042851,  0.00107594], dtype=float32),
 22050)

In [5]:
# Hop length, in samples, between successive analysis frames.
# At the 22050 Hz sample rate returned by the load above, 512 samples ~= 23 ms.
hop_length = 512

In [6]:
# Harmonic-percussive source separation: split the clip into a harmonic
# waveform and a percussive waveform
y_harmonic, y_percussive = librosa.effects.hpss(y=y)

In [7]:
# Display the two separated waveforms (harmonic first, percussive second)
y_harmonic, y_percussive

(array([-0.00501095, -0.00389382, -0.00291642, ...,  0.        ,
         0.        ,  0.        ], dtype=float32),
 array([0.00148997, 0.00165907, 0.0014884 , ..., 0.        , 0.        ,
        0.        ], dtype=float32))

In [8]:
# Beat track on the percussive signal.
# hop_length is passed explicitly so that the returned beat frame indices are
# expressed on the same frame grid as the MFCC features computed with
# `hop_length`; leaving it out only works while `hop_length` happens to equal
# the library default.
tempo, beat_frames = librosa.beat.beat_track(y=y_percussive,
                                             sr=sr,
                                             hop_length=hop_length)

In [9]:
# Display the estimated tempo and the frame indices of the detected beats.
# NOTE(review): the clip path suggests machine (pump) audio rather than music —
# confirm that beat tracking is a meaningful segmentation for this data.
tempo, beat_frames

(123.046875,
 array([  3,  21,  39,  60,  81, 101, 120, 140, 161, 183, 205, 225, 246,
        266, 286, 306, 326, 346, 367]))

In [10]:
# 13 MFCCs per frame, computed on the unseparated waveform using the shared
# hop length
mfcc = librosa.feature.mfcc(
    y=y,
    sr=sr,
    n_mfcc=13,
    hop_length=hop_length,
)

In [11]:
# Rows are the 13 requested coefficients; columns are analysis frames
mfcc.shape

(13, 431)

In [12]:
# Delta features: first-order (order=1, the default) differences of the MFCCs
# along the frame axis
mfcc_delta = librosa.feature.delta(mfcc, order=1)

In [13]:
# Same shape as `mfcc`: one delta value per coefficient per frame
mfcc_delta.shape

(13, 431)

In [14]:
# Stack the MFCCs on top of their deltas, then collapse the frames between
# consecutive beat events into a single column each. The aggregation is the
# mean, which is what sync applies when no aggregate is given.
mfcc_with_delta = np.vstack((mfcc, mfcc_delta))
beat_mfcc_delta = librosa.util.sync(mfcc_with_delta, beat_frames, aggregate=np.mean)

In [15]:
# 26 rows = 13 MFCC + 13 delta; one column per beat-delimited segment
# (the 19 beat frames shown earlier yield 20 segments here)
beat_mfcc_delta.shape

(26, 20)

In [16]:
# Compute chroma features from the harmonic signal.
# hop_length is passed explicitly so the chroma frames line up with the MFCC
# frames and with the beat frame indices used to aggregate them; leaving it
# out only works while `hop_length` happens to equal the library default.
chromagram = librosa.feature.chroma_cqt(y=y_harmonic,
                                        sr=sr,
                                        hop_length=hop_length)

In [17]:
# 12 chroma rows; the column count matches the MFCC frame count shown earlier
chromagram.shape

(12, 431)

In [18]:
# Collapse the chromagram to one column per beat-delimited segment, taking the
# per-feature median of the frames inside each segment
beat_chroma = librosa.util.sync(chromagram, beat_frames, aggregate=np.median)

In [19]:
# 12 chroma rows, one column per beat-delimited segment
beat_chroma.shape

(12, 20)

In [20]:
# Combine every beat-synchronous feature into one matrix by joining along the
# feature (row) axis: chroma rows first, then the MFCC + delta rows
beat_features = np.concatenate((beat_chroma, beat_mfcc_delta), axis=0)

In [21]:
# 38 rows = 12 chroma + 26 MFCC/delta; columns are beat-delimited segments
beat_features.shape

(38, 20)