In [1]:
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt

In [2]:
# Root folder of the pump recordings. Set the PUMP_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset the
# original location is used, so existing setups keep working.
DATA_DIR = os.environ.get('PUMP_DATA_DIR', 'C:/Users/gulce/codit_use_case/0_dB_pump')

# Example clip analysed in the rest of the notebook
audio_path = os.path.join(DATA_DIR, 'normal', 'normal', '00000000.wav')

In [3]:
# Decode the example clip with librosa's default settings.
# `y` holds the samples, `sr` the sampling rate reported by the loader.
y, sr = librosa.load(path=audio_path)
(y, sr)

(array([-0.00828569, -0.00836229, -0.00613663, ..., -0.00380668,
        -0.00376435, -0.00208453], dtype=float32),
 22050)

In [4]:
# Hop length in samples, passed to the feature extractors below.
# At a 22050 Hz sampling rate, 512 samples ~= 23 ms.
hop_length = 512

In [5]:
# Split the clip into a harmonic waveform and a percussive waveform
y_harmonic, y_percussive = librosa.effects.hpss(y=y)

In [6]:
# Show both separated waveforms (harmonic first, percussive second)
y_harmonic, y_percussive

(array([-0.00648977, -0.00574145, -0.00487929, ...,  0.        ,
         0.        ,  0.        ], dtype=float32),
 array([-0.00179592, -0.00262084, -0.00125733, ...,  0.        ,
         0.        ,  0.        ], dtype=float32))

In [7]:
# Beat track on the percussive signal.
# hop_length is passed explicitly so the returned beat frame indices stay on
# the same frame grid as the features computed with `hop_length` elsewhere in
# the notebook, even if that setting is changed from the library default.
tempo, beat_frames = librosa.beat.beat_track(y=y_percussive,
                                             sr=sr,
                                             hop_length=hop_length)

In [8]:
# Show the estimated tempo and the frame indices of the detected beats
tempo, beat_frames

(112.34714673913044,
 array([  3,  23,  42,  63,  88, 108, 130, 151, 174, 196, 219, 242, 266,
        290, 313, 336, 358, 382]))

In [9]:
# 13 MFCCs per frame, computed on the unseparated signal `y`
mfcc = librosa.feature.mfcc(
    y=y,
    sr=sr,
    n_mfcc=13,
    hop_length=hop_length,
)

In [22]:
# Dimensions of the MFCC matrix: one row per coefficient, one column per frame
np.shape(mfcc)

(13, 431)

In [11]:
# Delta features: first-order differences of the MFCCs (order=1 is the default)
mfcc_delta = librosa.feature.delta(mfcc, order=1)

In [23]:
# Dimensions of the delta matrix, for comparison with the MFCC matrix
np.shape(mfcc_delta)

(13, 431)

In [13]:
# Put the MFCCs and their deltas into one matrix (MFCC rows first), then
# collapse the frames between consecutive beat events into a single column.
# Segments are averaged: np.mean is the aggregate librosa uses by default.
mfcc_with_delta = np.vstack([mfcc, mfcc_delta])
beat_mfcc_delta = librosa.util.sync(mfcc_with_delta,
                                    beat_frames,
                                    aggregate=np.mean)

In [24]:
# Dimensions after beat synchronisation: stacked MFCC + delta rows,
# one column per beat-delimited segment
np.shape(beat_mfcc_delta)

(26, 19)

In [15]:
# Compute chroma features from the harmonic signal.
# hop_length is passed explicitly so the chroma frames share the frame grid
# used for the other features and for the beat frame indices; relying on the
# library default would silently misalign them if `hop_length` were changed.
chromagram = librosa.feature.chroma_cqt(y=y_harmonic,
                                        sr=sr,
                                        hop_length=hop_length)

In [25]:
# Dimensions of the chroma matrix: one row per chroma bin, one column per frame
np.shape(chromagram)

(12, 431)

In [17]:
# Collapse the chroma frames between consecutive beat events into one column,
# taking the per-feature median of each segment
beat_chroma = librosa.util.sync(
    chromagram, beat_frames, aggregate=np.median
)

In [26]:
# Dimensions of the beat-synchronous chroma matrix
np.shape(beat_chroma)

(12, 19)

In [19]:
# Join the beat-synchronous matrices row-wise: chroma rows on top,
# MFCC + delta rows underneath (both are 2-D with one column per segment)
beat_features = np.concatenate([beat_chroma, beat_mfcc_delta], axis=0)

In [27]:
# Dimensions of the final feature matrix (chroma + MFCC + delta rows)
np.shape(beat_features)

(38, 19)