In [None]:
import seaborn
import numpy as np
import scipy, matplotlib.pyplot as plt, IPython.display as ipd
import librosa, librosa.display
from pathlib import Path

# Read every matching drum sample from disk. librosa.load returns a
# (samples, sample_rate) pair; only the sample array (index 0) is kept.
kicks = [
    librosa.load(wav_path)[0]
    for wav_path in Path().glob('Kicks/Kick*.wav')
]
snares = [
    librosa.load(wav_path)[0]
    for wav_path in Path().glob('Snares/Snare*.wav')
]

def extract_features(signal):
    """Return a two-element feature vector for one audio signal.

    The vector is ``[zero_crossing_rate, spectral_centroid]``, where each
    value is the element at index ``[0, 0]`` of the array returned by the
    corresponding ``librosa.feature`` function (presumably the first
    analysis frame -- see the librosa feature documentation).

    Parameters
    ----------
    signal : array-like
        Audio samples, passed unchanged to both librosa feature extractors.

    Returns
    -------
    list
        ``[zcr, centroid]`` as two scalar values.
    """
    zcr_frames = librosa.feature.zero_crossing_rate(signal)
    centroid_frames = librosa.feature.spectral_centroid(y=signal)
    # Keep only the [0, 0] entry of each descriptor; later entries are ignored.
    return [zcr_frames[0, 0], centroid_frames[0, 0]]

# Build one feature row per sample: column 0 is the zero crossing rate,
# column 1 is the spectral centroid (see extract_features).
kick_features = np.array(list(map(extract_features, kicks)))
snare_features = np.array(list(map(extract_features, snares)))

# Displayed as the cell output: (number of snare samples, number of features).
snare_features.shape

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Stack both classes into a single table: kick rows first, snare rows after.
feature_table = np.vstack((kick_features, snare_features))

# Rescale each feature column to the range [-1, 1] so that both features
# share a comparable scale.
scaler = MinMaxScaler(feature_range=(-1,1))
training_features = scaler.fit_transform(feature_table)

# Derive the kick/snare boundary from the data instead of hard-coding row
# counts, so the two scatter groups never overlap or skip rows.
n_kicks = len(kick_features)
plt.scatter(training_features[:n_kicks,0], training_features[:n_kicks,1], c='b', label='kick')
plt.scatter(training_features[n_kicks:,0], training_features[n_kicks:,1], c='r', label='snare')
plt.xlabel('Zero crossing rate (scaled)')
plt.ylabel('Spectral centroid (scaled)')
plt.legend()


In [None]:
# Load the drum loop and estimate where each drum hit begins.
x, sr = librosa.load('funkydrummer.wav')
# units='time' asks onset_detect for onset positions as times rather than
# frame indices; later cells use these values directly as time coordinates.
onset_times = librosa.onset.onset_detect(y=x, sr=sr, units='time')
print(onset_times) # times (in seconds) of estimated onsets

In [None]:
# Short-time Fourier transform of the loop, converted to a dB-scaled
# magnitude spectrogram for display.
S = librosa.stft(x)
logS = librosa.amplitude_to_db(np.abs(S))

# Spectrogram on a log-frequency axis, with a vertical marker at each
# detected onset time.
plt.figure(figsize=(14, 5))
librosa.display.specshow(logS, x_axis='time', y_axis='log', cmap='Reds')
plt.vlines(x=onset_times, ymin=0, ymax=10000, color='blue')

In [None]:
# Waveform of the first 30000 samples with the first 20 detected onsets
# overlaid as semi-transparent red markers.
plt.figure(figsize=(14, 5))
librosa.display.waveshow(x[:30000], sr=sr)
plt.vlines(
    x=onset_times[:20],
    ymin=-0.8,
    ymax=0.79,
    color='r',
    alpha=0.5,
)

In [None]:
# Synthesize a click track with one click per detected onset, made the same
# length as the loop so the two signals can be summed sample by sample.
clicks = librosa.clicks(
    times=onset_times,
    sr=sr,
    length=len(x),
)

# Play the loop mixed with the clicks to audition the onset estimates.
ipd.Audio(data=x + clicks, rate=sr)