In [3]:
import librosa
import librosa.display
import numpy as np


In [4]:
# Mel-spectrogram shape used by extract_cnn_input:
# (number of mel bands, number of frames kept).
CNN_INPUT_SIZE = (128, 20)
# Sample rate passed as `sr` to librosa's mel-spectrogram computation.
DEFAULT_SR = 22050

In [5]:

def resize_axis(array, N):
    """Force the second axis of a 2-D array to exactly ``N`` columns.

    Arrays with more than ``N`` columns are cut down to their first ``N``
    columns. Arrays with ``N`` or fewer columns are right-padded, using the
    array's own minimum value as the fill.

    Parameters
    ----------
    array : numpy.ndarray
        2-D array; only axis 1 is resized.
    N : int
        Desired number of columns.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(array.shape[0], N)``.

    Raises
    ------
    ValueError
        If padding is required and ``array`` has no elements, because
        ``np.min`` then has no value to use as the fill.
    """
    n_cols = array.shape[1]
    if n_cols > N:
        return array[:, :N]
    # Use the public np.pad; the np.lib.pad alias is not available in the
    # NumPy 2.x np.lib namespace.
    return np.pad(array, ((0, 0), (0, N - n_cols)),
                  'constant', constant_values=np.min(array))

In [6]:
def extract_cnn_input(raw_audio):
    """Compute a fixed-size, dB-scaled mel spectrogram for one audio clip.

    The clip is turned into a mel power spectrogram with
    ``CNN_INPUT_SIZE[0]`` mel bands, converted to decibels, and then
    truncated or padded along the frame axis to ``CNN_INPUT_SIZE[1]`` frames
    via ``resize_axis``.

    Parameters
    ----------
    raw_audio : numpy.ndarray
        Audio time series as accepted by ``librosa.feature.melspectrogram``.
        ``DEFAULT_SR`` is passed as its sample rate -- presumably the clip
        was loaded at that rate; verify against callers.

    Returns
    -------
    numpy.ndarray
        dB mel spectrogram whose second axis has exactly
        ``CNN_INPUT_SIZE[1]`` columns.
    """
    # Clips shorter than 2048 samples use the whole clip as one FFT window.
    frame_length = min(2048, len(raw_audio))
    mel_power = librosa.feature.melspectrogram(
        y=raw_audio,
        sr=DEFAULT_SR,
        n_fft=frame_length,
        # Quarter-window hop; clamp to 1 so clips under 4 samples do not
        # produce an invalid hop of 0.
        hop_length=max(1, frame_length // 4),
        n_mels=CNN_INPUT_SIZE[0],
    )
    mel_db = librosa.core.power_to_db(mel_power)
    # resize_axis truncates as well as pads, so no separate pre-slice is
    # needed. The target width comes from CNN_INPUT_SIZE so the output always
    # agrees with the configured input shape.
    return resize_axis(mel_db, CNN_INPUT_SIZE[1])

---

In [7]:
# Minimum number of entries the first row of a feature must have for
# len_profit to accept it.
MIN_LEN = 6
# Fixed length that chg_len truncates or zero-pads a feature vector to.
MAX_LEN = 14


def len_profit(y):
    """Return True when the first row of ``y`` has at least MIN_LEN entries.

    Parameters
    ----------
    y : sequence of sequences
        Only ``y[0]`` is inspected.

    Returns
    -------
    bool
    """
    return len(y[0]) >= MIN_LEN


def chg_len(y):
    """Truncate or zero-pad a 1-D sequence to exactly MAX_LEN entries.

    Parameters
    ----------
    y : numpy.ndarray
        1-D vector of values.

    Returns
    -------
    numpy.ndarray
        The first MAX_LEN entries of ``y`` when it is already long enough
        (a slice of the input); otherwise ``y`` followed by trailing zeros.
    """
    n_missing = MAX_LEN - len(y)
    if n_missing <= 0:
        return y[:MAX_LEN]
    # Append all the padding in one concatenate instead of re-allocating the
    # array once per missing element. Integer zeros keep the same dtype
    # promotion as appending np.array([0]).
    return np.concatenate((y, np.zeros(n_missing, dtype=int)))
    

In [8]:
import os

def onehot(str, path='./Drum'):
    """One-hot encode a kit folder name against the sub-folders of ``path``.

    Parameters
    ----------
    str : str
        Name of the kit folder to encode. (The parameter name shadows the
        builtin ``str``; it is kept so existing callers keep working.)
    path : str, optional
        Directory whose sub-folders define the categories. Defaults to
        ``'./Drum'``.

    Returns
    -------
    numpy.ndarray
        Vector with one entry per sub-folder of ``path``, in sorted name
        order: 1 where the folder name equals ``str``, 0 elsewhere. All
        zeros when no folder matches.
    """
    # os.listdir returns entries in arbitrary order, so sort to keep the
    # column order reproducible. Only directories count as categories, which
    # keeps stray files such as '.DS_Store' from adding a column.
    kits = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
    return np.array([1 if kit == str else 0 for kit in kits])

In [9]:
import warnings as w
w.filterwarnings('ignore')


import pandas as pd

# Build one feature row per usable drum sample found under drumkit_path.
# Row layout: [one-hot kit | duration | zero-crossing rate | spectral flatness
#              | spectral bandwidth | spectral roll-off],
# where each per-frame feature is truncated/zero-padded by chg_len.
data = []
drumkit_path = './Drum'
# os.listdir returns entries in arbitrary order, so sort for a reproducible
# row order, and skip hidden entries such as macOS '.DS_Store'.
kits = sorted(k for k in os.listdir(drumkit_path) if not k.startswith('.'))

for kit in kits:
    path = os.path.join(drumkit_path, kit)
    sounds = sorted(s for s in os.listdir(path) if not s.startswith('.'))
    # The label depends only on the kit folder, so build it once per kit.
    typ = onehot(kit)

    for sound in sounds:
        wavfile = os.path.join(path, sound)
        # load
        y, sr = librosa.load(wavfile)
        # trim
        yt, index = librosa.effects.trim(y=y, top_db=30)
        # normalize
        yt = librosa.util.normalize(yt)

        # Zero Crossing Rate -- also used to reject clips that are too short
        # (fewer than MIN_LEN frames), so compute it once and reuse it.
        zcr_frames = librosa.feature.zero_crossing_rate(y=yt)
        if not len_profit(zcr_frames):
            continue
        zcr = chg_len(zcr_frames[0])

        # librosa >= 0.10 only accepts y/sr as keyword arguments for the
        # calls below; keyword form also works on older releases.
        # Duration
        dur = librosa.get_duration(y=yt, sr=sr)
        durr = np.array([dur])
        # Spectral Flatness
        flt = librosa.feature.spectral_flatness(y=yt)
        flt = chg_len(flt[0])
        # Spectral Bandwidth
        bdw = librosa.feature.spectral_bandwidth(y=yt, sr=sr)
        bdw = chg_len(bdw[0])
        # Spectral RollOff
        rlf = librosa.feature.spectral_rolloff(y=yt, sr=sr, roll_percent=0.8)
        rlf = chg_len(rlf[0])
        # --------
        # Disabled features:
        # Mel Spectogram
        # mel = extract_cnn_input(yt)
        # Constant-Q Power
        # cqt = np.abs(librosa.cqt(yt))
        # --------
        # Concat
        feature = np.concatenate((typ, durr, zcr, flt, bdw, rlf))
        data.append(feature)

feature_dataset = pd.DataFrame(data)
#feature_dataset.to_csv('Feature_Dataset.csv', index = False)

./Drum\Conga&Bongo\00_BngLClsdSlp_SP_10_01.wav
./Drum\Conga&Bongo\00_BongoHiOpen_SP_10_01.wav
./Drum\Conga&Bongo\00_BongoHiTip_SP_10_02.wav
./Drum\Conga&Bongo\00_BongoLoOpenSlap_SP_10_02 (1).wav
./Drum\Conga&Bongo\00_BongoLoOpenSlap_SP_10_02.wav
./Drum\Conga&Bongo\00_BongoLoOpen_SP_10_01.wav
./Drum\Conga&Bongo\00_BongoLoTip_SP_10_01.wav
./Drum\Conga&Bongo\00_CngHClsdSlp_SP_10_02.wav
./Drum\Conga&Bongo\00_CongaHiPalm_SP_10_02.wav
./Drum\Conga&Bongo\00_CongaHiSlide_SP_10_02.wav
./Drum\Conga&Bongo\00_CongaLoOpen_SP_10_01.wav
./Drum\Conga&Bongo\00_CongaLoOpen_SP_10_02.wav
./Drum\Conga&Bongo\48_MemphisConga_813.wav
./Drum\Conga&Bongo\51_CarpenterBongoShort_802.wav
./Drum\Conga&Bongo\626_14_HiConga_340.wav
./Drum\Conga&Bongo\Bm_SaladSpinnerBongo_02_631.wav
./Drum\Conga&Bongo\Bm_VaseBongoLow_01_604.wav
./Drum\Conga&Bongo\Bngo_1.wav
./Drum\Conga&Bongo\Bngo_2.wav
./Drum\Conga&Bongo\Bngo_3.wav
./Drum\Conga&Bongo\Bngo_4.wav
./Drum\Conga&Bongo\BongoLowOpenQuiet_SP_223_01.wav
./Drum\Conga&Bongo\Bon

---

In [10]:
# Show the assembled feature table as this cell's output.
feature_dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.139320,0.017578,0.027832,...,829.028320,1076.660156,1141.259766,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.139320,0.041016,0.061035,...,2002.587891,2357.885742,2573.217773,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.139320,0.025879,0.035645,...,1679.589844,1711.889648,1475.024414,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.232200,0.024902,0.037598,...,829.028320,818.261719,559.863281,559.863281,559.863281,785.961914,1711.889648,0.000000,0.000000,0.000000
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.232200,0.024902,0.037598,...,829.028320,818.261719,559.863281,559.863281,559.863281,785.961914,1711.889648,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
673,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.232200,0.023926,0.027832,...,506.030273,366.064453,236.865234,183.032227,172.265625,204.565430,1195.092773,0.000000,0.000000,0.000000
674,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.232200,0.010742,0.013672,...,269.165039,258.398438,215.332031,236.865234,204.565430,204.565430,236.865234,0.000000,0.000000,0.000000
675,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.255420,0.010742,0.013184,...,236.865234,204.565430,172.265625,150.732422,150.732422,161.499023,409.130859,2088.720703,0.000000,0.000000
676,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.441179,0.150391,0.164062,...,5964.697266,5490.966797,5200.268555,5006.469727,4823.437500,4812.670898,4898.803711,4834.204102,4844.970703,4780.371094
