In [65]:
import librosa
import librosa.display
import numpy as np

In [66]:
CNN_INPUT_SIZE = (128, 20)
DEFAULT_SR = 22050

In [67]:
def resize_axis(array, N):
    if(array.shape[1] > N):
        resized = array[:,:N]
    else:
        resized = np.lib.pad(array, ((0,0),(0,N - array.shape[1])),\
            'constant', constant_values=(np.min(array)))
    return resized

In [68]:
def extract_cnn_input(raw_audio):
    frame_length = min(2048, len(raw_audio))
    mel_spec = librosa.core.power_to_db(librosa.feature.melspectrogram(
        y=raw_audio, sr=DEFAULT_SR, n_fft=frame_length,
        hop_length=frame_length//4, n_mels=CNN_INPUT_SIZE[0])
    )
    # Truncate number of frames stored
    m = min(CNN_INPUT_SIZE[1], mel_spec.shape[1])
    N =20
    mell = resize_axis(mel_spec[:, 0:m], N)
    return mell

---

In [69]:
import os

def onehot(str):
    path = './Drum'
    kits = os.listdir(path)
    #kits.remove('.DS_Store')
    
    oh = []

    for kit in kits:
        if str == kit:
            oh.append(1)
        else:
            oh.append(0)
    
    return np.array(oh)

In [70]:
#path = 'Drum\Snare\9th Snare 38.wav'

y_label = None
x_label = None

drumkit_path = './Drum'
kits = os.listdir(drumkit_path)

for kit in kits:
    path = os.path.join(drumkit_path, kit)
    sounds = os.listdir(path)
    
    for sound in sounds:
        wavfile = os.path.join(path, sound)
        y, sr = librosa.load(wavfile)
        yt, index = librosa.effects.trim(y=y, top_db=30)
        yt = librosa.util.normalize(yt)
        k = extract_cnn_input(yt)
        k = np.expand_dims(k, axis=0)
        
        typ = onehot(kit)
        typ = np.expand_dims(typ, axis=0)
        
        if x_label is None:
            x_label = k.copy()
            y_label = typ.copy()
        else:
            x_label = np.concatenate((x_label, k), axis=0)
            y_label = np.concatenate((y_label, typ), axis=0)
        
        

  return f(*args, **kwargs)


In [78]:
#Shuffle Data

shuffle = np.arange(y_label.shape[0])
np.random.shuffle(shuffle)

x_label = x_label[shuffle]
y_label = y_label[shuffle]

In [83]:
y_label

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [80]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x_label, y_label, test_size=0.2)

In [81]:
x_train.shape

(560, 128, 20)

In [84]:
y_train

array([[0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0]])

In [113]:
# change y_label to fit sci-kit specification (one-hot to categorical #)

yy_test = np.array([])
for i in range(x_test.shape[0]):
    imsi = np.array([])
    count = 0
    for j in range(7):
        if int(y_test[i,j]) == 0:
            count += 1
        else:
            yy_test = np.concatenate((yy_test, np.array([count])), axis=0)
            break


In [117]:
yy_train = np.array([])
for i in range(x_train.shape[0]):
    imsi = np.array([])
    count = 0
    for j in range(7):
        if int(y_train[i,j]) == 0:
            count += 1
        else:
            yy_train = np.concatenate((yy_train, np.array([count])), axis=0)
            break



---

In [106]:
y_train.shape

(560, 7)

In [107]:
x_train.shape

(560, 128, 20)

In [85]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models



In [118]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 20,1)))
model.add(layers.MaxPooling2D())
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D())
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(7))

In [121]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(x_train, yy_train, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x16bdf9d7df0>

In [122]:
model.evaluate(x_test, yy_test)



[0.5843215584754944, 0.8642857074737549]

In [123]:
probability_model = tf.keras.Sequential([model, 
                                         tf.keras.layers.Softmax()])

In [124]:
predict = probability_model.predict(x_test)

In [129]:
np.argmax(predict[0])

3

In [130]:
yy_test[0]

3.0

In [135]:
tf.math.confusion_matrix(
    labels=yy_test,
    predictions=predict
)

InvalidArgumentError: Shapes of all inputs must match: values[0].shape = [140] != values[1].shape = [140,7] [Op:Pack] name: stack