In [1]:
import os
import glob
import random
import numpy as np
import matplotlib.pyplot as plt
import librosa.display
%matplotlib inline

In [2]:
def plot_chromagram(data):
    plt.style.use('fivethirtyeight')
    plt.figure(figsize=(15, 8))
    librosa.display.specshow(data, y_axis='chroma', x_axis='time', cmap='coolwarm')
    plt.colorbar()
    plt.title('Chromagram',size=30)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.tight_layout()
    plt.ylabel('Pitch Class',size=20)
    plt.xlabel('Time',size=20)
    plt.show()

In [3]:
def binarizer(data) : 
    for i in range(0,len(data.T)):
        if data[data[:,i].argmax(),i] !=0 : 
            data[data[:,i].argmax(),i]==1
    data[data!=1]=0
    return(data)

In [4]:
def divide(data, n_samples):
    n_subarrys=int(len(data)/n_samples)
    data=np.array_split(data,n_subarrys)
    return(data) 

In [5]:
n_samples=100000
def pad_and_slice(data):
    if len(data.T) >= n_samples:
        data=data[:,0:n_samples]
    else:
        pad=n_samples-len(data.T)
        pad_array=np.zeros((12,pad))
        data=np.hstack((data,pad_array))
    return(data)

In [6]:
def preprocessing_audio_data(audio):
    hop_length = 512 ## how much to move during fft
    n_fft = 2048 ## fft bins
    data=divide(audio,n_samples)
    print(len(data))
    complete_data=[]
    for i in range(0,len(data)):
        audio=data[i]
        data1=librosa.feature.chroma_stft(audio,n_fft=n_fft, hop_length=hop_length)
       # plot_chromagram(data) 
       # data1=binarizer(data1)
        data1=pad_and_slice(data1) 
        complete_data.append(data1)
    complete_data=np.array(complete_data)
    return(complete_data)

In [7]:
audio_files=glob.glob('audio_files/*.WAV')

In [8]:
#audio_files=audio_files[2:3]

In [9]:
complete_data=[]
for file in audio_files:
    print('Processing file %s'%(file))
    audio,sr=librosa.load(file,sr=22050)
    data=preprocessing_audio_data(audio)
    complete_data.append(data)
complete_data=np.array(complete_data)

Processing file audio_files/EEW_AUDI.WAV
4
Processing file audio_files/A#W_AUDI.WAV
5
Processing file audio_files/G#2_AUDI.WAV
18
Processing file audio_files/E2W_AUDI.WAV
18
Processing file audio_files/D#W_AUDI.WAV
4
Processing file audio_files/C#W_AUDI.WAV
4
Processing file audio_files/GEW_AUDI.WAV
4
Processing file audio_files/B2W_AUDI.WAV
18
Processing file audio_files/F#W_AUDI.WAV
4
Processing file audio_files/G2W_AUDI.WAV
18
Processing file audio_files/BEW_AUDI.WAV
5
Processing file audio_files/F#2_AUDI.WAV
18
Processing file audio_files/D2W_AUDI.WAV
15




Processing file audio_files/AEW_AUDI.WAV
4
Processing file audio_files/DEW_AUDI.WAV
6
Processing file audio_files/A2W_AUDI.WAV
17
Processing file audio_files/C3W_AUDI.WAV
18
Processing file audio_files/C#2_AUDI.WAV
18
Processing file audio_files/D#2_AUDI.WAV
18
Processing file audio_files/G#W_AUDI.WAV
4
Processing file audio_files/F2W_AUDI.WAV
18
Processing file audio_files/CEW_AUDI.WAV
4
Processing file audio_files/FEW_AUDI.WAV
4
Processing file audio_files/C2W_AUDI.WAV
5
Processing file audio_files/A#2_AUDI.WAV
18


In [10]:
complete_data=np.concatenate(complete_data.ravel())

In [11]:
complete_data.shape

(269, 12, 100000)

### A : 0, A# : 1 , B: 2, C:3 , C# : 4 , D:5, D# : 6, E:7, F:8, F#:9 , G:10, G#:11  

In [12]:
def label_generator(number,repeats):
    if number ==0:
        labels=np.zeros(repeats)
    else:
        labels=np.zeros(repeats)+number
    return(labels)

In [13]:
labels=[]

labels.append(label_generator(7,4))
labels.append(label_generator(1,5))
labels.append(label_generator(11,18))
labels.append(label_generator(7,18))
labels.append(label_generator(6,4))
labels.append(label_generator(4,4))
labels.append(label_generator(10,4))
labels.append(label_generator(2,18))
labels.append(label_generator(9,4))
labels.append(label_generator(10,18))
labels.append(label_generator(2,5))
labels.append(label_generator(9,18))
labels.append(label_generator(5,15))
labels.append(label_generator(0,4))
labels.append(label_generator(5,6))
labels.append(label_generator(0,17))
labels.append(label_generator(3,18))
labels.append(label_generator(4,18))
labels.append(label_generator(6,18))
labels.append(label_generator(11,4))
labels.append(label_generator(8,18))
labels.append(label_generator(3,4))
labels.append(label_generator(8,4))
labels.append(label_generator(3,5))
labels.append(label_generator(1,18))


In [14]:
labels=np.concatenate(np.array(labels).ravel())

## Neural Network

In [15]:
from sklearn.preprocessing import label_binarize
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense, Dropout,Flatten

In [16]:
encoded_labels=label_binarize(labels,classes=[0,1,2,3,4,5,6,7,8,9,10,11])

In [17]:
labels=random.Random(4).shuffle(encoded_labels)
complete_data=random.Random(4).shuffle(complete_data)

In [22]:
complete_data

In [19]:
x_train,x_test=complete_data[:-5],complete_data[-5:]
y_train,y_test=encoded_labels[:-5],encoded_labels[-5:]

TypeError: 'NoneType' object is not subscriptable

In [118]:
neurons=64
n_classes=12
model=Sequential()
model.add(LSTM(units=neurons,return_sequences=True, input_shape=(x_train.shape[1:])))
model.add(Dropout(0.3))
model.add(LSTM(units=neurons,return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(units=neurons))
model.add(Dense(n_classes, activation='softmax'))

model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
model.fit(x_train,y_train, epochs=15, batch_size=1, verbose=2)

Train on 264 samples
Epoch 1/15
264/264 - 186s - loss: 2.5047 - accuracy: 0.0795
Epoch 2/15
264/264 - 172s - loss: 2.4761 - accuracy: 0.0909
Epoch 3/15
264/264 - 177s - loss: 2.3226 - accuracy: 0.1402
Epoch 4/15
264/264 - 173s - loss: 2.1092 - accuracy: 0.2500
Epoch 5/15
264/264 - 172s - loss: 1.8998 - accuracy: 0.2955
Epoch 6/15
264/264 - 173s - loss: 1.6490 - accuracy: 0.4015
Epoch 7/15
264/264 - 174s - loss: 1.4743 - accuracy: 0.4811
Epoch 8/15
264/264 - 169s - loss: 1.3913 - accuracy: 0.4924
Epoch 9/15
264/264 - 171s - loss: 1.1015 - accuracy: 0.5833
Epoch 10/15
264/264 - 171s - loss: 0.9069 - accuracy: 0.6439
Epoch 11/15
264/264 - 170s - loss: 0.8292 - accuracy: 0.7045
Epoch 12/15
264/264 - 169s - loss: 0.7977 - accuracy: 0.6970
Epoch 13/15
264/264 - 177s - loss: 0.6511 - accuracy: 0.7765
Epoch 14/15
264/264 - 190s - loss: 0.4521 - accuracy: 0.8523
Epoch 15/15
264/264 - 169s - loss: 0.5046 - accuracy: 0.8409


<tensorflow.python.keras.callbacks.History at 0x12e6d0390>

In [119]:
y_pred=model.predict(x_test)

In [120]:
y_test

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [123]:

b = np.zeros_like(y_pred)
b[np.arange(len(y_pred)), y_pred.argmax(1)] = 1

In [124]:
b

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)