In [17]:
import os
import sys
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram

import librosa
import librosa.display
from IPython.display import Audio

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

import tensorflow as tf
import tensorflow.keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from tensorflow.keras.layers import Input, Flatten, Dropout, Activation, BatchNormalization, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing import image
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l2

In [3]:
devdata = []
data_folder = './datasets/audio_data/out/'
labels = []

for f in os.listdir(data_folder):
    if(f.endswith(".jpeg")):
      image=tf.keras.preprocessing.image.load_img(data_folder + f, color_mode='rgb')
      image=np.array(image)
      devdata.append(image)
      part = f.split('-')
      labels.append(int(part[2]))

In [4]:
X_train, X_test, y_train, y_test = train_test_split(devdata, labels, test_size=0.3,random_state=22, stratify=labels)

In [5]:
X_train[0][100][100]

array([36, 11, 77], dtype=uint8)

In [6]:
#Normalize inputs
mean = np.mean(X_train)
std = np.std(X_train)
X_train = (X_train - mean)/std
X_test = (X_test - mean)/std
X_train[0][100][100]

array([-0.91431043, -1.1401532 , -0.54392828])

In [7]:
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

In [8]:
X_train[0][100][100]

array([-0.91431043, -1.1401532 , -0.54392828])

In [9]:
#One hot encoding
lb = LabelEncoder()
y_train = to_categorical(lb.fit_transform(y_train))
y_test = to_categorical(lb.fit_transform(y_test))

print(y_test[0:10])

[[0. 0. 1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]]


In [10]:
print(lb.classes_)

[1 2 3 4 5 6 7 8]


In [37]:
X_train[10].shape

(288, 432, 3)

In [38]:
model = tf.keras.Sequential()
model.add(layers.Conv2D(16, 7, activation="relu", padding="same",
                       input_shape=[288,432,3]))
model.add(layers.MaxPooling2D(pool_size=(2)))
model.add(layers.Conv2D(256, 3, activation="relu", padding="same"))
model.add(layers.Conv2D(256, 3, activation="relu", padding="same"))
model.add(layers.MaxPooling2D(2))
model.add(layers.Conv2D(256, 3, activation="relu", padding="same"))
model.add(layers.Conv2D(256, 3, activation="relu", padding="same"))
model.add(layers.MaxPooling2D(2))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation="relu"))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(8, activation="sigmoid"))

opt = tf.keras.optimizers.Adam(lr=0.001)
model.compile(loss='categorical_crossentropy', optimizer=opt,metrics=['accuracy'])
model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_30 (Conv2D)           (None, 288, 432, 16)      2368      
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 72, 108, 16)       0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 72, 108, 256)      37120     
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 72, 108, 256)      590080    
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 36, 54, 256)       0         
_________________________________________________________________
conv2d_33 (Conv2D)           (None, 36, 54, 256)       590080    
_________________________________________________________________
conv2d_34 (Conv2D)           (None, 36, 54, 256)     

In [None]:
checkpoint = ModelCheckpoint("champion_model.hdf5", monitor='val_accuracy', verbose=1,
    save_best_only=True, mode='max', period=1, save_weights_only=True)

model_history=model.fit(X_train, y_train,batch_size=32, epochs=10, validation_data=(X_test, y_test),callbacks=[checkpoint])

Epoch 1/10

Epoch 00001: val_accuracy improved from -inf to 0.66667, saving model to champion_model.hdf5
Epoch 2/10

Epoch 00002: val_accuracy did not improve from 0.66667
Epoch 3/10

Epoch 00003: val_accuracy did not improve from 0.66667
Epoch 4/10

Epoch 00004: val_accuracy did not improve from 0.66667
Epoch 5/10

Epoch 00005: val_accuracy did not improve from 0.66667
Epoch 6/10

Epoch 00006: val_accuracy did not improve from 0.66667
Epoch 7/10

Epoch 00007: val_accuracy did not improve from 0.66667
Epoch 8/10