In [None]:
import os
from glob import glob

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.backend import set_session

# Expose only GPU 0 to CUDA so that this notebook runs on a single device.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
# TF1-style session setup: build the session options, create the session and
# hand it to Keras so that every model in this notebook runs inside it.
config = tf.ConfigProto(log_device_placement=True)  # log which device each operation runs on
config.gpu_options.allow_growth = True  # grow GPU memory on demand instead of reserving it up front
sess = tf.Session(config=config)
set_session(sess)

### Create a data generator for N-class classification to pretrain a model

In [None]:
class SpectrogramDataLoader(keras.utils.Sequence):
    """Keras ``Sequence`` yielding class-balanced batches of spectrograms.

    ``spectrograms_folder`` must contain one sub-folder per class, and each
    sub-folder holds the spectrograms of that class as ``.npy`` files. Every
    epoch the same number of samples (the size of the smallest class) is drawn
    from each class, so that all classes are equally represented.

    Args:
        spectrograms_folder: Root folder with one sub-folder per class.
        batch_size: Number of samples per batch. Only full batches are
            produced; a trailing partial batch is dropped.
        shuffle: If True, both the per-class sample selection and the final
            sample order are re-shuffled at the end of every epoch.
        input_shape: Shape of one sample after a trailing channel axis has
            been appended to the array stored in the ``.npy`` file.

    Attributes:
        classes_list: Sorted list of class folder paths.
        classes_map: Mapping ``class folder name -> integer class id``.
        min_samples: Number of ``.npy`` files in the smallest class
            (``-1`` when no class folder was found).
        spectograms: File paths selected for the current epoch.
    """

    def __init__(
            self,
            spectrograms_folder: str,
            batch_size: int = 16,
            shuffle: bool = True,
            input_shape: tuple = (128, 251, 1),
    ):
        # glob() returns paths in arbitrary order. Sorting makes the class ids
        # reproducible, so that independently created loaders (e.g. train and
        # eval) assign the same id to the same class name. Stray files at the
        # top level are ignored: only directories are classes.
        self.classes_list = sorted(
            path
            for path in glob(os.path.join(spectrograms_folder, '*'))
            if os.path.isdir(path)
        )
        self.classes_map = {}
        self.audio_files = []  # kept for backward compatibility; not used by this class

        self.min_samples = -1
        for class_id, class_folder in enumerate(self.classes_list):
            class_name = os.path.basename(class_folder)  # folder name
            self.classes_map[class_name] = class_id

            samples_count = len(self._list_spectrograms(class_folder))
            if samples_count < self.min_samples or self.min_samples < 0:
                self.min_samples = samples_count

        self.batch_size = batch_size
        self.shuffle = shuffle
        self.input_shape = tuple(input_shape)

        # Build the sample list for the first epoch.
        self.on_epoch_end()

    @staticmethod
    def _list_spectrograms(class_folder):
        """Return the sorted ``.npy`` file paths found directly in ``class_folder``."""
        return sorted(glob(os.path.join(class_folder, '*.npy')))

    def __len__(self):
        """Return the number of full batches per epoch."""
        return len(self.spectograms) // self.batch_size

    def __getitem__(self, index):
        """Load batch number ``index``.

        Returns:
            A tuple ``(X, y)`` where ``X`` has shape
            ``(batch_size, *input_shape)`` and ``y`` holds the one-hot encoded
            class ids with ``len(classes_map)`` columns.
        """
        X = np.empty((self.batch_size,) + self.input_shape)
        y = np.empty((self.batch_size), dtype=int)

        first_idx = index * self.batch_size
        batch_paths = self.spectograms[first_idx:first_idx + self.batch_size]
        for index_offset, path in enumerate(batch_paths):
            spectrogram = np.load(path)

            # Append the channel axis expected by the convolutional model.
            X[index_offset,] = np.expand_dims(spectrogram, axis=-1)

            # The class is encoded as the name of the folder containing the file.
            class_name = os.path.basename(os.path.dirname(path))
            y[index_offset] = self.classes_map[class_name]

        return X, keras.utils.to_categorical(y, num_classes=len(self.classes_map))

    def on_epoch_end(self):
        """Re-sample the epoch: take ``min_samples`` files from every class."""
        self.spectograms = []
        for class_folder in self.classes_list:
            # Same '*.npy' pattern as the one used to compute min_samples, so
            # that non-spectrogram files can never be selected.
            spectograms = self._list_spectrograms(class_folder)
            if self.shuffle:
                np.random.shuffle(spectograms)
            self.spectograms += spectograms[:self.min_samples]

        if self.shuffle:
            # Mix the classes, otherwise batches would contain a single class.
            np.random.shuffle(self.spectograms)

### Create dataloaders

In [None]:
# One batch size for both loaders, defined once so the two cannot drift apart.
BATCH_SIZE = 16

# Training data is re-sampled and re-shuffled every epoch.
dataloader_train = SpectrogramDataLoader(
    spectrograms_folder="./datasets/train_spectrograms",
    batch_size=BATCH_SIZE,
    shuffle=True,
)
# Evaluation data is not shuffled.
dataloader_eval = SpectrogramDataLoader(
    spectrograms_folder="./datasets/eval_spectrograms",
    batch_size=BATCH_SIZE,
    shuffle=False,
)

print("Train batches count:", len(dataloader_train))
print("Eval batches count:", len(dataloader_eval))

### Create model

I used MobileNetV2 as a backbone and added a head to it to classify 12 types of sounds.

In [None]:
# Hyper-parameters of the network, gathered here so they are easy to find and tune.
INPUT_SHAPE = (128, 251, 1)  # must match the batches produced by SpectrogramDataLoader
NUM_CLASSES = 12
BN_MOMENTUM = 0.9
LEARNING_RATE = 0.0001

# Randomly initialised MobileNetV2 without its classification top.
backbone = keras.applications.MobileNetV2(
    input_shape=INPUT_SHAPE,
    alpha=1.0,
    include_top=False,
    weights=None,
)
# Override the momentum of every batch-normalisation layer in the backbone.
# isinstance() avoids constructing a throwaway layer on every iteration.
for layer in backbone.layers:
    if isinstance(layer, keras.layers.BatchNormalization):
        layer.momentum = BN_MOMENTUM

# Keep the network only up to (and including) the layer "block_16_project_BN".
backbone = keras.Model(inputs=backbone.input, outputs=backbone.get_layer("block_16_project_BN").output)
backbone.summary()

# Classification head on top of the backbone.
model = keras.Sequential([
    backbone,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dropout(0.1),
    keras.layers.Dense(32),
    keras.layers.BatchNormalization(momentum=BN_MOMENTUM),
    keras.layers.ReLU(),
    keras.layers.Dense(NUM_CLASSES, activation='softmax'),
])
model.summary()

# The head ends in a softmax, so the loss receives probabilities, not logits.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
              loss=keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

### Load pretrained model from previous training

If we have already trained a model with a higher learning rate and now want to fine-tune it, we must first load the weights from the previous training run.

In [None]:
# Optional warm start: resume from a previous run when its checkpoint exists.
# Guarded so that a fresh "Restart & Run All" still works when there is no
# checkpoint yet; the message makes a skipped load visible.
pretrained_checkpoint = "./models_mnv2_1/pretrain_checkpoint_2.hdf5"
if os.path.exists(pretrained_checkpoint):
    model.load_weights(pretrained_checkpoint)
else:
    print("No checkpoint found at", pretrained_checkpoint, "- training from scratch.")

In [None]:
# Baseline on the evaluation set before (further) training; the cell output
# shows the loss and the accuracy metric configured in model.compile().
model.evaluate(dataloader_eval)

### Train

In [None]:
# Keep on disk only the model with the best validation accuracy so far.
# The whole model is stored (save_weights_only=False), not just its weights.
# NOTE(review): the monitored name must match what Keras logs; depending on
# the TF/Keras version it is 'val_acc' or 'val_accuracy' - confirm.
checkpoint_filepath = './models_mnv2_1/pretrain_checkpoint_2.hdf5'
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    checkpoint_filepath,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# Batches are loaded by 6 worker processes.
model.fit(
    dataloader_train,
    epochs=100,
    validation_data=dataloader_eval,
    callbacks=[model_checkpoint_callback],
    workers=6,
    use_multiprocessing=True,
)

### Save the backbone only (without the classification head)

In [None]:
# Persist the truncated MobileNetV2 backbone on its own; the classification
# head belongs to `model`, not to `backbone`, so it is not written here.
# Note that Model.save() stores the whole model, not only its weights.
backbone.save("./models_mnv2_1/pretrained_backbone.hdf5")