In [None]:
%matplotlib inline
import os
from glob import glob

import librosa
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.backend import set_session

from spectrogram_utils import generate_spectrogram

# Sets CUDA_VISIBLE_DEVICES to "0" for this process, i.e. only the first GPU is exposed to CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
# Session options: log the device each operation is placed on, and let the
# GPU memory pool grow on demand instead of reserving it all up front.
config = tf.ConfigProto(log_device_placement=True)
config.gpu_options.allow_growth = True

# Create the session and register it as the one Keras uses.
sess = tf.Session(config=config)
set_session(sess)

### Data loader for one-class classification

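Data loader for one-class (target vs. everything else) classification. It takes the folder with the target-sound spectrograms and a parent folder containing the other sound classes, and generates batches that mix target sounds (positive examples, label 1) with background sounds (negative examples, label 0).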

In [None]:
class SpectrogramDataLoader(keras.utils.Sequence):
    """Batch generator for binary "target sound vs. background" classification.

    Each batch contains ``batch_size // 2`` target ("fart") spectrograms
    labelled ``1``; the remaining slots are filled with background
    spectrograms labelled ``0``. Samples are returned as an array of shape
    ``(batch_size, 128, 251, 1)`` together with an integer label vector of
    length ``batch_size``.

    Args:
        fart_spectrograms_folder: Folder holding the target ``*.npy`` files.
        other_spectrograms_folder: Parent folder whose direct sub-folders hold
            the background ``*.npy`` files. A sub-folder named ``fart`` and
            the target folder itself are skipped.
        batch_size: Number of samples per batch.
        shuffle: If true, target files are reshuffled after every epoch,
            background files are drawn at random, and the samples inside each
            batch are permuted. If false, batches are fully deterministic.

    Raises:
        ValueError: If no background spectrogram is found.
    """

    def __init__(
            self,
            fart_spectrograms_folder: str,
            other_spectrograms_folder: str,
            batch_size: int = 16,
            shuffle: bool = True,
    ):
        # glob() order depends on the filesystem; sort so that shuffle=False
        # yields the same batches on every machine and run.
        self.fart_spectrograms = sorted(
            glob(os.path.join(fart_spectrograms_folder, '*.npy'))
        )

        # The former pattern '[!fart]*' is a character class: it dropped every
        # sub-folder whose name *starts with* 'f', 'a', 'r' or 't', not just
        # the folder called 'fart'. Filter on the parent directory instead.
        target_dir = os.path.abspath(fart_spectrograms_folder)

        def _is_background(path):
            parent = os.path.abspath(os.path.dirname(path))
            return parent != target_dir and os.path.basename(parent) != 'fart'

        candidates = glob(os.path.join(other_spectrograms_folder, '*', '*.npy'))
        self.other_spectrograms = sorted(
            path for path in candidates if _is_background(path)
        )
        if not self.other_spectrograms:
            raise ValueError(
                "No background spectrograms (*.npy) found in sub-folders of "
                "{!r}".format(other_spectrograms_folder)
            )

        self.batch_size = batch_size
        self.shuffle = shuffle

        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch."""
        # NOTE(review): batches advance through the target list in steps of
        # batch_size but consume only batch_size // 2 target files each, so
        # roughly half of the target files are visited per epoch (and, with
        # shuffle=False, always the same half). Kept as-is so the epoch length
        # does not change.
        return len(self.fart_spectrograms) // self.batch_size

    def __getitem__(self, index):
        """Build batch number ``index`` and return it as ``(X, y)``."""
        X = np.empty((self.batch_size, 128, 251, 1))
        y = np.empty((self.batch_size), dtype=int)
        positives_per_batch = self.batch_size // 2

        for index_offset in range(self.batch_size):
            current_idx = index * self.batch_size + index_offset

            if index_offset < positives_per_batch:
                # First half of the batch: target sound, label 1.
                spectrogram_path = self.fart_spectrograms[current_idx]
                y[index_offset] = 1
            else:
                # Second half: background sound, label 0. Random draw when
                # shuffling, otherwise a deterministic wrap-around index.
                if self.shuffle:
                    spectrogram_path = np.random.choice(self.other_spectrograms)
                else:
                    spectrogram_path = self.other_spectrograms[
                        current_idx % len(self.other_spectrograms)
                    ]
                y[index_offset] = 0

            with open(spectrogram_path, 'rb') as f:
                spectrogram = np.load(f)

            # Append the channel axis expected by the network input.
            X[index_offset,] = np.expand_dims(spectrogram, axis=-1)

        if not self.shuffle:
            # Deterministic mode: keep positives first, negatives second.
            return X, y

        # Mix positives and negatives inside the batch.
        shuffled_indeces = np.random.permutation(len(X))
        return X[shuffled_indeces], y[shuffled_indeces]

    def on_epoch_end(self):
        """Reshuffle the target file list in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.fart_spectrograms)


### Create dataloader

In [None]:
# Training loader: shuffling enabled.
dataloader_train = SpectrogramDataLoader(
    fart_spectrograms_folder="./datasets/train_mobile_spectrograms/fart",
    other_spectrograms_folder="./datasets/train_mobile_spectrograms/",
    batch_size=16,
    shuffle=True,
)

# Evaluation loader: same layout on the eval split, shuffling disabled.
dataloader_eval = SpectrogramDataLoader(
    fart_spectrograms_folder="./datasets/eval_mobile_spectrograms/fart",
    other_spectrograms_folder="./datasets/eval_mobile_spectrograms/",
    batch_size=16,
    shuffle=False,
)

# Report how many batches one epoch of each loader yields.
print("Train batches count:", len(dataloader_train))
print("Eval batches count:", len(dataloader_eval))

In [None]:
# Fetch the first evaluation batch and show sample 10 (single channel) in grayscale.
X, y = dataloader_eval[0]
plt.imshow(X[10, :, :, 0], cmap='gray')
plt.show()

### Create model

In [None]:
# MobileNetV2 feature extractor for single-channel 128x251 spectrograms,
# randomly initialised (weights=None) and without the classification head.
backbone = keras.applications.MobileNetV2(
    input_shape=(128, 251, 1),
    alpha=1.0,
    include_top=False,
    weights=None,
)

# Set the momentum of every batch-normalisation layer to 0.9.
# isinstance() avoids instantiating a throwaway layer just to compare types.
for layer in backbone.layers:
    if isinstance(layer, keras.layers.BatchNormalization):
        layer.momentum = 0.9

# Cut the network at the output of the "block_16_project_BN" layer.
backbone = keras.Model(inputs=backbone.input, outputs=backbone.get_layer("block_16_project_BN").output)

# Use pretrained backbone from pretrain_model.ipynb
#backbone.load_weights("./models_mnv2_1/pretrained_backbone.hdf5")
# Freeze the backbone so that only the head below is trainable at this point.
backbone.trainable = False

backbone.summary()

# Classification head: pooled features -> small dense layer -> one sigmoid output.
model = keras.Sequential([
    backbone,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dropout(0.1),
    keras.layers.Dense(32),
    keras.layers.BatchNormalization(momentum=0.9),
    keras.layers.ReLU(),
    keras.layers.Dense(1, activation='sigmoid', name='fart_predict'),
])

### Load pretrained model from previous training

If we have already trained a model with a higher learning rate and now want to tune it a little, we must load the weights from the previous training.

In [None]:
# Restore the weights saved by the previous training run, then unfreeze the
# backbone so it is trainable too. The weights are loaded while the backbone is
# still in the state it was built in, before the `trainable` flag is flipped.
model.load_weights("./models_mnv2_1/finetune_checkpoint_mobile_2.hdf5")
backbone.trainable = True
# Print the layer summary of the backbone after unfreezing.
backbone.summary()

### Compile model

In [None]:
# Binary cross-entropy on the sigmoid output (probabilities, not logits),
# optimised with Adam at a learning rate of 1e-4.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss=keras.losses.BinaryCrossentropy(from_logits=False),
    # Pin the metric name. An unnamed AUC() gets an auto-generated name
    # ('auc', 'auc_1', ...) that depends on how many AUC metrics were already
    # created in this kernel, so the 'val_auc_1' key monitored by the
    # checkpoint callback would only exist after re-running this cell.
    metrics=[keras.metrics.AUC(name='auc_1')],
)

### Train

In [None]:
checkpoint_filepath = './models_mnv2_1/finetune_checkpoint_mobile_3.hdf5'

# Save the full model (not only the weights) whenever the monitored
# validation metric reaches a new maximum.
# NOTE(review): the monitored key must equal 'val_' + the name of the AUC
# metric as it appears in the training logs — confirm it matches.
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    checkpoint_filepath,
    monitor='val_auc_1',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# Train on the training loader and validate on the eval loader after each
# epoch; batches are produced by 3 worker processes.
model.fit(
    dataloader_train,
    epochs=1000,
    validation_data=dataloader_eval,
    callbacks=[model_checkpoint_callback],
    workers=3,
    use_multiprocessing=True,
)

### Quick check of model predictions

In [None]:
# Compute the compiled loss and metrics over the evaluation loader.
model.evaluate(dataloader_eval)

In [None]:
# Take the third evaluation batch and display the model outputs next to the true labels.
X, y = dataloader_eval[2]
model.predict(X), y

### Test on recorded audio example

In [None]:
# Load the recording resampled to 16 kHz. The sample rate is passed by keyword
# because recent librosa releases no longer accept it positionally.
audio_sample, sr = librosa.load("./datasets/voice_eval_mic.wav", sr=16000)
sr

In [None]:
# Slide a two-second window over the recording in one-second steps and
# collect the model output for every window.
window_size = sr * 2

detects = []
# "+ 1" keeps the last full window: without it a recording whose length is an
# exact multiple of the step loses its final window (a 2 s clip yields none).
for start_idx in range(0, len(audio_sample) - window_size + 1, sr):
    signal_crop = audio_sample[start_idx: start_idx + window_size]

    spectrogram = generate_spectrogram(
        signal=signal_crop,
        sample_rate=sr,  # use the rate the audio was actually loaded with
        n_fft=1024,
        hop_length=128,
    )
    # Add the batch axis in front and the channel axis at the end.
    spectrogram = np.expand_dims(spectrogram, axis=(0, -1))

    detect = model.predict(spectrogram)
    detects.append(detect[0][0])

# One point per window; the window step is one second, so the index is the
# window start time in seconds.
plt.plot(detects)
plt.xlabel("Window start (s)")
plt.ylabel("Model output")
plt.title("Detection score over time")
plt.show()