In [1]:
import math
import os
import random
import threading

import arrow
import kapre
import keras
import keras.backend as K
import numpy as np
import pandas as pd
import scipy.io.wavfile as wav

from soph import soph_scaler, center_wave, ex_generator

# https://github.com/keunwoochoi/kapre

Using TensorFlow backend.


In [2]:
# Timestamp (US/Eastern, minute resolution) used to name this run's log directory.
current_time = arrow.now().to('US/Eastern').format('YYYY-MM-DD-HH-mm')

# Table of examples; its `state` column selects the training / validation rows.
# NOTE: unpickling can execute arbitrary code -- only load a file you trust.
ex_df = pd.read_pickle("data/ex_df.pkl")

# Number of units in the final softmax layer (one per target class).
num_cat = 12

# Hyper-parameters for this run, kept in one dict so they can be printed and
# stored with the results.
# NOTE(review): "l2_reg" and "mel_trainable" are not read by the model or
# training cells visible in this notebook -- confirm whether they should be.
src_args = {
    # --- network ---
    "dropout_prob": 0.4,
    "activation": "elu",
    "batch_size": 64,
    "l2_reg": 0.0005,
    # One entry per block; each inner [a, b] pair is passed to Conv2D as
    # (filters, kernel_size).
    "cnn_blocks": [[[64, 3]], [[64, 3]], [[128, 3]], [[128, 3]]],
    "mel_trainable": False,
    # --- data split: values of ex_df.state used for fitting / validation ---
    "train_state": ["train", "val"],
    "val_state": ["test"],
    # --- options forwarded to ex_generator for the training stream ---
    "p_transform": 0,
    "vol_range": 0,
    "displacement": 0,
    "shift": 0,
    # --- mel-spectrogram front end ---
    "n_mels": 26,
    "n_dft": 1024,
}

print(src_args)
    

{'displacement': 0, 'l2_reg': 0.0005, 'val_state': ['test'], 'n_mels': 26, 'cnn_blocks': [[[64, 3]], [[64, 3]], [[128, 3]], [[128, 3]]], 'p_transform': 0, 'batch_size': 64, 'shift': 0, 'n_dft': 1024, 'dropout_prob': 0.4, 'vol_range': 0, 'train_state': ['train', 'val'], 'activation': 'elu', 'mel_trainable': False}


In [3]:
# Architecture settings pulled out of the run configuration.
cnn_blocks = src_args["cnn_blocks"]
act = src_args["activation"]
drop = src_args["dropout_prob"]
kern = (3, 3)

# Front end: raw waveform of shape (1, 16000) -> decibel-scaled mel-spectrogram.
mel_layer = kapre.time_frequency.Melspectrogram(
    sr=16000,
    n_dft=src_args["n_dft"],
    n_mels=src_args["n_mels"],
    return_decibel_melgram=True,
)
input_layers = [keras.layers.InputLayer(input_shape=(1, 16000)), mel_layer]

# Convolutional trunk: every block contributes its Conv2D layers followed by
# batch normalisation, dropout and average pooling.
cnn_layers = []
for block_spec in cnn_blocks:
    cnn_layers += [
        keras.layers.Conv2D(
            conv_spec[0], conv_spec[1], padding="same", activation=act)
        for conv_spec in block_spec
    ]
    cnn_layers += [
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(drop),
        keras.layers.AvgPool2D(padding='valid'),
    ]

# Classifier head: flatten the feature maps and emit num_cat softmax scores.
class_layers = [
    keras.layers.Flatten(),
    keras.layers.Dense(num_cat, activation="softmax"),
]

cnn_model = keras.Sequential([*input_layers, *cnn_layers, *class_layers])
cnn_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 1, 16000)          0         
_________________________________________________________________
melspectrogram_1 (Melspectro (None, 26, 32, 1)         1063962   
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 26, 32, 64)        640       
_________________________________________________________________
batch_normalization_1 (Batch (None, 26, 32, 64)        256       
_________________________________________________________________
dropout_1 (Dropout)          (None, 26, 32, 64)        0         
_________________________________________________________________
average_pooling2d_1 (Average (None, 13, 16, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 13, 16, 64)        36928     
__________

In [28]:
# All artifacts of this run (TensorBoard events, best checkpoint, CSV log) go
# under one directory named after the run timestamp.
log_base = "logs/cnn/{}/".format(current_time)
# The checkpoint and the CSV log are written directly into log_base, so create
# it up front instead of depending on it already existing.
os.makedirs(log_base, exist_ok=True)

callbacks = [
    keras.callbacks.TensorBoard(
        log_dir=log_base + 'tb',
        batch_size=src_args["batch_size"],
        histogram_freq=0,
        write_grads=False,
        write_images=True
    ),
    # Keep only the best full model (architecture + weights) by val_loss.
    keras.callbacks.ModelCheckpoint(
        filepath=log_base + 'model-checkpoint.hdf5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1),
    keras.callbacks.CSVLogger(log_base + 'training.log'),
    keras.callbacks.EarlyStopping(patience=5, verbose=1),
    # Halve the learning rate after a single epoch without improvement.
    keras.callbacks.ReduceLROnPlateau(
        factor=0.5, patience=1, verbose=1, min_lr=1e-6)
]

# Labels are fed as integer class ids, hence the sparse loss.
cnn_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='nadam',
    metrics=['accuracy'])


def launchTensorBoard():
    """Restart TensorBoard so that it serves this run's event directory.

    Kills any process matched by ``pkill tensorboard`` and then runs the
    ``tensorboard`` command on ``log_base + 'tb'``. ``os.system`` blocks until
    the command exits, so this is meant to be run on a background thread.
    """
    os.system('pkill tensorboard')
    os.system('tensorboard --logdir=' + log_base + 'tb')
    return


# Run the blocking TensorBoard command off the main thread so training can start.
t = threading.Thread(target=launchTensorBoard, args=([]))
t.start()

# Row counts of the two splits (vectorised instead of summing element-wise).
n_val = int(ex_df.state.isin(src_args["val_state"]).sum())
n_train = int(ex_df.state.isin(src_args["train_state"]).sum())

# Pull the whole validation split as one un-shuffled batch with every
# augmentation option switched off. `shift` is pinned to 0 as well so the
# validation data does not depend on ex_generator's default for it.
val_data = next(ex_generator(
        batch_size=n_val,
        shuffle=False,
        state=src_args["val_state"],
        shift=0,
        vol_range=0,
        displacement=0,
        p_transform=0))

# Keras documents steps_per_epoch as an integer; round up so the final partial
# batch is still counted in each epoch.
steps_per_epoch = math.ceil(n_train / src_args["batch_size"])

cnn_model.fit_generator(
    generator=ex_generator(
        batch_size=src_args["batch_size"],
        shuffle=True,
        state=src_args["train_state"],
        shift=src_args["shift"],
        vol_range=src_args["vol_range"],
        displacement=src_args["displacement"],
        p_transform=src_args["p_transform"]),
    steps_per_epoch=steps_per_epoch,
    epochs=200,
    verbose=1,
    max_queue_size=100,
    callbacks=callbacks,
    validation_data=val_data
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200

Epoch 00004: reducing learning rate to 0.0010000000474974513.
Epoch 5/200
Epoch 6/200
Epoch 7/200

Epoch 00007: reducing learning rate to 0.0005000000237487257.
Epoch 8/200
Epoch 9/200
Epoch 10/200

Epoch 00010: reducing learning rate to 0.0002500000118743628.
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200

Epoch 00019: reducing learning rate to 0.0001250000059371814.
Epoch 20/200
Epoch 21/200
Epoch 22/200

Epoch 00022: reducing learning rate to 6.25000029685907e-05.
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200


Epoch 28/200
Epoch 29/200

Epoch 00029: reducing learning rate to 3.125000148429535e-05.
Epoch 30/200

Epoch 00030: reducing learning rate to 1.5625000742147677e-05.
Epoch 31/200
Epoch 32/200
Epoch 33/200

Epoch 00033: reducing learning rate to 7.812500371073838e-06.
Epoch 34/200

Epoch 00034: reducing learning rate to 3.906250185536919e-06.
Epoch 35/200

Epoch 00035: reducing learning rate to 1.9531250927684596e-06.
Epoch 36/200

Epoch 00036: reducing learning rate to 9.765625463842298e-07.
Epoch 37/200

Epoch 00037: reducing learning rate to 4.882812731921149e-07.
Epoch 38/200

Epoch 00038: reducing learning rate to 2.4414063659605745e-07.
Epoch 39/200

Epoch 00039: reducing learning rate to 1.2207031829802872e-07.
Epoch 40/200

Epoch 00040: reducing learning rate to 6.103515914901436e-08.
Epoch 41/200

Epoch 00041: reducing learning rate to 3.051757957450718e-08.
Epoch 42/200

Epoch 00042: reducing learning rate to 1.525878978725359e-08.
Epoch 43/200

Epoch 00043: reducing learning 

<keras.callbacks.History at 0x7f019d42bbe0>