In [None]:
from pathlib import Path
import tensorflow as tf
from tensorflow import keras as k
import numpy as np
import matplotlib.pyplot as plt

from rennet.utils.plotting_utils import plot_speclike
from rennet.utils.np_utils import confusion_matrix, normalize_confusion_matrix, print_prec_rec
import feat_ext as fx
from train_utils import *

---
# Define

In [None]:
# Experiment identifier; the output directory for this run is derived from it.
# CHANGE!! -- pick a new value for every new experiment.
name = "03-training-00-keras_2mlp_clsw0"

In [None]:
# ---- hyper-parameters -------------------------------------------------------
embedding_dim = (128,)      # shape of a single input example
nclasses = 2                # number of units in the softmax output layer
batchsize = 128
n_passes_per_epoch = 5      # multiplies the per-pass step count in model.fit
n_epochs = 50
class_weight = [1.0, 1.0]   # one weight per class index

# ---- keras model ------------------------------------------------------------
# The input and output layers are fixed; only the hidden layers in between
# are meant to be edited between experiments.
model = k.Sequential([
    k.layers.InputLayer(embedding_dim, name='input'),  # DO NOT CHANGE
    k.layers.Dense(64, activation='relu', name='dense_01'),
    k.layers.Dense(32, activation='relu', name='dense_02'),
    k.layers.Dense(nclasses, activation='softmax', name='output'),  # DO NOT CHANGE
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['categorical_accuracy'],
)

model.summary()

---
# Prep

In [None]:
# Location of the prepared TFRecord files, relative to the current working directory.
dir_pickles_root = Path.cwd() / "data/prepared/pickles/20190909-vggish_embedding/"

fp_trn = dir_pickles_root / "trn.tfrecord"  # training split
fp_val = dir_pickles_root / "val.tfrecord"  # validation split

# Quick sanity check that both files are present before building the datasets.
print(f'tfrecords found? (trn: {fp_trn.exists()}), (val: {fp_val.exists()})')

In [None]:
# Training stream: shuffled through a 16384-element buffer (fixed seed) and repeated.
# NOTE(review): get_dataset receives `batchsize`, so the shuffle presumably
# operates on whole batches rather than single examples -- confirm in train_utils.
shuffle_forever = tf.data.experimental.shuffle_and_repeat(buffer_size=16384, seed=9899)
dset_trn = get_dataset(fp_trn, batchsize=batchsize).apply(shuffle_forever)

# Validation stream: original order, repeated.
dset_val = get_dataset(fp_val, batchsize=batchsize).repeat()

In [None]:
# Per-split sample counts.
# NOTE(review): assumes dataset_shape returns an integer sample count -- confirm.
n_samples_trn = dataset_shape(fp_trn)
n_samples_val = dataset_shape(fp_val)

print(f'total num samples per pass: (trn: {n_samples_trn}), (val: {n_samples_val})')

# Number of full batches in one pass over each split (floor division, so a
# trailing partial batch is not counted).
trn_steps_per_pass = n_samples_trn // batchsize
val_steps = n_samples_val // batchsize

In [None]:
# Per-experiment output directory under ./outputs, keyed by `name`.
dir_outputs = Path.cwd() / f"outputs/{name}"

# exist_ok=False makes this raise FileExistsError if the directory is already
# there, so a previous run with the same `name` is never written into.
dir_outputs.mkdir(exist_ok=False, parents=True)

In [None]:
# Checkpoint filename template; the placeholders are filled in by Keras from
# the epoch number and the logged validation metrics.
MODEL_CHECKPOINT_PATTERN = 'w.{epoch:03d}-{val_loss:.3f}-{val_categorical_accuracy:.3f}.h5'

# Save the full model (not only its weights) after every epoch.
checkpoint_cb = k.callbacks.ModelCheckpoint(
    str(dir_outputs / MODEL_CHECKPOINT_PATTERN),
    verbose=1,
    period=1,
    save_weights_only=False,
    save_best_only=False,
)

# TensorBoard event files go into the same output directory as the checkpoints.
tensorboard_cb = k.callbacks.TensorBoard(
    log_dir=str(dir_outputs),
    write_images=True,
    write_graph=True,
)

# Stop training when val_loss has not improved for 3 consecutive epochs.
early_stopping_cb = k.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
)

callbacks = [checkpoint_cb, tensorboard_cb, early_stopping_cb]

---
# Train

In [None]:
# Keras documents `class_weight` as a dict mapping class index -> weight.
# The notebook configures it as a plain list (position == class index), so
# convert it here; a dict or None is passed through untouched.
# NOTE(review): a list appears to be silently ignored by some tf.keras
# versions -- confirm against the installed TensorFlow release.
if class_weight is None or isinstance(class_weight, dict):
    class_weight_by_index = class_weight
else:
    class_weight_by_index = dict(enumerate(class_weight))

# One "epoch" here covers `n_passes_per_epoch` passes over the training split.
model.fit(
    dset_trn,
    steps_per_epoch=trn_steps_per_pass * n_passes_per_epoch,
    epochs=n_epochs,
    validation_data=dset_val,
    validation_steps=val_steps,
    callbacks=callbacks,
    verbose=1,
    class_weight=class_weight_by_index,
)

---
# Evaluate

In [None]:
# Rebuild the validation dataset so prediction starts from its first batch,
# then run the model over `val_steps` batches.
dset_val = get_dataset(fp_val, batchsize=batchsize).repeat()
preds = model.predict(
    dset_val,
    steps=val_steps,
)
preds.shape

In [None]:
# Collect the ground-truth targets for the same `val_steps` batches that were
# fed to model.predict, by iterating a fresh copy of the validation dataset.
dset_val = get_dataset(fp_val, batchsize=batchsize).repeat()
next_batch = dset_val.make_one_shot_iterator().get_next()

with tf.Session() as sess:
    # Element [1] of each evaluated batch is taken as the targets
    # (presumably batches are (features, labels) -- verify in get_dataset).
    label_batches = [sess.run(next_batch)[1] for _ in range(val_steps)]

expected = np.concatenate(label_batches)
expected.shape

In [None]:
# Predicted vs. expected class index (argmax over axis 1) for the whole
# validation run, drawn as two stacked rows.
pred_classes = preds.argmax(axis=1)
expected_classes = expected.argmax(axis=1)
plot_speclike([pred_classes, expected_classes])

In [None]:
# Same comparison, zoomed in on the final 1000 entries
# (original note: "last 1000 * 0.12 seconds").
s = np.s_[-1000:]
pred_classes_tail = preds.argmax(axis=1)[s]
expected_classes_tail = expected.argmax(axis=1)[s]
plot_speclike([pred_classes_tail, expected_classes_tail])

In [None]:
# true positive
# Distribution of the score the model gave to the *correct* class.
# Boolean masks select the rows whose expected column is non-zero
# (assumes `expected` is one-hot with two columns -- TODO confirm).
# Builtin `bool` replaces `np.bool`, an alias that newer NumPy releases removed.
is_class0 = expected[:, 0].astype(bool)
is_class1 = expected[:, 1].astype(bool)

_ = plt.hist(preds[:, 0][is_class0], bins=20, alpha=0.3, label='pred=0,exp=0')
_ = plt.hist(preds[:, 1][is_class1], bins=20, alpha=0.3, label='pred=1,exp=1')
plt.legend()

In [None]:
# false positives
# Distribution of the score the model gave to the *wrong* class.
# Boolean masks select the rows whose expected column is non-zero
# (assumes `expected` is one-hot with two columns -- TODO confirm).
# Builtin `bool` replaces `np.bool`, an alias that newer NumPy releases removed.
is_class0 = expected[:, 0].astype(bool)
is_class1 = expected[:, 1].astype(bool)

_ = plt.hist(preds[:, 0][is_class1], bins=20, alpha=0.3, label='pred=0,exp=1')
_ = plt.hist(preds[:, 1][is_class0], bins=20, alpha=0.3, label='pred=1,exp=0')
plt.legend()

In [None]:
# Confusion matrix between expected and predicted class indices.
true_classes = expected.argmax(axis=1)
predicted_classes = preds.argmax(axis=1)
conf_mat = confusion_matrix(true_classes, predicted_classes)

# The normalised result is unpacked into print_prec_rec's positional arguments
# (presumably a precision/recall pair -- verify in rennet.utils.np_utils).
conf_mat_normalized = normalize_confusion_matrix(conf_mat)

print("classwise precision and recall")
print_prec_rec(*conf_mat_normalized, onlydiag=True)