In [None]:
import json
import os
import random

from keras import layers
from keras.models import Sequential
from matplotlib import collections  as mc
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
import numpy as np
import keras

In [None]:
# Fix the seeds of both random number generators used in this notebook
# so that repeated runs start from the same state.
random.seed(a=1337)        # Python's built-in generator
np.random.seed(seed=451)   # NumPy's legacy global generator

In [None]:
def data_from_file(file):
    """Read a JSON document from disk and return the parsed object.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The decoded JSON content (whatever ``json.load`` produces for it).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's default
    # locale encoding, which may differ (e.g. on Windows).
    with open(file, encoding='utf-8') as f:
        return json.load(f)

In [None]:
# Constants
# Sound class id that is treated as silence when deriving the dynamic class
SILENCE_CLASS = 0

# Hyperparameters
# ======================================================================
# -- Data pipeline --
seq_len = 10             # Number of consecutive steps in one training sample
batch_size = 128         # Number of samples per batch
data_split = 1 / 3       # Fraction held out; half goes to validation, half to testing

# -- Network & training --
num_units = 32           # Units per LSTM layer (also used as embedding size)
num_layers = 0           # The model adds (num_layers - 1) stacked LSTMs plus one final LSTM
dropout = 0.2            # Dropout rate applied after every LSTM layer (0 disables it)
epochs = 50              # Number of epochs to train

In [None]:
# Location of the mono sequence data (JSON)
dataset_path = './files/mono-experiment/mono-sequence-original-6-998cls-2pca-6db-100ms.json'

# Parse it into memory
dataset = data_from_file(dataset_path)

# Show the arguments stored inside the dataset file
print(dataset['args'])

In [None]:
# Convert dataset to our training sequence data:
# one [sound_class, dynamic_class] pair per step.
sequence_data = []

# Prepare dataset
for step in dataset['steps']:
    # Events are keyed by the string form of the step's event id
    sound_event = dataset['events'][str(step['event_id'])]

    sound_class = sound_event['class']

    # Convert average RMS to dynamic class (0 - 9)
    # NOTE: compare by value (==), not identity (is); identity on integers
    # only works by accident of CPython's small-int caching.
    if sound_class == SILENCE_CLASS:
        dynamic_class = 0
    else:
        # assumes rms_avg is normalized to [0, 1] - TODO confirm
        dynamic_class = round(sound_event['rms_avg'] * 9)

    sequence_data.append([sound_class, dynamic_class])

n_sound_classes = dataset['args']['n_clusters'] + 2
n_dynamic_classes = 10

# One hot encode it ..
# Disabled: the model below feeds integer class ids into an Embedding layer
# and trains with a sparse categorical loss, so no one-hot vectors are needed.
if False:
    sound_class_hot = keras.utils.to_categorical(np.array(sequence_data)[:,0])
    dynamic_class_hot = keras.utils.to_categorical(np.array(sequence_data)[:,1])

    sequence_data = []
    for x in range(len(sound_class_hot)):
        # This is our data vector
        sequence_data.append([
            sound_class_hot[x],
            dynamic_class_hot[x],
        ])

    n_sound_classes = len(sequence_data[0][0])
    n_dynamic_classes = len(sequence_data[0][1])

print('Sound classes: {}\nDynamic classes: {}'.format(n_sound_classes, n_dynamic_classes))

# Final array: one row per step, columns are (sound_class, dynamic_class)
sequence_data = np.array(sequence_data)

In [None]:
# Peek at a single [sound_class, dynamic_class] step. The index is clamped to
# the last row so that a shorter dataset does not raise an IndexError.
sequence_data[min(11200, len(sequence_data) - 1)]

In [None]:
def generator(data, seq_len, min_index, max_index, batch_size=None):
    """Endlessly yield (samples, targets) batches for next-step prediction.

    Every sample is a window of ``seq_len`` consecutive values taken from the
    first column of ``data``; its target is the value that directly FOLLOWS
    the window. Only windows whose target still lies inside
    ``[min_index, max_index)`` are produced, so no zero-padded samples are
    emitted and no sample reaches past ``max_index``.

    Args:
        data: 2-D array; column 0 is used for both samples and targets.
        seq_len: Number of steps per sample window.
        min_index: First row index (inclusive) a window may start at.
        max_index: Row index (exclusive) that no window or target may reach.
        batch_size: Maximum number of samples per batch. When omitted, the
            notebook-level ``batch_size`` hyperparameter is used.

    Yields:
        Tuple ``(samples, targets)`` of int32 arrays with shapes
        ``(n, seq_len)`` and ``(n,)``, where ``n <= batch_size``. After the
        last (possibly shorter) batch the generator starts over at
        ``min_index``.

    Raises:
        ValueError: If the index range is too short for a single window plus
            its target (raised on first iteration).
    """
    if batch_size is None:
        # Backward compatible fallback to the global hyperparameter; the
        # parameter of the same name shadows it, hence the globals() lookup.
        batch_size = globals()['batch_size']

    # Exclusive upper bound for window starts: start + seq_len (the target
    # position) must remain below max_index.
    last_start = max_index - seq_len
    if last_start <= min_index:
        raise ValueError(
            'Index range [{}, {}) is too short for seq_len={}'.format(
                min_index, max_index, seq_len))

    column = data[:, 0]
    offsets = np.arange(seq_len)

    i = min_index
    while True:
        if i >= last_start:
            # Every window has been served, start the next pass
            i = min_index
        rows = np.arange(i, min(i + batch_size, last_start))
        i += len(rows)

        # Build all windows of the batch at once: row r -> [r, ..., r + seq_len - 1]
        samples = column[rows[:, None] + offsets].astype('int32')
        # The target is the step right after each window
        targets = column[rows + seq_len].astype('int32')
        yield samples, targets

In [None]:
# Split in 3 sets for training, validation and testing
# Number of SAMPLES in the validation set (the test set gets the same share)
validation_size = round((data_split / 2) * len(sequence_data))

train_max = len(sequence_data) - (validation_size * 2)
val_min = train_max + 1
val_max = train_max + validation_size + 1
test_min = train_max + validation_size + 2
test_max = len(sequence_data) - 1

# NOTE: despite its name this is the index span of the test set
training_steps = test_max - test_min

train_gen = generator(sequence_data,
                      seq_len=seq_len,
                      min_index=0,
                      max_index=train_max)

val_gen = generator(sequence_data,
                    seq_len=seq_len,
                    min_index=val_min,
                    max_index=val_max)

test_gen = generator(sequence_data,
                     seq_len=seq_len,
                     min_index=test_min,
                     max_index=test_max)

steps_per_epoch = train_max // batch_size

# Keras counts validation_steps in BATCHES, not samples. Using the raw sample
# count here would make every epoch validate on roughly batch_size times more
# data than the validation set contains.
validation_steps = max(1, (val_max - val_min) // batch_size)

print('Batch size:', batch_size)
print('Steps per epoch:', steps_per_epoch)

print('\nSplit for validation & test @ {0:.2f}%'.format(data_split * 100))
print('Training set:', (0, train_max))
print('Validation set:', (val_min, val_max))
print('Test set:', (test_min, test_max))

In [None]:
# Network: embedding of the sound class ids -> LSTM stack -> softmax over classes
model = Sequential()
model.add(layers.Embedding(input_dim=n_sound_classes,
                           output_dim=num_units,
                           input_length=seq_len))

# One flag per LSTM layer: the (num_layers - 1) leading layers hand their full
# output sequence to the next LSTM, the final layer only returns its last output.
return_seq_flags = [True] * max(num_layers - 1, 0) + [False]

for returns_sequences in return_seq_flags:
    model.add(layers.LSTM(num_units, return_sequences=returns_sequences))
    # Optional dropout after each LSTM layer
    if dropout > 0.0:
        model.add(layers.Dropout(dropout))

# Probability distribution over all sound classes
model.add(layers.Dense(n_sound_classes, activation='softmax'))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['acc'])
model.summary()

In [None]:
# Train from the generators; validation runs after every epoch
fit_options = {
    'steps_per_epoch': steps_per_epoch,
    'epochs': epochs,
    'validation_data': val_gen,
    'validation_steps': validation_steps,
}
history = model.fit_generator(train_gen, **fit_options)

In [None]:
# Plot validation and training loss / accuracy per epoch
loss = history.history['loss']
val_loss = history.history['val_loss']
acc = history.history['acc']
val_acc = history.history['val_acc']

# Use a dedicated name for the x axis: `epochs` is the integer hyperparameter
# read by the training cell and must not be overwritten with a range, or
# re-running the training cell after this one would fail.
epoch_range = range(1, len(loss) + 1)

plt.figure()
plt.plot(epoch_range, loss, 'r', label='Training loss')
plt.plot(epoch_range, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

plt.figure()
plt.plot(epoch_range, acc, 'g', label='Training accuracy')
plt.plot(epoch_range, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()