# 1. Download and Import Required Packages

In [1]:
# Install KerasTuner into the running kernel's environment; pinned to the
# version this notebook was executed with so that reruns are reproducible.
%pip install keras-tuner==1.3.5

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting keras-tuner
  Downloading keras_tuner-1.3.5-py3-none-any.whl (176 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.3.5 kt-legacy-1.0.5
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Install librosa from conda-forge into the kernel's environment; pinned to the
# version this notebook was executed with so that reruns are reproducible.
%conda install -c conda-forge librosa=0.10.0

Collecting package metadata (current_repodata.json): done
Solving environment: \ 
The environment is inconsistent, please check the package plan carefully
The following packages are causing the inconsistency:

  - conda-forge/noarch::bleach==5.0.1=pyhd8ed1ab_0
  - conda-forge/noarch::pytest==7.2.0=pyhd8ed1ab_2
  - conda-forge/noarch::python-lsp-jsonrpc==1.0.0=pyhd8ed1ab_0
  - conda-forge/noarch::qtpy==2.3.0=pyhd8ed1ab_0
  - conda-forge/linux-64::sip==6.7.5=py310hd8f1fbe_0
  - conda-forge/noarch::terminado==0.17.1=pyh41d4057_0
  - conda-forge/linux-64::watchdog==2.2.1=py310hff52083_0
  - conda-forge/noarch::dask-core==2022.11.0=pyhd8ed1ab_0
  - conda-forge/noarch::flask==2.2.2=pyhd8ed1ab_0
  - conda-forge/noarch::importlib_metadata==6.0.0=hd8ed1ab_0
  - conda-forge/noarch::nltk==3.8.1=pyhd8ed1ab_0
  - conda-forge/linux-64::pyqt5-sip==12.11.0=py310hd8f1fbe_2
  - conda-forge/noarch::python-lsp-server-base==1.7.0=pyhd8ed1ab_0
  - conda-forge/noarch::pytoolconfig==1.2.4=pyhd8ed1ab_1
  - con

websocket-client-1.5 | 44 KB     | ##################################### | 100% 
soxr-python-0.3.5    | 259 KB    | ##################################### | 100% 
scikit-learn-1.2.2   | 7.3 MB    | ##################################### | 100% 
tqdm-4.65.0          | 86 KB     | ##################################### | 100% 
librosa-0.10.0       | 188 KB    | ##################################### | 100% 
openh264-2.1.0       | 1.5 MB    | ##################################### | 100% 
ca-certificates-2023 | 145 KB    | ##################################### | 100% 
pysoundfile-0.12.1   | 27 KB     | ##################################### | 100% 
botocore-1.29.137    | 5.8 MB    | ##################################### | 100% 
ffmpeg-4.4           | 9.7 MB    | ##################################### | 100% 
ujson-5.7.0          | 50 KB     | ##################################### | 100% 
audioread-3.0.0      | 34 KB     | ##################################### | 100% 
jupyter_server-1.23. | 238 K

In [3]:
import math
import boto3
import pickle
import keras_tuner
import numpy as np
import librosa as lr
import tensorflow as tf
import tensorflow.keras as keras

from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import Adam

# 2. Import Preprocessed Data

In [4]:
def import_pkl(bucket, key):
    """Download a pickled train/test split from S3 and unpack it.

    Args:
        bucket: Name of the S3 bucket that holds the pickle.
        key: Object key of the pickle inside ``bucket``.

    Returns:
        The tuple ``(X_train, y_train, X_test, y_test)`` read from the
        unpickled mapping.

    Raises:
        Exception: Whatever the download or unpickling raised is re-raised
            after being reported, instead of being swallowed and surfacing
            later as an unrelated ``UnboundLocalError`` at the return.
        KeyError: If the unpickled mapping lacks one of the four keys.
    """
    try:
        raw_bytes = s3.Bucket(bucket).Object(key).get()['Body'].read()
        # NOTE(security): unpickling executes arbitrary code embedded in the
        # payload, so this must only be pointed at buckets you fully trust.
        loaded_pickle = pickle.loads(raw_bytes)
    except Exception as e:
        print(f"Failed to load s3://{bucket}/{key}: {e}")
        raise
    return loaded_pickle['X_train'], loaded_pickle['y_train'], loaded_pickle['X_test'], loaded_pickle['y_test']

In [5]:
# Bucket name and S3 resource handle shared by the notebook; import_pkl and
# dump_pickle look up `s3` as a global when they are called.
bucket = 'pumpaudio-zach'
s3 = boto3.resource('s3')

In [6]:
# Load one pickle per pump id (00, 02, 04, 06); import_pkl returns each split
# as (X_train, y_train, X_test, y_test).
(
    (train00_X, train00_y, test00_X, test00_y),
    (train02_X, train02_y, test02_X, test02_y),
    (train04_X, train04_y, test04_X, test04_y),
    (train06_X, train06_y, test06_X, test06_y),
) = (
    import_pkl(bucket, pump_key)
    for pump_key in (
        'pump_raw/id_00.pkl',
        'pump_raw/id_02.pkl',
        'pump_raw/id_04.pkl',
        'pump_raw/id_06.pkl',
    )
)

# 3. Define Model Tuner

In [25]:
class MyHyperModel(keras_tuner.HyperModel):
    """Tunable dense autoencoder trained on dB-scaled mel spectrograms.

    ``build`` creates a fully connected encoder/decoder whose input shape is
    fixed by the ``n_mels`` and ``hop_length`` hyperparameters. ``fit`` turns
    raw waveforms into spectrograms with those same settings and runs a custom
    training loop with patience-based early stopping. The objective reported
    to the tuner (``"my_metric"``) is the mean reconstruction MSE over the
    validation batches.

    Args:
        min_delta: Minimum decrease in epoch loss, relative to the early-stop
            baseline, that counts as an improvement.
        patience: Number of consecutive non-improving epochs after which
            training stops.
    """

    # Sample rate passed to librosa and used to size the model input.
    SAMPLE_RATE = 22050
    # Presumably every clip is 10 seconds long at SAMPLE_RATE — TODO confirm.
    CLIP_SECONDS = 10
    # FFT window size used for the mel spectrogram.
    N_FFT = 1024

    def __init__(self, min_delta, patience):
        super().__init__()
        # Mirrors of the last built model's n_mels / hop_length. Kept for
        # backward compatibility; fit() reads the values from `hp` instead.
        self.input_shape_x = 0
        self.input_shape_y = 0
        self.min_delta = min_delta
        self.wait = 0
        self.patience = patience

    @staticmethod
    def _to_log_mel(audio_files, sr, n_fft, hop_length, n_mels):
        """Convert every waveform to a dB-scaled mel spectrogram and stack them.

        Returns:
            Array of shape ``(len(audio_files), n_mels, n_frames)``.
        """
        # NOTE(review): melspectrogram presumably yields a power spectrogram by
        # default, for which librosa offers power_to_db. amplitude_to_db is kept
        # so features stay on the scale used so far — confirm before changing.
        spectrograms = [
            lr.amplitude_to_db(
                abs(lr.feature.melspectrogram(y = clip, sr = sr, n_fft = n_fft, hop_length = hop_length, n_mels = n_mels)),
                ref = np.max,
            )
            for clip in audio_files
        ]
        # Stack once at the end rather than growing the array inside the loop,
        # which copied everything accumulated so far on every iteration.
        return np.stack(spectrograms)

    def build(self, hp):
        """Build and compile the autoencoder for the sampled hyperparameters.

        Args:
            hp: ``keras_tuner.HyperParameters`` to sample ``n_mels``,
                ``hop_length`` and the five hidden-layer widths from.

        Returns:
            A compiled ``Model`` mapping ``(n_mels, n_frames)`` inputs to
            outputs of the same shape.
        """
        n_mels = hp.Choice('n_mels', [64, 128, 256])
        hop_length = hp.Choice('hop_length', [128, 256, 512])

        self.input_shape_x = n_mels
        self.input_shape_y = hop_length

        # Number of spectrogram frames the model expects per clip.
        n_frames = math.ceil((self.SAMPLE_RATE * self.CLIP_SECONDS) / hop_length)

        inputLayer = Input(shape = (n_mels, n_frames))
        h = Flatten()(inputLayer)

        # Encoder (layers 1-2), bottleneck (layer 3), decoder (layers 4-5).
        hidden_layers = (
            ('layer 1', [64, 128, 256]),
            ('layer 2', [64, 32, 16]),
            ('layer 3', [4, 8]),
            ('layer 4', [64, 32, 16]),
            ('layer 5', [64, 128, 256]),
        )
        for hp_name, unit_choices in hidden_layers:
            h = Dense(units = hp.Choice(hp_name, unit_choices), activation = 'relu')(h)

        # Linear output layer restores the flattened size, then the 2-D shape.
        h = Dense(n_mels * n_frames, activation = None)(h)
        h = Reshape((n_mels, n_frames))(h)

        # Compile the very model that is returned; compiling a separate
        # throwaway instance would leave the returned model uncompiled.
        model = Model(inputs = inputLayer, outputs = h)
        model.compile(loss = 'mean_squared_error', metrics = ['mse'])
        return model

    def fit(self, hp, model, train, validation, callbacks = None, **kwargs):
        """Train ``model`` with a custom loop and return its best epoch loss.

        Args:
            hp: Hyperparameters of the current trial; ``batch_size``,
                ``learning_rate`` and ``epochs`` are sampled here, while
                ``n_mels`` and ``hop_length`` are read back from ``build``.
            model: The model returned by ``build`` for this trial.
            train: Sequence of raw waveforms used for the gradient updates.
            validation: Sequence of raw waveforms used to compute the epoch
                loss. When it is ``None`` or the same object as ``train``,
                the training spectrograms are reused.
            callbacks: Optional callbacks to notify after each epoch.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The lowest mean validation loss observed over all epochs.
        """
        callbacks = list(callbacks) if callbacks else []

        # Read the spectrogram settings from the trial itself rather than from
        # attributes mutated by the most recent build() call.
        n_mels = hp.get('n_mels')
        hop_length = hp.get('hop_length')
        batch_size = hp.Choice("batch_size", [256, 512])

        def make_dataset(audio_files):
            features = self._to_log_mel(audio_files, self.SAMPLE_RATE, self.N_FFT, hop_length, n_mels)
            return tf.data.Dataset.from_tensor_slices(features).batch(batch_size)

        train_ds = make_dataset(train)
        if validation is None or validation is train:
            # Same data: skip recomputing identical spectrograms.
            val_ds = train_ds
        else:
            val_ds = make_dataset(validation)

        # Define the optimizer.
        optimizer = keras.optimizers.Adam(hp.Float('learning_rate', 1e-4, 1e-2, sampling = 'log', default = 1e-3))
        loss_fn = keras.losses.MeanSquaredError(reduction = 'auto', name = 'mean_squared_error')

        # The metric to track validation loss.
        epoch_loss_metric = keras.metrics.Mean()

        # Function to run the train step.
        @tf.function
        def run_train_step(batch):
            with tf.GradientTape() as tape:
                reconstruction = model(batch, training = True)
                # Autoencoder: the input is its own target.
                loss = loss_fn(batch, reconstruction)
                # Add any regularization losses.
                if model.losses:
                    loss += tf.math.add_n(model.losses)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # Function to run the validation step.
        @tf.function
        def run_val_step(batch):
            reconstruction = model(batch, training = False)
            loss = loss_fn(batch, reconstruction)
            # Update the metric.
            epoch_loss_metric.update_state(loss)

        # Assign the model to the callbacks.
        for callback in callbacks:
            callback.model = model

        # Lowest validation loss seen so far; this is what gets returned.
        best_epoch_loss = float("inf")
        # Loss that a new epoch must beat by more than min_delta to reset the
        # patience counter. Tracked separately so that small improvements
        # still count towards best_epoch_loss.
        stop_baseline = float("inf")
        self.wait = 0

        # The custom training loop.
        epochs = hp.Choice('epochs', [50, 75, 100])
        for epoch in range(epochs):
            print(f"Epoch: {epoch}")

            # Iterate the training data to run the training step.
            for batch in train_ds:
                run_train_step(batch)

            # Iterate the validation data to run the validation step.
            for batch in val_ds:
                run_val_step(batch)

            # Calling the callbacks after epoch.
            epoch_loss = float(epoch_loss_metric.result().numpy())
            for callback in callbacks:
                # The "my_metric" is the objective passed to the tuner.
                callback.on_epoch_end(epoch, logs = {"my_metric": epoch_loss})
            epoch_loss_metric.reset_states()

            print(f"Epoch loss: {epoch_loss}")

            best_epoch_loss = min(best_epoch_loss, epoch_loss)

            if stop_baseline <= (epoch_loss + self.min_delta):
                self.wait += 1
                # >= so that patience = 0 stops on the first stalled epoch.
                if self.wait >= self.patience:
                    break
            else:
                stop_baseline = min(stop_baseline, epoch_loss)
                self.wait = 0

        # Return the evaluation metric value.
        return best_epoch_loss

# Train Models

### Model 00

In [26]:
# A pump-specific project folder keeps this search's trials and checkpoints
# from being wiped by another tuner created with overwrite = True in the
# same directory.
tuner_00 = keras_tuner.RandomSearch(
    objective = keras_tuner.Objective('my_metric', 'min'),
    seed = 41,
    max_trials = 60,
    hypermodel = MyHyperModel(min_delta = 1, patience = 2),
    directory = '../model/results',
    project_name = 'custom_training_00',
    overwrite = True,
)

In [28]:
# The pump 00 training clips are passed as both the training and the
# validation set.
tuner_00.search(
    train = train00_X,
    validation = train00_X,
)

Trial 60 Complete [00h 05m 50s]
my_metric: 80.3096923828125

Best my_metric So Far: 51.71058654785156
Total elapsed time: 03h 03m 41s
INFO:tensorflow:Oracle triggered exit


In [29]:
# Best hyperparameter set found by the pump 00 search.
best_hps_00 = tuner_00.get_best_hyperparameters()[0]
print(best_hps_00.values)

# Best model found by the pump 00 search.
best_model_00 = tuner_00.get_best_models()[0]
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" after the table.
best_model_00.summary()

{'n_mels': 64, 'hop_length': 128, 'layer 1': 128, 'layer 2': 32, 'layer 3': 4, 'layer 4': 16, 'layer 5': 128, 'batch_size': 256, 'learning_rate': 0.00010674541740374034, 'epochs': 50}
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 64, 1723)]        0         
                                                                 
 flatten (Flatten)           (None, 110272)            0         
                                                                 
 dense (Dense)               (None, 128)               14114944  
                                                                 
 dense_1 (Dense)             (None, 32)                4128      
                                                                 
 dense_2 (Dense)             (None, 4)                 132       
                                                                 
 dense_

### Model 02

In [30]:
# A pump-specific project folder keeps this search's trials and checkpoints
# from being wiped by another tuner created with overwrite = True in the
# same directory.
tuner_02 = keras_tuner.RandomSearch(
    objective = keras_tuner.Objective('my_metric', 'min'),
    seed = 41,
    max_trials = 60,
    hypermodel = MyHyperModel(min_delta = 1, patience = 2),
    directory = '../model/results',
    project_name = 'custom_training_02',
    overwrite = True,
)

In [31]:
# The pump 02 training clips are passed as both the training and the
# validation set.
tuner_02.search(
    train = train02_X,
    validation = train02_X,
)

Trial 60 Complete [00h 04m 42s]
my_metric: 203.61766052246094

Best my_metric So Far: 46.55274963378906
Total elapsed time: 02h 33m 11s
INFO:tensorflow:Oracle triggered exit


In [32]:
# Best hyperparameter set found by the pump 02 search.
best_hps_02 = tuner_02.get_best_hyperparameters()[0]
print(best_hps_02.values)

# Best model found by the pump 02 search.
best_model_02 = tuner_02.get_best_models()[0]
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" after the table.
best_model_02.summary()

{'n_mels': 64, 'hop_length': 256, 'layer 1': 64, 'layer 2': 32, 'layer 3': 4, 'layer 4': 64, 'layer 5': 128, 'batch_size': 256, 'learning_rate': 0.00021217076286104294, 'epochs': 50}
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 64, 862)]         0         
                                                                 
 flatten (Flatten)           (None, 55168)             0         
                                                                 
 dense (Dense)               (None, 64)                3530816   
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 4)                 132       
                                                                 
 dense_3

### Model 04

In [33]:
# A pump-specific project folder keeps this search's trials and checkpoints
# from being wiped by another tuner created with overwrite = True in the
# same directory.
tuner_04 = keras_tuner.RandomSearch(
    objective = keras_tuner.Objective('my_metric', 'min'),
    seed = 41,
    max_trials = 60,
    hypermodel = MyHyperModel(min_delta = 1, patience = 2),
    directory = '../model/results',
    project_name = 'custom_training_04',
    overwrite = True,
)

In [34]:
# The pump 04 training clips are passed as both the training and the
# validation set.
tuner_04.search(
    train = train04_X,
    validation = train04_X,
)

Trial 60 Complete [00h 02m 28s]
my_metric: 3413.7060546875

Best my_metric So Far: 43.1404914855957
Total elapsed time: 01h 22m 22s
INFO:tensorflow:Oracle triggered exit


In [35]:
# Best hyperparameter set found by the pump 04 search.
best_hps_04 = tuner_04.get_best_hyperparameters()[0]
print(best_hps_04.values)

# Best model found by the pump 04 search.
best_model_04 = tuner_04.get_best_models()[0]
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" after the table.
best_model_04.summary()

{'n_mels': 64, 'hop_length': 256, 'layer 1': 64, 'layer 2': 64, 'layer 3': 8, 'layer 4': 16, 'layer 5': 64, 'batch_size': 256, 'learning_rate': 0.003378131836989523, 'epochs': 75}
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 64, 862)]         0         
                                                                 
 flatten (Flatten)           (None, 55168)             0         
                                                                 
 dense (Dense)               (None, 64)                3530816   
                                                                 
 dense_1 (Dense)             (None, 64)                4160      
                                                                 
 dense_2 (Dense)             (None, 8)                 520       
                                                                 
 dense_3 (D

### Model 06

In [36]:
# A pump-specific project folder keeps this search's trials and checkpoints
# from being wiped by another tuner created with overwrite = True in the
# same directory.
tuner_06 = keras_tuner.RandomSearch(
    objective = keras_tuner.Objective('my_metric', 'min'),
    seed = 41,
    max_trials = 60,
    hypermodel = MyHyperModel(min_delta = 1, patience = 2),
    directory = '../model/results',
    project_name = 'custom_training_06',
    overwrite = True,
)

In [37]:
# The pump 06 training clips are passed as both the training and the
# validation set.
tuner_06.search(
    train = train06_X,
    validation = train06_X,
)

Trial 60 Complete [00h 05m 06s]
my_metric: 105.09736633300781

Best my_metric So Far: 37.78456497192383
Total elapsed time: 02h 48m 03s
INFO:tensorflow:Oracle triggered exit


In [38]:
# Best hyperparameter set found by the pump 06 search.
best_hps_06 = tuner_06.get_best_hyperparameters()[0]
print(best_hps_06.values)

# Best model found by the pump 06 search.
best_model_06 = tuner_06.get_best_models()[0]
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" after the table.
best_model_06.summary()

{'n_mels': 64, 'hop_length': 256, 'layer 1': 128, 'layer 2': 32, 'layer 3': 8, 'layer 4': 64, 'layer 5': 64, 'batch_size': 256, 'learning_rate': 0.00011307631534561521, 'epochs': 100}
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 64, 862)]         0         
                                                                 
 flatten (Flatten)           (None, 55168)             0         
                                                                 
 dense (Dense)               (None, 128)               7061632   
                                                                 
 dense_1 (Dense)             (None, 32)                4128      
                                                                 
 dense_2 (Dense)             (None, 8)                 264       
                                                                 
 dense_

# 4. Write Best Models to S3

In [39]:
def dump_pickle (bucket, key, model):
    """Pickle ``model`` and upload the bytes to ``key`` in the S3 ``bucket``.

    Uses the notebook-level ``s3`` resource; nothing is returned.
    """
    payload = pickle.dumps(model)
    destination = s3.Bucket(bucket).Object(key)
    destination.put(Body = payload)

In [40]:
# Upload the best model of every pump to its own key under models/.
for model_key, best_model in (
    ('models/model_00.pkl', best_model_00),
    ('models/model_02.pkl', best_model_02),
    ('models/model_04.pkl', best_model_04),
    ('models/model_06.pkl', best_model_06),
):
    dump_pickle(bucket, model_key, best_model)

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers
......dense
.........vars
............0
............1
......dense_1
.........vars
............0
............1
......dense_2
.........vars
............0
............1
......dense_3
.........vars
............0
............1
......dense_4
.........vars
............0
............1
......dense_5
.........vars
............0
............1
......flatten
.........vars
......input_layer
.........vars
......reshape
.........vars
...vars
Keras model archive saving:
File Name                                             Modified             Size
metadata.json                                  2023-05-22 13:42:36           64
config.json                                    2023-05-22 13:42:36         3753
variables.h5                                   2023-05-22 13:42:36    113410704
Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers
......dense
.........vars
............0
............1
......dense_1
..