In [None]:
import keras.models as models
import kerastuner as kt
import numpy as np
import tensorflow as tf
from kerastuner import HyperModel
from sklearn.mixture import GaussianMixture

# search space for tuning: each key names a GaussianMixture constructor
# argument and maps to the list of candidate values to try
HYPERPARAMETERS = dict(
    n_components=[2, 5, 10, 15],
    covariance_type=["full", "tied", "diag", "spherical"],
)

class GMMHyperModel(HyperModel):
    """Hypermodel that exposes GaussianMixture settings to Keras Tuner.

    The tunable settings are ``n_components`` and ``covariance_type``; their
    candidate values come from the module-level ``HYPERPARAMETERS`` dict.
    """

    def __init__(self, input_shape):
        """Store the shape of one input sample.

        Args:
            input_shape: shape of a single spectrogram (without the batch
                dimension); used as the Keras ``Input`` shape in ``build``.
        """
        # let HyperModel initialize its own state before adding ours
        super().__init__()
        self.input_shape = input_shape

    def build(self, hp):
        """Build one candidate model for the hyperparameters sampled in ``hp``.

        Args:
            hp: Keras Tuner ``HyperParameters`` object used to sample
                ``n_components`` and ``covariance_type``.

        Returns:
            A ``tf.keras`` model mapping the spectrogram input to the GMM
            output (see the NOTE(review) below: as written this is never
            reached).
        """
        # define the GMM model with hyperparameters
        n_components = hp.Choice("n_components", values=HYPERPARAMETERS["n_components"])
        covariance_type = hp.Choice("covariance_type", values=HYPERPARAMETERS["covariance_type"])
        gmm = GaussianMixture(n_components=n_components, covariance_type=covariance_type)

        # define the input layer for the mel spectrograms
        inputs = tf.keras.layers.Input(shape=self.input_shape)

        # flatten the spectrograms to a 2D array for input to the GMM
        flattened_inputs = tf.keras.layers.Flatten()(inputs)

        # connect the GMM to the input layer
        # NOTE(review): GaussianMixture is a scikit-learn estimator, not a
        # Keras layer. It defines no __call__, so this line raises TypeError
        # and the model below is never constructed. Tuning a GMM needs an
        # sklearn-aware search (build() returning the estimator, fitted with
        # .fit() and scored with .score()/.bic()). That changes this method's
        # return type and the tuner set-up elsewhere in the notebook, so it is
        # flagged here rather than silently redesigned.
        outputs = gmm(flattened_inputs)

        # define the model with input and output layers
        model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
        return model


In [None]:

# define a function to preprocess the audio files into mel spectrograms
def preprocess_audio(file_path, n_mels=128, n_fft=2048, hop_length=512, target_shape=(198, 32)):
    """Turn a WAV file into a fixed-size, min-max normalized log-mel spectrogram.

    Must run in eager mode: the sample rate is read back as a Python int and
    the result is converted with ``.numpy()``.

    Args:
        file_path: path to a WAV file readable by ``tf.audio.decode_wav``.
        n_mels: number of mel bins.
        n_fft: STFT frame length in samples.
        hop_length: STFT frame step in samples.
        target_shape: ``(rows, cols)`` the spectrogram is resized to.
            Defaults to ``(198, 32)``.

    Returns:
        NumPy array of shape ``target_shape`` with values in [0, 1]. A
        constant spectrogram (max == min) yields all zeros instead of NaN.
    """
    # load the audio file; desired_channels=1 keeps only the first channel so
    # the squeeze below also works for multi-channel recordings
    audio, sr = tf.audio.decode_wav(tf.io.read_file(file_path), desired_channels=1)
    audio = tf.squeeze(audio, axis=1)
    # decode_wav returns the sample rate as a scalar tensor; the mel-matrix
    # helper documents Python numbers for the rate and the edge frequencies
    sample_rate = int(sr.numpy())

    # compute the mel spectrogram
    stfts = tf.signal.stft(audio, frame_length=n_fft, frame_step=hop_length, pad_end=True)
    magnitude_spectrograms = tf.abs(stfts)
    num_spectrogram_bins = magnitude_spectrograms.shape[-1]
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=n_mels,
        num_spectrogram_bins=num_spectrogram_bins,
        sample_rate=sample_rate,
        lower_edge_hertz=0.0,
        upper_edge_hertz=sample_rate / 2.0,
    )
    mel_spectrograms = tf.matmul(tf.square(magnitude_spectrograms), linear_to_mel_weight_matrix)
    # small offset avoids log(0) on silent frames
    mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

    # resize the spectrogram to target_shape; tf.image.resize only accepts
    # 3-D or 4-D input, so add a channel axis for the call and drop it after
    mel_spectrograms = tf.image.resize(mel_spectrograms[..., tf.newaxis], target_shape)
    mel_spectrograms = tf.squeeze(mel_spectrograms, axis=-1)

    # normalize between 0 and 1; divide_no_nan returns 0 where the range is 0
    lowest = tf.reduce_min(mel_spectrograms)
    highest = tf.reduce_max(mel_spectrograms)
    mel_spectrograms = tf.math.divide_no_nan(mel_spectrograms - lowest, highest - lowest)

    # return the mel spectrogram
    return mel_spectrograms.numpy()

# define the training data
# TODO: replace the placeholder below with the real list of WAV file paths
audio_files = [...]
spectrograms = np.array([preprocess_audio(file) for file in audio_files])
if len(spectrograms) == 0:
    # fail with a clear message instead of an IndexError on spectrograms[0]
    raise ValueError("audio_files is empty: at least one audio file is required")

# define the hypermodel and tuner
# (`kt` is the Keras Tuner package, imported in the notebook's import cell)
hypermodel = GMMHyperModel(input_shape=spectrograms[0].shape)
tuner = kt.Hyperband(hypermodel, objective="val_loss", max_epochs=10, directory="tuner_dir", project_name="gmm_tuner")


In [None]:

# define the callbacks
# NOTE(review): the same checkpoint path is handed to every trial, so each
# trial presumably overwrites "gmm_best_model.h5" -- confirm whether this
# callback is wanted in addition to the tuner's own checkpoints.
early_stop = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint("gmm_best_model.h5", monitor="val_loss", save_best_only=True)

# fraction of the samples held out for validation during the search
VALIDATION_SPLIT = 0.2

# summarize the search space, then run the hyperparameter search
tuner.search_space_summary()
tuner.search(spectrograms, epochs=10, validation_split=VALIDATION_SPLIT, callbacks=[early_stop, model_checkpoint])

# retrieve the best model
best_model = tuner.get_best_models(num_models=1)[0]

# evaluate the best model on the validation set: `validation_split` holds out
# the last samples of the array, so score on that same tail slice rather than
# on the full training data
val_start = int(len(spectrograms) * (1.0 - VALIDATION_SPLIT))
val_spectrograms = spectrograms[val_start:]
val_loss = best_model.evaluate(val_spectrograms, val_spectrograms, verbose=0)

print("Best validation loss:", val_loss)

best_model.save("best_gmm_model.h5")
