In [None]:
import os
import time
import json
from datetime import datetime
import warnings
import numpy as np
from tqdm import tqdm
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.python.keras import optimizers
from tensorflow.python.keras import backend as K
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import math_ops
import matplotlib.pylab as plt
import librosa

In [None]:
# Default configuration, grouped by pipeline stage.
# train_model() uses this mapping when it is called without a `conf` argument.
DEFAULT_CONFIG = dict(
    # Locations of input data.
    general=dict(
        dataset_directory='data/audio_samples',
    ),
    # Switches for the preprocessing stage.
    preprocessing=dict(
        compute_embeddings=True,
        files_to_PCM=True,
    ),
    # Hyper-parameters and runtime options for the training loop.
    training=dict(
        epochs=30,
        batch_size=32,
        initial_lr=0.001,
        use_validation=True,
        use_multiprocessing=True,
        mode='normal',
        ckpt_freq=5,
    ),
    # Network definition.
    model=dict(
        # Placeholder: create_model() passes this value straight to the output
        # Dense layer, so it has to be replaced with the real class count.
        num_classes=None,
        input_shape=(96, 64, 1),  # Mel spectrogram dimensions
    ),
)

class Paths:
    """Builds the directory paths used by the pipeline, rooted at ``base_dir``.

    ``timestamp`` starts out as ``None``; the run-specific getters join it into
    a path, so the caller has to assign a string to it before using them.
    """

    def __init__(self, base_dir='.'):
        self.base_dir = base_dir
        self.timestamp = None
        self.CONF = None

    def _run_subdir(self, leaf):
        """Join ``leaf`` onto the directory of the current timestamped run."""
        return os.path.join(self.get_timestamped_dir(), leaf)

    def get_dataset_dir(self):
        """Return ``<base_dir>/data/dataset``."""
        return os.path.join(self.base_dir, 'data', 'dataset')

    def get_models_dir(self):
        """Return ``<base_dir>/models``."""
        return os.path.join(self.base_dir, 'models')

    def get_timestamped_dir(self):
        """Return ``<base_dir>/runs/<timestamp>``."""
        return os.path.join(self.base_dir, 'runs', self.timestamp)

    def get_checkpoints_dir(self):
        """Return the ``checkpoints`` directory of the current run."""
        return self._run_subdir('checkpoints')

    def get_stats_dir(self):
        """Return the ``stats`` directory of the current run."""
        return self._run_subdir('stats')

    def get_embeddings_dir(self):
        """Return the ``embeddings`` directory of the current run."""
        return self._run_subdir('embeddings')

    def get_ts_splits_dir(self):
        """Return the ``splits`` directory of the current run."""
        return self._run_subdir('splits')


In [None]:
class customAdam(optimizers.Adam):
    """Adam optimizer that scales the step size of selected variables.

    Every parameter whose ``name`` is NOT listed in ``excluded_vars`` is updated
    with the step size multiplied by ``lr_mult``; parameters that are listed in
    ``excluded_vars`` are updated with the unscaled step size.

    Args:
        lr, beta_1, beta_2, epsilon, decay, amsgrad, **kwargs: forwarded
            unchanged to ``optimizers.Adam``.
        lr_mult: factor applied to the step size of non-excluded variables.
        excluded_vars: iterable of variable names that keep the full step size.
            ``None`` (the default) means that no variable is excluded.
    """
    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0., amsgrad=False,
                 lr_mult=0.1, excluded_vars=None, **kwargs):
        super().__init__(lr=lr, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon, decay=decay, amsgrad=amsgrad, **kwargs)
        with K.name_scope(self.__class__.__name__):
            self.lr_mult = lr_mult
            # A `[]` default would be one list object shared by every instance
            # built without the argument; default to None and take a private
            # copy so later mutation by the caller cannot leak in either.
            self.excluded_vars = list(excluded_vars) if excluded_vars is not None else []

    def get_updates(self, loss, params):
        """Build the list of update ops for one optimisation step.

        Args:
            loss: tensor to minimise.
            params: variables to update.

        Returns:
            The list of update ops, which is also stored on ``self.updates``.
        """
        grads = self.get_gradients(loss, params)
        self.updates = [state_ops.assign_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            # Inverse-time decay driven by the iteration counter.
            lr = lr * (1. / (1. + self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

        t = math_ops.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - math_ops.pow(self.beta_2, t)) / (1. - math_ops.pow(self.beta_1, t)))

        # First and second moment accumulators, one per parameter.
        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            # Dummy slots so that the zip below has the same arity either way.
            vhats = [K.zeros(1) for _ in params]

        self.weights = [self.iterations] + ms + vs + vhats

        # Built once so the per-parameter membership test is a hash lookup.
        full_rate_names = frozenset(self.excluded_vars)

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # Excluded variables keep the full step size; all others are scaled.
            multiplied_lr_t = lr_t if p.name in full_rate_names else lr_t * self.lr_mult

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)

            if self.amsgrad:
                vhat_t = math_ops.maximum(vhat, v_t)
                p_t = p - multiplied_lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(state_ops.assign(vhat, vhat_t))
            else:
                p_t = p - multiplied_lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(v, v_t))
            new_p = p_t

            # Apply the variable's constraint, if it has one, before assigning.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates


In [None]:
class DataSequence(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves ``(features, one-hot labels)`` batches.

    Args:
        X: indexable collection of file paths (each item must support
            ``.endswith``, i.e. presumably a ``str`` - confirm against callers).
        y: indexable collection of labels aligned with ``X``; handed to
            ``to_categorical``, so presumably integer class indices.
        batch_size: number of samples per batch.
        num_classes: forwarded to ``to_categorical`` as ``num_classes``.
    """

    def __init__(self, X, y, batch_size, num_classes):
        self.X, self.y = X, y
        self.batch_size, self.num_classes = batch_size, num_classes

    def __len__(self):
        """Number of batches, counting a trailing partial batch."""
        n_batches = np.ceil(len(self.X) / self.batch_size)
        return int(n_batches)

    def __getitem__(self, idx):
        """Return batch number ``idx`` as a ``(features, labels)`` pair."""
        window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        paths_in_batch = self.X[window]
        labels_in_batch = self.y[window]

        # Every file is loaded eagerly and the results are stacked.
        # NOTE(review): stacking assumes all loaded arrays share one shape, and
        # nothing here resizes them to the model's input shape - confirm.
        features = np.array([self.load_audio(path) for path in paths_in_batch])
        one_hot = tf.keras.utils.to_categorical(labels_in_batch, num_classes=self.num_classes)
        return features, one_hot

    def load_audio(self, file_path):
        """Load one sample: a saved array for ``.npy`` files, else a dB mel spectrogram."""
        # Files ending in .npy are returned exactly as stored.
        if file_path.endswith('.npy'):
            return np.load(file_path)

        # Anything else is decoded as audio at its native sampling rate
        # (sr=None) and converted to a mel spectrogram in dB relative to its peak.
        waveform, sample_rate = librosa.load(file_path, sr=None)
        mel_power = librosa.feature.melspectrogram(y=waveform, sr=sample_rate)
        return librosa.power_to_db(mel_power, ref=np.max)


In [None]:
def create_model(conf, base_model=None):
    """Build the classifier, either from scratch or on top of ``base_model``.

    Args:
        conf: configuration mapping; ``conf['model']['num_classes']`` is always
            read, and ``conf['model']['input_shape']`` is read when no base
            model is given.
        base_model: optional Keras model whose ``input`` / ``output`` are used
            as the backbone for a new classification head.

    Returns:
        A ``(model, base_model)`` tuple. Without a ``base_model`` the same
        newly built model is returned in both positions.

    Raises:
        ValueError: if ``num_classes`` is ``None`` or smaller than 1.
    """
    num_classes = conf['model']['num_classes']
    # The shipped default is None; fail here with an actionable message rather
    # than deep inside the Dense layer constructor.
    if num_classes is None or int(num_classes) < 1:
        raise ValueError(
            "conf['model']['num_classes'] must be a positive integer, got {!r}".format(num_classes)
        )

    if base_model is None:
        # Create a simple CNN model if no base model is provided
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=conf['model']['input_shape']),
            tf.keras.layers.MaxPooling2D(),
            tf.keras.layers.Conv2D(64, 3, activation='relu'),
            tf.keras.layers.MaxPooling2D(),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(num_classes, activation='softmax')
        ])
        # There is no separate backbone, so the model doubles as its own base.
        return model, model

    # Add classification head to base model
    x = base_model.output
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    predictions = tf.keras.layers.Dense(num_classes, activation='softmax')(x)
    model = tf.keras.Model(inputs=base_model.input, outputs=predictions)
    return model, base_model

def _list_split_files(paths, split):
    """Return the sorted full paths of every entry in the dataset's ``split`` sub-directory."""
    split_dir = os.path.join(paths.get_dataset_dir(), split)
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    return [os.path.join(split_dir, name) for name in sorted(os.listdir(split_dir))]


def train_model(conf=None):
    """Train a model end to end and persist the result of the run.

    Creates a timestamped run directory, builds the data generators, compiles
    the model with ``customAdam``, fits it, then writes ``final_model.h5`` to
    the run's checkpoints directory and ``stats.json`` to its stats directory.

    Args:
        conf: configuration mapping with the same layout as ``DEFAULT_CONFIG``,
            which is used when ``conf`` is ``None``.

    Returns:
        A ``(model, history)`` tuple: the trained model and the object returned
        by ``model.fit``.
    """
    if conf is None:
        conf = DEFAULT_CONFIG

    train_conf = conf['training']
    use_validation = train_conf['use_validation']
    batch_size = train_conf['batch_size']
    num_classes = conf['model']['num_classes']

    # Setup paths
    paths = Paths()
    paths.timestamp = datetime.now().strftime('%Y-%m-%d_%H%M%S')

    # Create necessary directories
    os.makedirs(paths.get_timestamped_dir(), exist_ok=True)
    os.makedirs(paths.get_checkpoints_dir(), exist_ok=True)
    os.makedirs(paths.get_stats_dir(), exist_ok=True)

    # Load and prepare data
    train_files = _list_split_files(paths, 'train')
    train_labels = [0] * len(train_files)  # Replace with actual labels

    # Create data generators
    train_gen = DataSequence(train_files, train_labels, batch_size, num_classes)

    val_gen = None
    if use_validation:
        val_files = _list_split_files(paths, 'val')
        val_labels = [0] * len(val_files)  # Replace with actual labels
        val_gen = DataSequence(val_files, val_labels, batch_size, num_classes)

    # Create and compile model
    model, base_model = create_model(conf)

    # Variables that belong to the model but not to its base keep the full
    # learning rate; the rest are scaled by lr_mult inside customAdam. When the
    # model is its own base this list is empty, so every variable is scaled.
    base_vars = [var.name for var in base_model.trainable_variables]
    all_vars = [var.name for var in model.trainable_variables]
    top_vars = list(set(all_vars) - set(base_vars))

    # Compile model
    model.compile(
        optimizer=customAdam(
            lr=train_conf['initial_lr'],
            amsgrad=True,
            lr_mult=0.1,
            excluded_vars=top_vars
        ),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Train model, timing the fit so the stats hold a duration rather than a
    # wall-clock timestamp.
    fit_started = time.perf_counter()
    history = model.fit(
        train_gen,
        epochs=train_conf['epochs'],
        validation_data=val_gen,
        use_multiprocessing=train_conf['use_multiprocessing'],
        workers=4
    )
    training_seconds = time.perf_counter() - fit_started

    # Save model and training stats
    model.save(os.path.join(paths.get_checkpoints_dir(), 'final_model.h5'))

    stats = {
        'epoch': list(range(1, len(history.history['loss']) + 1)),
        'training_time': training_seconds,
        'timestamp': paths.timestamp
    }
    # Metric values may be NumPy scalars, which json cannot serialise; coerce
    # them to plain Python floats first.
    stats.update({
        metric: [float(value) for value in values]
        for metric, values in history.history.items()
    })

    with open(os.path.join(paths.get_stats_dir(), 'stats.json'), 'w') as f:
        json.dump(stats, f, indent=4)

    return model, history


In [None]:
if __name__ == '__main__':
    # Set up GPU memory growth.
    # The session/config classes are taken from `tf.compat.v1` when it exists:
    # that namespace is available on both TF 1.13+ and TF 2.x, whereas the
    # top-level `tf.Session` / `tf.ConfigProto` / `tf.GPUOptions` are TF 1 only.
    tf_v1 = getattr(getattr(tf, 'compat', None), 'v1', tf)
    gpu_options = tf_v1.GPUOptions(allow_growth=True)
    tfconfig = tf_v1.ConfigProto(gpu_options=gpu_options)
    sess = tf_v1.Session(config=tfconfig)
    K.set_session(sess)

    # Train the model
    model, history = train_model()

    # Plot training results
    curves = history.history
    # The accuracy metric is recorded as 'acc' by TF 1.x and as 'accuracy' by
    # TF 2.x; use whichever key this run produced.
    acc_key = 'accuracy' if 'accuracy' in curves else 'acc'
    val_acc_key = 'val_' + acc_key

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

    loss_ax.plot(curves['loss'], label='Training Loss')
    if 'val_loss' in curves:
        loss_ax.plot(curves['val_loss'], label='Validation Loss')
    loss_ax.set_title('Model Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()

    acc_ax.plot(curves[acc_key], label='Training Accuracy')
    if val_acc_key in curves:
        acc_ax.plot(curves[val_acc_key], label='Validation Accuracy')
    acc_ax.set_title('Model Accuracy')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.legend()

    fig.tight_layout()
    plt.show()