In [1]:
from keras.layers import Dense, Dropout, Activation, LeakyReLU, Conv1D, GlobalAveragePooling1D, Flatten, MaxPooling1D,  BatchNormalization, LayerNormalization  
from tensorflow.keras.initializers import HeNormal, Constant
from sklearn.metrics import accuracy_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.calibration import calibration_curve
from sklearn.isotonic import IsotonicRegression
from sklearn.utils import class_weight, shuffle
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder
from keras.regularizers import l1, l2, l1_l2
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model
from keras.callbacks import EarlyStopping
from sklearn.dummy import DummyClassifier
from keras.utils import to_categorical
from scipy.optimize import minimize
from keras.models import Sequential
from betacal import BetaCalibration
from keras.optimizers import Adam
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
from keras import backend as K
from random import sample
import tensorflow as tf
import keras.metrics
import pandas as pd
import numpy as np
import joblib
import random
import os



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



## GPU

In [2]:
#physical_devices = tf.config.experimental.list_physical_devices('GPU')
#tf.config.experimental.set_memory_growth(physical_devices[0], True)
#tf.config.set_visible_devices([], 'GPU')


## Divide into Train, Validation and Test
### Parameters:

- **df**:  
  The original DataFrame containing audio data.

- **df_aug**:  
  The augmented DataFrame, if available.

- **name_aug**:  
  A name identifier used for loading augmented data or specifying augmentation settings.

### Returns:

- **x_train**:  
  Numpy array of features for the training set.

- **y_train**:  
  List of labels for the training set.

- **x_test**:  
  Numpy array of features for the testing set.

- **y_test**:  
  List of labels for the testing set.

- **x_val**:  
  Numpy array of features for the validation set.

- **y_val**:  
  List of labels for the validation set.

- **y_test_names**:  
  List of names for the testing set.

- **y_test_speaker**:  
  List of speakers for the testing set.

- **y_test_segment_begin**:  
  List of starting sample indices for segments in the testing set.

- **y_test_segment_end**:  
  List of ending sample indices for segments in the testing set.


In [3]:
def train_test(df, df_aug, name_aug, random_state=42):
    """Split ``df`` per class into training, validation and test sets.

    For every distinct value of ``df['class']``, 10% of that class's rows are
    sampled as test data, 10/90 of the rows left over are sampled as
    validation data, and whatever remains is training data. All sampling uses
    ``random_state``.

    When ``df_aug`` is given, augmented rows are appended to the training set
    if they match a training row on class, segment bounds and file name. The
    part of the augmented ``file_name`` after its first underscore is compared
    with the training ``file_name``.

    ``name_aug`` is accepted for interface compatibility but is not used here.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test, x_val, y_val, y_test_names,
        y_test_speaker, y_test_segment_begin, y_test_segment_end)``. The ``x_*``
        entries are numpy arrays built from the ``trillsson`` column; all other
        entries are plain lists.
    """
    parts = {'train': [], 'val': [], 'test': []}

    for label in df['class'].unique():
        class_rows = df[df['class'] == label]
        held_out = class_rows.sample(frac=0.1, random_state=random_state)
        rest = class_rows.drop(held_out.index)
        validation = rest.sample(frac=10/90, random_state=random_state)

        parts['test'].append(held_out)
        parts['val'].append(validation)
        parts['train'].append(rest.drop(validation.index))

    test_df = pd.concat(parts['test'])
    val_df = pd.concat(parts['val'])
    train_df = pd.concat(parts['train'])

    if df_aug is not None:
        def _base_name(file_name):
            # Strip everything up to and including the first underscore, if any.
            if '_' in file_name:
                return file_name.split('_', 1)[1]
            return file_name

        key_cols = ['base_file_name', 'samples_begin', 'samples_end', 'class']

        train_keys = train_df.copy()
        train_keys['base_file_name'] = train_keys['file_name']

        aug = df_aug.copy()
        aug['base_file_name'] = aug['file_name'].apply(_base_name)

        # Inner merge keeps only augmented rows whose source segment is in training.
        matched = pd.merge(aug, train_keys[key_cols], on=key_cols, how='inner')
        matched = matched.drop(columns=['base_file_name'])
        train_df = pd.concat([train_df, matched], ignore_index=True)

    def _xy(frame):
        # Feature matrix and label list for one split.
        return np.asarray(frame['trillsson'].tolist()), frame['class'].tolist()

    x_train, y_train = _xy(train_df)
    x_val, y_val = _xy(val_df)
    x_test, y_test = _xy(test_df)

    return (
        x_train, y_train, x_test, y_test, x_val, y_val,
        test_df['file_name'].tolist(),
        test_df['speaker'].tolist(),
        test_df['samples_begin'].tolist(),
        test_df['samples_end'].tolist(),
    )


## Divide into Train and Validation
### Parameters:

- **df**:  
  The original DataFrame containing audio data.

- **df_aug**:  
  The augmented DataFrame, if available.

- **name_aug**:  
  A name identifier used for loading augmented data or specifying augmentation settings.

### Returns:

- **x_train**:  
  Numpy array of features for the training set.

- **y_train**:  
  List of labels for the training set.

- **x_val**:  
  Numpy array of features for the validation set.

- **y_val**:  
  List of labels for the validation set.


In [None]:
def train_val(df, df_aug, name_aug, random_state=42):
    """Split ``df`` per class into training and validation sets.

    For every distinct value of ``df['class']``, 10% of that class's rows are
    sampled (with ``random_state``) as validation data; every row of ``df``
    that was not sampled is training data.

    When ``df_aug`` is given, augmented rows are appended to the training set
    if they match a training row on class, segment bounds and file name. The
    part of the augmented ``file_name`` after its first underscore is compared
    with the training ``file_name``.

    ``name_aug`` is accepted for interface compatibility but is not used here.

    Returns:
        Tuple ``(x_train, y_train, x_val, y_val)``. The ``x_*`` entries are
        numpy arrays built from the ``trillsson`` column; the ``y_*`` entries
        are lists of class labels.
    """
    val_list = []
    for cls in df['class'].unique():
        df_cls = df[df['class'] == cls]
        val_list.append(df_cls.sample(frac=0.1, random_state=random_state))
    val = pd.concat(val_list)
    train = df.drop(val.index)

    if df_aug is not None:
        train_df_aug_key = train.copy()
        df_aug = df_aug.copy()
        train_df_aug_key['base_file_name'] = train_df_aug_key['file_name']
        df_aug['base_file_name'] = df_aug['file_name'].apply(lambda x: x.split('_', 1)[1] if '_' in x else x)
        merge_keys = ['base_file_name', 'samples_begin', 'samples_end', 'class']
        # Inner merge keeps only augmented rows whose source segment is in training.
        merged = pd.merge(df_aug, train_df_aug_key[merge_keys], on=merge_keys, how='inner')
        # Fix: the original concatenated into an undefined `train_df`, raising
        # NameError and never adding the augmented rows to the training data.
        train = pd.concat([train, merged.drop(columns=['base_file_name'])], ignore_index=True)

    x_train = np.asarray(train['trillsson'].tolist())
    y_train = train['class'].tolist()
    x_val = np.asarray(val['trillsson'].tolist())
    y_val = val['class'].tolist()

    return x_train, y_train, x_val, y_val


## Divide into Train, Validation, Validation for Calibration and Test
### Parameters:

- **df**:  
  The original DataFrame containing audio data.

- **df_aug**:  
  The augmented DataFrame, if available.

- **name_aug**:  
  A name identifier used for loading augmented data or specifying augmentation settings.

### Returns:

- **x_train**:  
  Numpy array of features for the training set.

- **y_train**:  
  List of labels for the training set.

- **x_test**:  
  Numpy array of features for the testing set.

- **y_test**:  
  List of labels for the testing set.

- **x_val**:  
  Numpy array of features for the validation set.

- **y_val**:  
  List of labels for the validation set.
  
- **x_val_cal**:  
  Numpy array of features for the validation set for calibration.

- **y_val_cal**:  
  List of labels for the validation set for calibration.

- **y_test_names**:  
  List of names for the testing set.

- **y_test_speaker**:  
  List of speakers for the testing set.

- **y_test_segment_begin**:  
  List of starting sample indices for segments in the testing set.

- **y_test_segment_end**:  
  List of ending sample indices for segments in the testing set.


In [None]:
def train_cal(df, df_aug, name_aug, random_state=42):
    """Split ``df`` into training, validation, calibration-validation and test sets.

    The validation set is 10% of all rows, sampled across the whole frame
    (not per class). The remaining rows are then split per class: 10% become
    test data, 25/90 of what is left become calibration-validation data, and
    the rest is training data. All sampling uses ``random_state``.

    When ``df_aug`` is given, augmented rows are appended to the training set
    if they match a training row on class, segment bounds and file name. The
    part of the augmented ``file_name`` after its first underscore is compared
    with the training ``file_name``.

    ``name_aug`` is accepted for interface compatibility but is not used here.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test, x_val, y_val, x_val_cal,
        y_val_cal, y_test_names, y_test_speaker, y_test_segment_begin,
        y_test_segment_end)``. The ``x_*`` entries are numpy arrays built from
        the ``trillsson`` column; all other entries are plain lists.
    """
    val_df = df.sample(frac=0.1, random_state=random_state)
    pool = df.drop(val_df.index)

    parts = {'train': [], 'cal': [], 'test': []}
    for label in pool['class'].unique():
        class_rows = pool[pool['class'] == label]
        held_out = class_rows.sample(frac=0.1, random_state=random_state)
        rest = class_rows.drop(held_out.index)
        calibration = rest.sample(frac=25/90, random_state=random_state)

        parts['test'].append(held_out)
        parts['cal'].append(calibration)
        parts['train'].append(rest.drop(calibration.index))

    test_df = pd.concat(parts['test'])
    val_cal_df = pd.concat(parts['cal'])
    train_df = pd.concat(parts['train'])

    if df_aug is not None:
        def _base_name(file_name):
            # Strip everything up to and including the first underscore, if any.
            if '_' in file_name:
                return file_name.split('_', 1)[1]
            return file_name

        key_cols = ['base_file_name', 'samples_begin', 'samples_end', 'class']

        train_keys = train_df.copy()
        train_keys['base_file_name'] = train_keys['file_name']

        aug = df_aug.copy()
        aug['base_file_name'] = aug['file_name'].apply(_base_name)

        # Inner merge keeps only augmented rows whose source segment is in training.
        matched = pd.merge(aug, train_keys[key_cols], on=key_cols, how='inner')
        matched = matched.drop(columns=['base_file_name'])
        train_df = pd.concat([train_df, matched], ignore_index=True)

    def _xy(frame):
        # Feature matrix and label list for one split.
        return np.asarray(frame['trillsson'].tolist()), frame['class'].tolist()

    x_train, y_train = _xy(train_df)
    x_val, y_val = _xy(val_df)
    x_val_cal, y_val_cal = _xy(val_cal_df)
    x_test, y_test = _xy(test_df)

    return (
        x_train, y_train, x_test, y_test, x_val, y_val, x_val_cal, y_val_cal,
        test_df['file_name'].tolist(),
        test_df['speaker'].tolist(),
        test_df['samples_begin'].tolist(),
        test_df['samples_end'].tolist(),
    )


## get Features for Training, Validation and Test
### Parameters:

- **x_train**:  
  Numpy array of features for the training set.

- **y_train**:  
  List of labels for the training set.

- **x_val**:  
  Numpy array of features for the validation set.

- **y_val**:  
  List of labels for the validation set.
  
- **x_test**:  
  Numpy array of features for the testing set.

- **y_test**:  
  List of labels for the testing set.

- **y_test_names**:  
  List of names for the testing set.

- **y_test_speaker**:  
  List of speakers for the testing set.

- **y_test_segment_begin**:  
  List of starting sample indices for segments in the testing set.

- **y_test_segment_end**:  
  List of ending sample indices for segments in the testing set.

- **df_learn**:  
  DataFrame containing audio data used for encoding labels.

### Returns:

- **y_train**:  
  List of shuffled labels for the training set.

- **x_train**:  
  Numpy array of shuffled features for the training set.

- **y_val**:  
  List of shuffled labels for the validation set.

- **x_val**:  
  Numpy array of shuffled features for the validation set.

- **y_test**:  
  List of shuffled labels for the testing set.

- **x_test**:  
  Numpy array of shuffled features for the testing set.

- **yy_train**:  
  Categorical labels for the training set.
  
- **yy_val**:  
  Categorical labels for the validation set.

- **yy_test**:  
  Categorical labels for the testing set.

- **y_test_names**:  
  List of names for the testing set.

- **y_test_speaker**:  
  List of speakers for the testing set.

- **y_test_segment_begin**:  
  List of starting sample indices for segments in the testing set.

- **y_test_segment_end**:  
  List of ending sample indices for segments in the testing set.

- **label_mapping**:  
  Mapping of original labels to encoded categorical labels.


In [6]:
def features(x_train, y_train, x_val, y_val, x_test, y_test, y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end, df_learn):
    """Shuffle the train/validation/test splits and one-hot encode their labels.

    Each split is shuffled with a fixed ``random_state=42``. If
    ``y_test_names`` is ``None`` only ``x_test``/``y_test`` are shuffled and
    the four test metadata arguments are returned unchanged; otherwise the
    metadata lists are shuffled together with the test data so they stay
    aligned.

    Labels are encoded with a ``LabelEncoder`` fitted on the sorted unique
    values of ``df_learn['class']`` and then one-hot encoded. The one-hot
    width is fixed to the number of classes in ``df_learn``, so a split that
    lacks the highest-indexed class still gets a matrix of the right width.

    Returns:
        Tuple ``(y_train, x_train, y_val, x_val, y_test, x_test, yy_train,
        yy_val, yy_test, y_test_names, y_test_speaker, y_test_segment_begin,
        y_test_segment_end, label_mapping)``, where ``label_mapping`` maps each
        label seen in ``y_train`` to its one-hot row.
    """
    x_train, y_train = shuffle(x_train, y_train, random_state=42)
    x_val, y_val = shuffle(x_val, y_val, random_state=42)
    if y_test_names is None:
        x_test, y_test = shuffle(x_test, y_test, random_state=42)
    else:
        x_test, y_test, y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end = shuffle(
            x_test, y_test, y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end,
            random_state=42)

    # Encode the classification labels
    le = LabelEncoder()
    le.fit(sorted(df_learn['class'].unique().tolist()))
    # Fix: without num_classes, to_categorical infers the width from the largest
    # label present in each split, which breaks when a split misses the last class.
    num_classes = len(le.classes_)
    yy_train = to_categorical(le.transform(y_train), num_classes=num_classes)
    yy_val = to_categorical(le.transform(y_val), num_classes=num_classes)
    yy_test = to_categorical(le.transform(y_test), num_classes=num_classes)

    label_mapping = dict(zip(y_train, yy_train))

    return y_train, x_train, y_val, x_val, y_test, x_test, yy_train, yy_val, yy_test, y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end, label_mapping


## get Features for Training and Validation
### Parameters:

- **x_train**:  
  Numpy array of features for the training set.

- **y_train**:  
  List of labels for the training set.

- **x_val**:  
  Numpy array of features for the validation set.

- **y_val**:  
  List of labels for the validation set.

- **df_learn**:  
  DataFrame containing audio data used for encoding labels.

### Returns:

- **y_train**:  
  List of shuffled labels for the training set.

- **x_train**:  
  Numpy array of shuffled features for the training set.

- **y_val**:  
  List of shuffled labels for the validation set.

- **x_val**:  
  Numpy array of shuffled features for the validation set.

- **yy_train**:  
  Categorical labels for the training set.

- **yy_val**:  
  Categorical labels for the validation set.

- **label_mapping**:  
  Mapping of original labels to encoded categorical labels.


In [7]:
def features_finetune_only(x_train, y_train, x_val, y_val, df_learn):
    """Shuffle the train/validation splits and one-hot encode their labels.

    Both splits are shuffled with a fixed ``random_state=42``. Labels are
    encoded with a ``LabelEncoder`` fitted on the sorted unique values of
    ``df_learn['class']`` and then one-hot encoded. The one-hot width is fixed
    to the number of classes in ``df_learn``, so a split that lacks the
    highest-indexed class still gets a matrix of the right width.

    Returns:
        Tuple ``(y_train, x_train, y_val, x_val, yy_train, yy_val,
        label_mapping)``, where ``label_mapping`` maps each label seen in
        ``y_train`` to its one-hot row.
    """
    x_train, y_train = shuffle(x_train, y_train, random_state=42)
    x_val, y_val = shuffle(x_val, y_val, random_state=42)

    # Encode the classification labels
    le = LabelEncoder()
    le.fit(sorted(df_learn['class'].unique().tolist()))
    # Fix: without num_classes, to_categorical infers the width from the largest
    # label present in each split, which breaks when a split misses the last class.
    num_classes = len(le.classes_)
    yy_train = to_categorical(le.transform(y_train), num_classes=num_classes)
    yy_val = to_categorical(le.transform(y_val), num_classes=num_classes)

    label_mapping = dict(zip(y_train, yy_train))

    return y_train, x_train, y_val, x_val, yy_train, yy_val, label_mapping


## get Features for Training, Validation, Calibration Validation and Test
### Parameters:

- **x_train**:  
  Numpy array of features for the training set.

- **y_train**:  
  List of labels for the training set.

- **x_val**:  
  Numpy array of features for the validation set.

- **y_val**:  
  List of labels for the validation set.
  
- **x_val_cal**:  
  Numpy array of features for the validation set for Calibration.

- **y_val_cal**:  
  List of labels for the validation set for Calibration.
  
- **x_test**:  
  Numpy array of features for the testing set.

- **y_test**:  
  List of labels for the testing set.

- **df_learn**:  
  DataFrame containing audio data used for encoding labels.

### Returns:

- **y_train**:  
  List of shuffled labels for the training set.

- **x_train**:  
  Numpy array of shuffled features for the training set.

- **y_val**:  
  List of shuffled labels for the validation set.

- **x_val**:  
  Numpy array of shuffled features for the validation set.
  
- **y_val_cal**:  
  List of shuffled labels for the validation set for Calibration.

- **x_val_cal**:  
  Numpy array of shuffled features for the validation set for Calibration.

- **y_test**:  
  List of shuffled labels for the testing set.

- **x_test**:  
  Numpy array of shuffled features for the testing set.

- **yy_train**:  
  Categorical labels for the training set.

- **yy_val**:  
  Categorical labels for the validation set.
  
- **yy_val_cal**:  
  Categorical labels for the validation set for Calibration.
  
- **yy_test**:  
  Categorical labels for the testing set.

- **label_mapping**:  
  Mapping of original labels to encoded categorical labels.


In [8]:
def features_train_only_cal(x_train, y_train, x_val, y_val, x_val_cal, y_val_cal, x_test, y_test, df_learn):
    """Shuffle the four splits and one-hot encode their labels.

    The train, validation, calibration-validation and test splits are each
    shuffled with a fixed ``random_state=42``. Labels are encoded with a
    ``LabelEncoder`` fitted on the sorted unique values of
    ``df_learn['class']`` and then one-hot encoded. The one-hot width is fixed
    to the number of classes in ``df_learn``, so a split that lacks the
    highest-indexed class still gets a matrix of the right width.

    Returns:
        Tuple ``(y_train, x_train, y_val, x_val, y_val_cal, x_val_cal, y_test,
        x_test, yy_train, yy_val, yy_val_cal, yy_test, label_mapping)``, where
        ``label_mapping`` maps each label seen in ``y_train`` to its one-hot row.
    """
    x_train, y_train = shuffle(x_train, y_train, random_state=42)
    x_val, y_val = shuffle(x_val, y_val, random_state=42)
    x_val_cal, y_val_cal = shuffle(x_val_cal, y_val_cal, random_state=42)
    x_test, y_test = shuffle(x_test, y_test, random_state=42)

    # Encode the classification labels
    le = LabelEncoder()
    le.fit(sorted(df_learn['class'].unique().tolist()))
    # Fix: without num_classes, to_categorical infers the width from the largest
    # label present in each split, which breaks when a split misses the last class.
    num_classes = len(le.classes_)
    yy_train = to_categorical(le.transform(y_train), num_classes=num_classes)
    yy_val = to_categorical(le.transform(y_val), num_classes=num_classes)
    yy_val_cal = to_categorical(le.transform(y_val_cal), num_classes=num_classes)
    yy_test = to_categorical(le.transform(y_test), num_classes=num_classes)

    label_mapping = dict(zip(y_train, yy_train))

    return y_train, x_train, y_val, x_val, y_val_cal, x_val_cal, y_test, x_test, yy_train, yy_val, yy_val_cal, yy_test, label_mapping


## create model
### Parameters:

- **df_learn**:  
  DataFrame containing audio data used for model training.

- **x_train**:  
  Numpy array of features for the training set.

- **lr**:  
  Learning rate for model optimization.

- **dr**:  
  Dropout rate for regularization.

- **units**:  
  Number of units/neurons in the dense layers of the model.

- **l1_val**:  
  L1 regularization parameter for the dense layers.

- **l2_val**:  
  L2 regularization parameter for the dense layers.

- **alpha_val**:  
  Alpha parameter for LeakyReLU.
  
- **finetune_only**:  
  True if the run is only for finetuning.
  

### Returns:

- **model**:  
  Compiled Keras model for classification.

- **callback**:  
  EarlyStopping callback to monitor validation loss and restore the best weights during training.


In [None]:
def create_model(df_learn, x_train, lr, dr, units, l1_val, l2_val, alpha_val, finetune_only, hyper_test=False):
    """Build and compile the dense classifier and create an EarlyStopping callback.

    The network is LayerNormalization -> Dense(units*2) -> LeakyReLU -> Dropout
    -> Dense(units) -> LeakyReLU -> Dropout -> Dense(num_labels) -> softmax.
    The input width is the last dimension of ``x_train``, and ``num_labels`` is
    the number of distinct values in ``df_learn['class']``.

    Args:
        df_learn: DataFrame with a ``class`` column; determines the output size.
        x_train: Training features; only the last dimension is used.
        lr: Learning rate for the Adam optimizer.
        dr: Dropout rate for both dropout layers.
        units: Width of the second dense layer; the first has ``units*2``.
        l1_val: L1 activity-regularization factor of the hidden dense layers.
        l2_val: L2 kernel-regularization factor of the hidden dense layers.
        alpha_val: Negative slope of the LeakyReLU activations.
        finetune_only: If true, weights are loaded from
            ``model_weights_train.h5`` and every layer except ``Output`` is frozen.
        hyper_test: If false (and not fine-tuning), weights are loaded from
            ``initial_weights.h5`` after compilation.

    Returns:
        ``(model, callback)``: the compiled Keras model and an EarlyStopping
        callback that monitors ``val_loss`` and restores the best weights.
    """
    seed_value = 42
    he_initializer = HeNormal(seed=seed_value)
    bias_initializer = Constant(value=0.0)

    num_labels = len(df_learn['class'].unique().tolist())
    input_dim = np.array(x_train).shape[-1]

    def build_model_graph():
        model = Sequential()
        model.add(LayerNormalization(input_shape=(input_dim,), name='LayerNorm'))
        model.add(Dense(units*2,
                        kernel_regularizer=l2(l2_val), activity_regularizer=l1(l1_val),
                        kernel_initializer=he_initializer, bias_initializer=bias_initializer, name='Dense1'))
        model.add(LeakyReLU(alpha=alpha_val))
        model.add(Dropout(dr, seed=seed_value, name='Dropout1'))

        model.add(Dense(units,
                        kernel_regularizer=l2(l2_val), activity_regularizer=l1(l1_val),
                        kernel_initializer=he_initializer, bias_initializer=bias_initializer, name='Dense2'))
        model.add(LeakyReLU(alpha=alpha_val))
        model.add(Dropout(dr, seed=seed_value+1, name='Dropout2'))

        model.add(Dense(num_labels, name='Output'))
        model.add(Activation('softmax'))

        if finetune_only:
            # Start from the previously trained weights and train only the output layer.
            model.load_weights('model_weights_train.h5')
            for layer in model.layers:
                layer.trainable = (layer.name == 'Output')

        model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=Adam(learning_rate = lr))

        return model

    model = build_model_graph()
    # To regenerate the shared starting point, run once: model.save_weights("initial_weights.h5")
    if not hyper_test and not finetune_only:
        model.load_weights("initial_weights.h5")

    # summary() prints the table itself and returns None, so it must not be wrapped in print().
    model.summary()

    callback = EarlyStopping(
        monitor="val_loss",
        mode='min',
        min_delta=0.005,
        patience=10,
        verbose=1,
        baseline=None,
        restore_best_weights=True,
    )

    return model, callback


## train Model
### Parameters:

- **x_train**:  
  Numpy array of features for the training set.

- **x_val**:  
  Numpy array of features for the validation set.

- **yy_train**:  
  Categorical labels for the training set.

- **yy_val**:  
  Categorical labels for the validation set.

- **class_weights**:  
  Dictionary of class weights for handling class imbalance.

- **model**:  
  Compiled Keras model for training.

- **batch_size**:  
  Batch size used for training.

- **num_epochs**:  
  Number of epochs for training.

- **callback**:  
  EarlyStopping callback for monitoring validation loss.

- **i**:  
  Index used for TensorBoard log directory.

- **lr**:  
  Learning rate used in the model.

- **dr**:  
  Dropout rate used in the model.

- **units**:  
  Number of units/neurons in the dense layers of the model.

- **l1_val**:  
  L1 regularization parameter used in the model.

- **l2_val**:  
  L2 regularization parameter used in the model.
  
- **alpha_val**:  
  Alpha parameter for LeakyReLU.

- **tb**:  
  Boolean indicating whether to enable TensorBoard logging.

- **log_dir**:  
  Directory path for TensorBoard logs.
  
- **train_only**:  
  True if the run is only for training.
  
- **finetune_only**:  
  True if the run is only for finetuning.


### Returns:

- **history**:  
  History object containing training metrics and loss values.


In [10]:
def weights(y_train):
    """Compute balanced class weights for the labels in ``y_train``.

    The weights are computed for the sorted unique labels found in
    ``y_train`` and returned as a dict keyed by their position in that sorted
    order (0, 1, ...). The dict is also printed.
    """
    present_labels = np.unique(y_train)
    balanced = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=present_labels,
        y=y_train,
    )
    class_weights = {position: value for position, value in enumerate(balanced)}
    print('Class weights:', class_weights)
    return class_weights


In [11]:
def train_model(x_train, x_val, yy_train, yy_val, class_weights, model, batch_size, num_epochs, callback, i, lr, dr, units, l1_val, l2_val, alpha_val, tb, log_dir, train_only, finetune_only):
    """Fit ``model`` and optionally save its weights.

    Training runs for ``num_epochs`` epochs with ``shuffle=False`` and the
    given ``class_weights``, validating on ``(x_val, yy_val)``. When ``tb`` is
    true a TensorBoard callback is attached; its log directory is ``log_dir``
    followed by the run index and hyper-parameter values. The ``callback``
    argument (early stopping) is accepted but currently not passed to ``fit``.

    After training, weights are written to ``model_weights_train.h5`` when
    ``train_only`` is true and to ``model_weights_finetune.h5`` when
    ``finetune_only`` is true.

    Returns:
        The Keras ``History`` object produced by ``model.fit``.
    """
    active_callbacks = []
    if tb:
        run_suffix = "".join([
            str(i),
            "lr_", str(lr),
            "dr_", str(dr),
            "units_", str(units),
            "l1_", str(l1_val),
            "l2_", str(l2_val),
            "alpha_", str(alpha_val),
            "bs_", str(batch_size),
        ])
        log_dir = log_dir + run_suffix
        active_callbacks.append(
            keras.callbacks.TensorBoard(log_dir=(log_dir), histogram_freq=1)
        )
    # Early stopping is intentionally left out: `callback` is not added to the list.

    history = model.fit(
        np.array(x_train),
        yy_train,
        batch_size=batch_size,
        epochs=num_epochs,
        validation_data=(np.array(x_val), yy_val),
        verbose=1,
        shuffle=False,
        class_weight=class_weights,
        callbacks=active_callbacks,
    )

    for enabled, weights_path in ((train_only, 'model_weights_train.h5'),
                                  (finetune_only, 'model_weights_finetune.h5')):
        if enabled:
            model.save_weights(weights_path)

    return history


## test Model

In [12]:
def pred(x_test, yy_test, model):
    """Run ``model`` on ``x_test`` and compare predicted with true class indices.

    Returns:
        ``(df_result, classes_x, classes_true, pred_test)``:
        ``df_result`` is a DataFrame with columns ``Pred`` and ``True``;
        ``classes_x`` and ``classes_true`` are the row-wise argmax of the model
        output and of ``yy_test``; ``pred_test`` is the raw model output.
    """
    pred_test = model.predict(np.array(x_test))

    # Row-wise argmax turns score rows / one-hot rows into class indices.
    classes_x = np.argmax(pred_test, axis=1)
    classes_true = np.argmax(yy_test, axis=1)

    pairs = [(predicted, actual) for predicted, actual in zip(classes_x, classes_true)]
    df_result = pd.DataFrame(pairs, columns=['Pred', 'True'])

    return df_result, classes_x, classes_true, pred_test


In [13]:
def get_false(y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end, df_result, sample_rate=16000):
    """Collect metadata of the misclassified test segments.

    Rows of ``df_result`` where ``Pred`` differs from ``True`` are selected,
    and their index labels are used as positions into the four metadata lists.
    The lists must therefore be aligned with ``df_result``'s index.

    Args:
        y_test_names: File name per test segment.
        y_test_speaker: Speaker per test segment.
        y_test_segment_begin: Segment start (in samples) per test segment.
        y_test_segment_end: Segment end (in samples) per test segment.
        df_result: DataFrame with ``Pred`` and ``True`` columns.
        sample_rate: Divisor that converts the segment start from samples to
            the value shown in ``false_simplified``. Defaults to 16000, the
            value that was previously hard-coded.

    Returns:
        ``(false_names, false_speaker, false_segments_begin,
        false_segments_end, false_simplified)``; ``false_simplified`` holds
        strings of the form ``"<file name> <segment start / sample_rate>"``.
    """
    indices = df_result.index[df_result['Pred'] != df_result['True']].tolist()
    false_names = [y_test_names[index] for index in indices]
    false_speaker = [y_test_speaker[index] for index in indices]
    false_segments_begin = [y_test_segment_begin[index] for index in indices]
    false_segments_end = [y_test_segment_end[index] for index in indices]
    false_simplified = [
        name + ' ' + str(begin / sample_rate)
        for name, begin in zip(false_names, false_segments_begin)
    ]
    return false_names, false_speaker, false_segments_begin, false_segments_end, false_simplified


In [14]:
def firstPictures(history, num_epochs):
    """Save the loss and accuracy learning curves of a training run as PNG files.

    Two figures are written to the working directory: ``ex_loss_epoch_.png``
    (training vs. validation loss) and ``ex_acc_epoch.png`` (training vs.
    validation accuracy). The x axis is ticked every 5 epochs up to
    ``num_epochs``. Each figure is closed after saving.
    """
    def _save_curve(train_key, val_key, train_label, val_label, title, out_path):
        # Plot one train/validation pair from the history and save it to disk.
        train_values = history.history[train_key]
        val_values = history.history[val_key]
        tick_positions = range(0, num_epochs, 5)

        plt.figure(figsize=(12, 8))
        plt.plot(train_values, label=train_label, color='#185fad')
        plt.plot(val_values, label=val_label, color='orange')
        plt.title(title, fontsize = 25)
        plt.xlabel('Epoch', fontsize = 18)
        plt.xticks(tick_positions, range(0, num_epochs, 5))
        plt.legend(fontsize = 18)
        plt.savefig(out_path, bbox_inches='tight')
        plt.close()

    _save_curve('loss', 'val_loss',
                'Training loss', 'Validation loss',
                'Training and Validation loss by Epoch',
                'ex_loss_epoch_.png')

    _save_curve('accuracy', 'val_accuracy',
                'Training Accuracy', 'Validation Accuracy',
                'Training and Validation Accuracy by Epoch',
                'ex_acc_epoch.png')
    

## main function
### Parameters:

- **first_pictures**:  
  Boolean indicating whether to generate plots for the runs during training.

- **df_learn**:  
  DataFrame containing audio data used for model training.

- **df_learn_aug**:  
  Augmented DataFrame containing audio data, if available.

- **name_aug**:  
  A name identifier used for loading augmented data or specifying augmentation settings.

- **i**:  
  Index used for TensorBoard log directory.

- **lr**:  
  Learning rate used in the model.

- **dr**:  
  Dropout rate used in the model.

- **units**:  
  Number of units/neurons in the dense layers of the model.

- **l1_val**:  
  L1 regularization parameter used in the dense layers.

- **l2_val**:  
  L2 regularization parameter used in the dense layers.
  
- **alpha_val**:  
  Alpha parameter for LeakyReLU.

- **batch_size**:  
  Batch size used for training.

- **tb**:  
  Boolean indicating whether to enable TensorBoard logging.

- **log_dir**:  
  Directory path for TensorBoard logs.

- **max_epochs**:  
  Maximum number of epochs for training.

### Returns:

- **list_row**:  
  A list containing various metrics and data for evaluation and analysis.

- **label_mapping**:  
  Mapping of original dialect labels to encoded categorical labels.

### Description:

This function orchestrates the entire workflow for training and evaluating the classification model for binary classification. It performs data preprocessing, model creation, training, evaluation, and result extraction. It returns a list of evaluation metrics and data for analysis, along with a mapping of original labels to encoded categorical labels.


In [15]:
def do_all(first_pictures, df_learn, df_learn_aug, name_aug, i, lr, dr, units, l1_val, l2_val, alpha_val, batch_size, tb, log_dir, max_epochs, hyper_test=False):
    """Run one full experiment: split, encode, build, train, predict and evaluate.

    Steps, in order: ``train_test`` -> ``features`` -> ``weights`` ->
    ``create_model`` -> ``train_model`` -> ``pred`` -> ``model.evaluate`` ->
    ``get_false``. Learning curves are saved when ``first_pictures`` is true.

    Returns:
        ``(list_row, label_mapping)``. ``list_row`` holds, in order: last-epoch
        training accuracy, last-epoch validation accuracy, test accuracy,
        last-epoch training loss, last-epoch validation loss, test loss, the
        simplified descriptions of misclassified test segments, predicted
        class indices, true class indices and the raw test predictions.
    """
    (x_train, y_train, x_test, y_test, x_val, y_val,
     y_test_names, y_test_speaker,
     y_test_segment_begin, y_test_segment_end) = train_test(df_learn, df_learn_aug, name_aug)

    (y_train, x_train, y_val, x_val, y_test, x_test,
     yy_train, yy_val, yy_test,
     y_test_names, y_test_speaker,
     y_test_segment_begin, y_test_segment_end,
     label_mapping) = features(
        x_train, y_train, x_val, y_val, x_test, y_test,
        y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end,
        df_learn)

    class_weights = weights(y_train)

    model, callback = create_model(
        df_learn, x_train, lr, dr, units, l1_val, l2_val, alpha_val, False, hyper_test)

    history = train_model(
        x_train, x_val, yy_train, yy_val, class_weights, model,
        batch_size, max_epochs, callback, i,
        lr, dr, units, l1_val, l2_val, alpha_val,
        tb, log_dir, False, False)

    _, classes_x, classes_true, pred_test = df_result_and_preds = pred(x_test, yy_test, model)
    df_result = df_result_and_preds[0]
    loss, accuracy = model.evaluate(np.array(x_test), yy_test, verbose=0)

    # Only the simplified "<file> <start>" strings are reported in list_row.
    false_simplified = get_false(
        y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end, df_result)[4]

    if first_pictures:
        firstPictures(history, max_epochs)

    curves = history.history
    list_row = [
        curves['accuracy'][-1], curves['val_accuracy'][-1], accuracy,
        curves['loss'][-1], curves['val_loss'][-1], loss,
        false_simplified, classes_x, classes_true, pred_test,
    ]

    return list_row, label_mapping


## main function for Finetuning
### Parameters:

- **first_pictures**:  
  Boolean indicating whether to generate plots for the runs during training.

- **df_learn**:  
  DataFrame containing audio data used for model training.

- **df_learn_aug**:  
  Augmented DataFrame containing audio data, if available.

- **name_aug**:  
  A name identifier used for loading augmented data or specifying augmentation settings.

- **lr**:  
  Learning rate used in the model.

- **dr**:  
  Dropout rate used in the model.

- **units**:  
  Number of units/neurons in the dense layers of the model.

- **l1_val**:  
  L1 regularization parameter used in the dense layers.

- **l2_val**:  
  L2 regularization parameter used in the dense layers.
  
- **alpha_val**:  
  Alpha parameter for LeakyReLU.

- **batch_size**:  
  Batch size used for training.

- **max_epochs**:  
  Maximum number of epochs for training.

### Returns:

- **list_row**:  
  A list containing various metrics and data for evaluation and analysis.

- **label_mapping**:  
  Mapping of original dialect labels to encoded categorical labels.

### Description:

This function orchestrates the entire workflow for fine-tuning the classification model for binary classification. It performs data preprocessing, model creation, loading of the trained model weights, training, and saving of the fine-tuned model weights.


In [16]:
def do_all_finetune(first_pictures, df_learn, df_learn_aug, name_aug, lr, dr, units, l1_val, l2_val, alpha_val, batch_size, max_epochs):
    """Run the fine-tuning workflow: split, encode, build in fine-tune mode, train.

    Steps, in order: ``train_val`` -> ``features_finetune_only`` ->
    ``weights`` -> ``create_model`` (with ``finetune_only=True``) ->
    ``train_model`` (with ``finetune_only=True``, TensorBoard off). Learning
    curves are saved when ``first_pictures`` is true.

    Returns:
        ``(list_row, label_mapping)``. ``list_row`` has the same ten slots as
        the one from ``do_all``; only the last-epoch training/validation
        accuracy and loss are filled, every test-related slot is ``''``.
    """
    x_train, y_train, x_val, y_val = train_val(df_learn, df_learn_aug, name_aug)

    (y_train, x_train, y_val, x_val,
     yy_train, yy_val, label_mapping) = features_finetune_only(
        x_train, y_train, x_val, y_val, df_learn)

    class_weights = weights(y_train)

    model, callback = create_model(
        df_learn, x_train, lr, dr, units, l1_val, l2_val, alpha_val, True)

    history = train_model(
        x_train, x_val, yy_train, yy_val, class_weights, model,
        batch_size, max_epochs, callback, 0,
        lr, dr, units, l1_val, l2_val, alpha_val,
        False, '', False, True)

    if first_pictures:
        firstPictures(history, max_epochs)

    curves = history.history
    list_row = [
        curves['accuracy'][-1], curves['val_accuracy'][-1], '',
        curves['loss'][-1], curves['val_loss'][-1], '',
        '', '', '', '',
    ]

    return list_row, label_mapping
        

## main function for Training
### Parameters:

- **first_pictures**:  
  Boolean indicating whether to generate plots for the runs during training.

- **df_learn**:  
  DataFrame containing audio data used for model training.

- **df_learn_aug**:  
  Augmented DataFrame containing audio data, if available.

- **name_aug**:  
  A name identifier used for loading augmented data or specifying augmentation settings.

- **lr**:  
  Learning rate used in the model.

- **dr**:  
  Dropout rate used in the model.

- **units**:  
  Number of units/neurons in the dense layers of the model.

- **l1_val**:  
  L1 regularization parameter used in the dense layers.

- **l2_val**:  
  L2 regularization parameter used in the dense layers.
  
- **alpha_val**:  
  Alpha parameter for LeakyReLU.

- **batch_size**:  
  Batch size used for training.

- **max_epochs**:  
  Maximum number of epochs for training.
  
- **calibration**:  
  Name of post-hoc probability calibration Method.
  
### Returns:

- **list_row**:  
  A list containing various metrics and data for evaluation and analysis.

- **label_mapping**:  
  Mapping of original dialect labels to encoded categorical labels.

### Description:

This function orchestrates the entire workflow for training the classification model for binary classification. It performs data preprocessing, model creation, training, calibration and saving the model weights.


In [17]:
def nll_loss(logits, temperature, labels):
    """Mean negative log-likelihood of `labels` under temperature-scaled logits.

    The logits are divided by `temperature` and the mean sparse softmax
    cross-entropy against the integer class `labels` is returned.
    """
    per_sample_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels,
        logits=logits / temperature,
    )
    return tf.reduce_mean(per_sample_nll)


In [18]:
def do_all_train(first_pictures, df_learn, df_learn_aug, name_aug, lr, dr, units, l1_val, l2_val, alpha_val, batch_size, max_epochs, calibration):
    """Train the classifier and optionally fit a post-hoc probability calibrator.

    With `calibration=None` the data split comes from `train_test`/`features`
    and a reliability diagram of the uncalibrated test predictions is written.
    Otherwise the split comes from `train_cal`/`features_train_only_cal`, the
    chosen calibrator is fitted on the held-out calibration split, stored on
    disk, and a reliability diagram of the calibrated test predictions is
    written.

    Parameters
    ----------
    first_pictures : bool
        If truthy, `firstPictures(history, max_epochs)` is called after training.
    df_learn, df_learn_aug, name_aug
        Forwarded to the splitting helpers (`train_cal` / `train_test`);
        `df_learn` is also forwarded to the feature helpers and `create_model`.
    lr, dr, units, l1_val, l2_val, alpha_val
        Hyperparameters forwarded to `create_model` and `train_model`.
    batch_size, max_epochs
        Forwarded to `train_model`.
    calibration : str or None
        One of 'temperature_scaling', 'platt', 'isotonic', 'beta',
        'histogram', or None for no calibration.

    Returns
    -------
    list_row : list
        Final training/validation accuracy and loss from the Keras history,
        padded with empty-string placeholders.
    label_mapping
        The label mapping returned by the feature helper.

    Raises
    ------
    ValueError
        If `calibration` is neither None nor one of the supported names.
    """
    supported_calibrations = ('temperature_scaling', 'platt', 'isotonic', 'beta', 'histogram')
    # Fail before the expensive training run instead of silently training
    # on the calibration split and then fitting/saving no calibrator at all.
    if calibration is not None and calibration not in supported_calibrations:
        raise ValueError(
            f"Unknown calibration method {calibration!r}; "
            f"expected None or one of {supported_calibrations}"
        )

    if calibration is not None:
        x_train, y_train, x_test, y_test, x_val, y_val, x_val_cal, y_val_cal, _, _, _, _ = train_cal(df_learn, df_learn_aug, name_aug)
        y_train, x_train, y_val, x_val, y_val_cal, x_val_cal, y_test, x_test, yy_train, yy_val, yy_val_cal, yy_test, label_mapping = features_train_only_cal(x_train, y_train, x_val, y_val, x_val_cal, y_val_cal, x_test, y_test, df_learn)
    else:
        x_train, y_train, x_test, y_test, x_val, y_val, _, _, _, _ = train_test(df_learn, df_learn_aug, name_aug)
        y_train, x_train, y_val, x_val, y_test, x_test, yy_train, yy_val, yy_test, _, _, _, _, label_mapping = features(x_train, y_train, x_val, y_val, x_test, y_test, None, None, None, None, df_learn)
    class_weights = weights(y_train)
    model, callback = create_model(df_learn, x_train, lr, dr, units, l1_val, l2_val, alpha_val, False)
    history = train_model(x_train, x_val, yy_train, yy_val, class_weights, model, batch_size, max_epochs, callback, 0, lr, dr, units, l1_val, l2_val, alpha_val, False, '', True, False)
    if first_pictures:
        firstPictures(history, max_epochs)

    list_row = [history.history['accuracy'][-1], history.history['val_accuracy'][-1], '',
                history.history['loss'][-1], history.history['val_loss'][-1], '',
                '', '', '', '']

    # yy_test is indexed per class along axis 1; recover integer class ids
    labels_test = np.argmax(yy_test, axis=1)

    if calibration is None:
        probs = model.predict(x_test, verbose=0)

        reliability_diagram_from_preds(probs, labels_test, filename="reliability_diagramm_global_uncalibrated.png")
        np.savez("reliability_data_global_uncalibrated.npz", probs=probs, labels=labels_test)
        return list_row, label_mapping

    # Calibrators are fitted on the output of the layer named "Output"
    model_logits = keras.Model(inputs=model.input, outputs=model.get_layer("Output").output)
    logits_val = model_logits.predict(x_val_cal, verbose=0)
    logits_test = model_logits.predict(x_test, verbose=0)
    labels_val = np.argmax(yy_val_cal, axis=1)

    if calibration == 'temperature_scaling':
        logits = tf.convert_to_tensor(logits_val, dtype=tf.float32)
        labels = tf.convert_to_tensor(labels_val, dtype=tf.int32)

        # NOTE(review): with learning_rate=0.001 and 500 Adam steps the
        # temperature can only move a limited distance from its start value
        # of 1.0 -- confirm this is enough for the optimisation to converge.
        with tf.device("/CPU:0"):
            temperature = tf.Variable(1.0, dtype=tf.float32)
            optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
            for _ in range(500):
                with tf.GradientTape() as tape:
                    loss = nll_loss(logits, temperature, labels)
                grads = tape.gradient(loss, [temperature])
                optimizer.apply_gradients(zip(grads, [temperature]))
        print(f"Optimized temperature: {temperature.numpy()}")

        calibrated_probs = tf.nn.softmax(logits_test / temperature).numpy()
        np.save("calibrator_temperature.npy", temperature.numpy())
        file_tag = 'temperature'

    else:
        # The remaining methods calibrate only the class-1 score; the
        # two-column probability matrix is rebuilt from it afterwards.
        if calibration == 'platt':
            platt_model = LogisticRegression(solver='lbfgs')
            platt_model.fit(logits_val[:, 1].reshape(-1, 1), labels_val)
            positive_probs = platt_model.predict_proba(logits_test[:, 1].reshape(-1, 1))[:, 1]
            joblib.dump(platt_model, "calibrator_platt.pkl")

        elif calibration == 'isotonic':
            iso_reg = IsotonicRegression(out_of_bounds='clip')
            iso_reg.fit(logits_val[:, 1], labels_val)
            positive_probs = iso_reg.predict(logits_test[:, 1])
            joblib.dump(iso_reg, "calibrator_isotonic.pkl")

        elif calibration == 'beta':
            p_val = tf.nn.softmax(logits_val).numpy()[:, 1]
            p_test = tf.nn.softmax(logits_test).numpy()[:, 1]

            beta_calibrator = BetaCalibration(parameters="abm")
            beta_calibrator.fit(p_val, labels_val)
            positive_probs = beta_calibrator.predict(p_test)
            joblib.dump(beta_calibrator, "calibrator_beta.pkl")

        else:  # 'histogram' (the name was validated at the top)
            p_val = tf.nn.softmax(logits_val).numpy()[:, 1]
            p_test = tf.nn.softmax(logits_test).numpy()[:, 1]

            n_bins = 20
            binner = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='uniform')
            bin_ids = binner.fit_transform(p_val.reshape(-1, 1)).astype(int).flatten()

            # Calibrated probability of a bin = observed positive rate in it
            bin_probs = np.zeros(n_bins)
            for i in range(n_bins):
                in_bin = bin_ids == i
                if np.any(in_bin):
                    bin_probs[i] = labels_val[in_bin].mean()
                else:
                    bin_probs[i] = 0.5  # neutral fallback

            test_bin_ids = binner.transform(p_test.reshape(-1, 1)).astype(int).flatten()
            positive_probs = bin_probs[test_bin_ids]
            joblib.dump((binner, bin_probs), "calibrator_histogram.pkl")

        calibrated_probs = np.stack([1 - positive_probs, positive_probs], axis=1)
        file_tag = calibration

    reliability_diagram_from_preds(calibrated_probs, labels_test, filename=f"reliability_diagramm_{file_tag}.png")
    np.savez(f"reliability_data_{file_tag}.npz", probs=calibrated_probs, labels=labels_test)

    return list_row, label_mapping
        

## main function for Testing
### Parameters:

- **df_learn**:  
  DataFrame containing audio data used for model training.

- **df_test**:  
  DataFrame containing the audio data used for model testing (the columns `trillsson`, `samples_begin` and `samples_end` are read).

- **lr**:  
  Learning rate used in the model.

- **dr**:  
  Dropout rate used in the model.

- **units**:  
  Number of units/neurons in the dense layers of the model.

- **l1_val**:  
  L1 regularization parameter used in the dense layers.

- **l2_val**:  
  L2 regularization parameter used in the dense layers.
  
- **alpha_val**:  
  Alpha parameter for LeakyReLU.
  
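- **calibration**:  
  Name of the post-hoc probability calibration method whose saved calibrator is loaded and applied (`'temperature_scaling'`, `'platt'`, `'isotonic'`, `'beta'` or `'histogram'`). If empty or `None`, the uncalibrated model predictions are returned.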

### Returns:

- **predictions**:  
  An array containing the predicted class probabilities for all segments (calibrated if `calibration` is given).
  
- **x_samples_begin**:  
  A list containing all beginning indices of segments from predictions.
  
- **x_samples_end**:  
  A list containing all ending indices of segments from predictions.

### Description:

This function orchestrates the entire workflow for testing the classification model for dialect classification.


In [None]:
def do_all_test(df_learn, df_test, lr, dr, units, l1_val, l2_val, alpha_val, calibration, test_only=True):
    """Predict class probabilities for `df_test` with the saved model weights.

    The model is rebuilt with `create_model`, the weights are loaded from
    'model_weights_finetune.h5', and, if `calibration` is given, the matching
    calibrator file from the working directory is loaded and applied to the
    output of the layer named "Output".

    Parameters
    ----------
    df_learn
        Forwarded to `create_model`.
    df_test
        DataFrame; the columns 'trillsson', 'samples_begin' and
        'samples_end' are read.
    lr, dr, units, l1_val, l2_val, alpha_val
        Hyperparameters forwarded to `create_model`.
    calibration : str or None
        One of 'temperature_scaling', 'platt', 'isotonic', 'beta',
        'histogram'. Any falsy value returns `model.predict(x_test)` as is.
    test_only : bool
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    predictions
        Array of class probabilities.
    x_samples_begin, x_samples_end
        Arrays built from the 'samples_begin' / 'samples_end' columns.

    Raises
    ------
    ValueError
        If `calibration` is truthy but not a supported method name
        (previously this surfaced as an UnboundLocalError).
    """
    supported_calibrations = ('temperature_scaling', 'platt', 'isotonic', 'beta', 'histogram')
    if calibration and calibration not in supported_calibrations:
        raise ValueError(
            f"Unknown calibration method {calibration!r}; "
            f"expected a falsy value or one of {supported_calibrations}"
        )

    x_test = np.asarray(df_test['trillsson'].tolist())
    x_samples_begin = np.asarray(df_test['samples_begin'].tolist())
    x_samples_end = np.asarray(df_test['samples_end'].tolist())

    # The second value returned by create_model is not needed for inference
    model, _ = create_model(df_learn, x_test, lr, dr, units, l1_val, l2_val, alpha_val, False)
    model.load_weights('model_weights_finetune.h5')

    if not calibration:
        predictions = model.predict(x_test)
        return predictions, x_samples_begin, x_samples_end

    # Only the calibrated path needs the "Output" layer activations, so the
    # extra model and forward pass are skipped when no calibration is requested.
    model_logits = keras.Model(inputs=model.input, outputs=model.get_layer("Output").output)
    logits_test = model_logits.predict(x_test, verbose=0)

    # NOTE: joblib.load unpickles the file -- only load calibrator files that
    # were written by a trusted training run.
    if calibration == 'temperature_scaling':
        temperature = np.load("calibrator_temperature.npy")
        predictions = tf.nn.softmax(logits_test / temperature).numpy()

    else:
        if calibration == 'platt':
            platt_model = joblib.load("calibrator_platt.pkl")
            calibrated_probs_pos = platt_model.predict_proba(logits_test[:, 1].reshape(-1, 1))[:, 1]

        elif calibration == 'isotonic':
            iso_reg = joblib.load("calibrator_isotonic.pkl")
            calibrated_probs_pos = iso_reg.predict(logits_test[:, 1])

        elif calibration == 'beta':
            beta_calibrator = joblib.load("calibrator_beta.pkl")
            # Hand the calibrator a NumPy array, as during fitting, not a TF tensor
            p_test = tf.nn.softmax(logits_test).numpy()[:, 1]
            calibrated_probs_pos = beta_calibrator.predict(p_test)

        else:  # 'histogram' (the name was validated at the top)
            binner, bin_probs = joblib.load("calibrator_histogram.pkl")
            p_test = tf.nn.softmax(logits_test).numpy()[:, 1]

            test_bin_ids = binner.transform(p_test.reshape(-1, 1)).astype(int).flatten()
            calibrated_probs_pos = np.asarray(bin_probs)[test_bin_ids]

        predictions = np.stack([1 - calibrated_probs_pos, calibrated_probs_pos], axis=1)

    return predictions, x_samples_begin, x_samples_end


## plots the reliability diagram
### Parameters:

- **pred_proba**:  
  2-D array of predicted probabilities (one column per class).

- **true_labels**:  
  List of the true labels.


In [20]:
def reliability_diagram_from_preds(pred_proba, true_labels, filename="reliability_diagramm_finetuned.png", n_bins=20):
    """Plot a reliability diagram with the ECE and save it to `filename`.

    Parameters
    ----------
    pred_proba : array-like, 2-D
        Predicted probabilities, one column per class. With exactly two
        columns the second column is used; otherwise the row-wise maximum.
    true_labels : array-like, 1-D
        True labels, one per row of `pred_proba`.
    filename : str
        Path the figure is saved to.
    n_bins : int
        Number of equal-width bins on [0, 1] used for the curve, the
        per-bin counts and the expected calibration error (ECE).
    """
    from sklearn.calibration import calibration_curve
    import numpy as np
    import matplotlib.pyplot as plt

    # Accept plain lists as well: the boolean-mask indexing below needs arrays
    pred_proba = np.asarray(pred_proba)
    true_labels = np.asarray(true_labels)

    y_proba = pred_proba[:, 1] if pred_proba.shape[1] == 2 else np.max(pred_proba, axis=1)
    prob_true, prob_pred = calibration_curve(true_labels, y_proba, n_bins=n_bins, strategy='uniform')

    # Compute the ECE
    bins = np.linspace(0, 1, n_bins + 1)
    # np.digitize returns n_bins + 1 for a probability of exactly 1.0, which
    # would fall outside range(n_bins) and silently drop the sample from the
    # ECE. Clip so it lands in the last bin, as np.histogram does below.
    bin_indices = np.clip(np.digitize(y_proba, bins) - 1, 0, n_bins - 1)
    ece = 0.0
    for i in range(n_bins):
        bin_mask = bin_indices == i
        bin_size = np.sum(bin_mask)
        if bin_size > 0:
            bin_confidence = np.mean(y_proba[bin_mask])
            bin_accuracy = np.mean(true_labels[bin_mask])
            # Weight each bin's confidence/accuracy gap by its share of samples
            ece += (bin_size / len(y_proba)) * np.abs(bin_confidence - bin_accuracy)

    counts, _ = np.histogram(y_proba, bins=bins)
    bin_centers = (bins[:-1] + bins[1:]) / 2

    plt.figure(figsize=(8, 6))
    plt.plot(prob_pred, prob_true, marker='o', label='Model')
    plt.plot([0, 1], [0, 1], linestyle='--', label='Perfect Calibration')
    plt.title('Reliability Diagram')
    plt.xlabel('Mean predicted probability')
    plt.ylabel('Fraction of positives')

    # Annotate every bin with the number of samples it contains
    for center, count in zip(bin_centers, counts):
        plt.text(center, 0.05, str(count), ha='center', va='bottom', fontsize=8)

    plt.text(0.95, 0.05, f"ECE = {ece:.4f}", ha='right', va='bottom', fontsize=10, transform=plt.gca().transAxes)
    plt.legend()
    plt.savefig(filename)
