In [None]:
from keras.layers import Dense, Dropout, Activation, LeakyReLU, Conv1D, GlobalAveragePooling1D, Flatten, MaxPooling1D,  BatchNormalization 
from tensorflow.keras.initializers import HeNormal, Constant
from sklearn.metrics import accuracy_score, f1_score
from sklearn.utils import class_weight, shuffle
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder
from keras.regularizers import l1, l2, l1_l2
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model
from keras.callbacks import EarlyStopping
from sklearn.dummy import DummyClassifier
from keras.utils import to_categorical
from keras.models import Sequential
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from keras import backend as K
from random import sample
import tensorflow as tf
import keras.metrics
import pandas as pd
import numpy as np
import random

import tensorflow_addons as tfa

# Seed every random number generator used in this notebook with the same value.
seed_value = 42
random.seed(seed_value)         # Python's built-in `random` module
np.random.seed(seed_value)      # NumPy's global generator
tf.random.set_seed(seed_value)  # TensorFlow's global seed



## GPU

In [None]:
# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front.
# Iterating over the device list (instead of indexing [0]) keeps this cell runnable on
# CPU-only machines, where the list is empty, and applies the same setting to every
# visible GPU, because TensorFlow does not allow memory growth to differ between GPUs.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

## Divide into Train, Validation and Test
### Parameters:

- **df**:  
  The original DataFrame containing audio data.

- **df_aug**:  
  The augmented DataFrame, if available.

- **name_aug**:  
  A name identifier used for loading augmented data or specifying augmentation settings.

### Returns:

- **x_train**:  
  Numpy array of features for the training set.

- **y_train**:  
  List of dialect labels for the training set.

- **x_test**:  
  Numpy array of features for the testing set.

- **y_test**:  
  List of dialect labels for the testing set.

- **x_val**:  
  Numpy array of features for the validation set.

- **y_val**:  
  List of dialect labels for the validation set.

- **y_test_names**:  
  List of names for the testing set.

- **y_test_speaker**:  
  List of speakers for the testing set.

- **y_test_segment_begin**:  
  List of starting sample indices for segments in the testing set.

- **y_test_segment_end**:  
  List of ending sample indices for segments in the testing set.


In [3]:
def train_test(df, df_aug, name_aug, random_state=42):
    """Split `df` class by class into training, validation and test sets.

    For every value of the 'class' column, 10% of the rows are sampled for the test
    set, 10/90 of the remaining rows (about 10% of the class) for the validation set,
    and the rest goes to the training set.

    If `df_aug` is given, its rows are appended to the training set, except rows that
    correspond to a validation or test row. A row corresponds when its file name
    (with everything up to and including the first '_' removed), 'samples_begin',
    'samples_end' and 'class' all match.

    Parameters:
        df: DataFrame with the columns 'class', 'trillsson', 'file_name', 'speaker',
            'samples_begin' and 'samples_end'.
        df_aug: DataFrame with augmented rows (needs 'file_name', 'samples_begin',
            'samples_end', 'class' and 'trillsson'), or None.
        name_aug: Unused by this function; kept so existing callers keep working.
        random_state: Seed passed to every `DataFrame.sample` call.

    Returns:
        Tuple (x_train, y_train, x_test, y_test, x_val, y_val, y_test_names,
        y_test_speaker, y_test_segment_begin, y_test_segment_end). The x_* values are
        NumPy arrays built from the 'trillsson' column; all others are Python lists.
    """
    # The `drop(...)` calls below remove rows by index label. With duplicated labels
    # (e.g. a frame built by `pd.concat` without `ignore_index`) they would silently
    # remove more rows than were sampled, so work on a unique positional index.
    # Sampling itself is positional, so the chosen rows are unaffected.
    df = df.reset_index(drop=True)

    test_list = []
    val_list = []
    train_list = []

    for cls in df['class'].unique():
        df_cls = df[df['class'] == cls]
        test_sample = df_cls.sample(frac=0.1, random_state=random_state)
        remaining = df_cls.drop(test_sample.index)
        # 10/90 of the remaining 90% equals 10% of the whole class.
        val_sample = remaining.sample(frac=10/90, random_state=random_state)
        train_sample = remaining.drop(val_sample.index)

        test_list.append(test_sample)
        val_list.append(val_sample)
        train_list.append(train_sample)

    test_df = pd.concat(test_list)
    val_df = pd.concat(val_list)
    train_df = pd.concat(train_list)

    if df_aug is not None:
        df_aug = df_aug.copy()
        # Strip the prefix up to the first '_' to recover the original file name.
        df_aug['base_file_name'] = df_aug['file_name'].apply(lambda x: x.split('_', 1)[1] if '_' in x else x)
        eval_df = pd.concat([val_df, test_df]).copy()
        eval_df['base_file_name'] = eval_df['file_name']
        merge_keys = ['base_file_name', 'samples_begin', 'samples_end', 'class']
        # Anti-join: keep only augmented rows with no counterpart in validation/test.
        merged = pd.merge(df_aug, eval_df[merge_keys], on=merge_keys, how='left', indicator=True)
        df_aug_filtered = merged[merged['_merge'] == 'left_only'].drop(columns=['_merge', 'base_file_name'])
        train_df = pd.concat([train_df, df_aug_filtered], ignore_index=True)

    x_train = np.asarray(train_df['trillsson'].tolist())
    y_train = train_df['class'].tolist()
    x_val = np.asarray(val_df['trillsson'].tolist())
    y_val = val_df['class'].tolist()
    x_test = np.asarray(test_df['trillsson'].tolist())
    y_test = test_df['class'].tolist()

    y_test_names = test_df.file_name.tolist()
    y_test_speaker = test_df.speaker.tolist()
    y_test_segment_begin = test_df.samples_begin.tolist()
    y_test_segment_end = test_df.samples_end.tolist()

    return x_train, y_train, x_test, y_test, x_val, y_val, y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end


## Divide into Train and Validation
### Parameters:

- **df**:  
  The original DataFrame containing audio data.

- **df_aug**:  
  The augmented DataFrame, if available.

- **name_aug**:  
  A name identifier used for loading augmented data or specifying augmentation settings.

### Returns:

- **x_train**:  
  Numpy array of features for the training set.

- **y_train**:  
  List of dialect labels for the training set.

- **x_val**:  
  Numpy array of features for the validation set.

- **y_val**:  
  List of dialect labels for the validation set.


In [4]:
def train_val(df, df_aug, name_aug, random_state=42):
    """Split `df` class by class into a training and a validation set.

    For every value of the 'class' column, 10% of the rows are sampled for the
    validation set; all other rows form the training set.

    If `df_aug` is given, its rows are appended to the training set, except rows that
    correspond to a validation row. A row corresponds when its file name (with
    everything up to and including the first '_' removed), 'samples_begin',
    'samples_end' and 'class' all match.

    Parameters:
        df: DataFrame with the columns 'class', 'trillsson', 'file_name',
            'samples_begin' and 'samples_end'.
        df_aug: DataFrame with augmented rows (same key columns plus 'trillsson'),
            or None.
        name_aug: Unused by this function; kept so existing callers keep working.
        random_state: Seed passed to every `DataFrame.sample` call.

    Returns:
        Tuple (x_train, y_train, x_val, y_val); the x_* values are NumPy arrays built
        from the 'trillsson' column, the y_* values are lists of class labels.
    """
    # `df.drop(val.index)` removes rows by index label. With duplicated labels it would
    # also remove rows that were never sampled, so work on a unique positional index.
    # Sampling itself is positional, so the chosen rows are unaffected.
    df = df.reset_index(drop=True)

    val_list = []
    for cls in df['class'].unique():
        df_cls = df[df['class'] == cls]
        val_sample = df_cls.sample(frac=0.1, random_state=random_state)
        val_list.append(val_sample)
    val = pd.concat(val_list)
    train = df.drop(val.index)

    if df_aug is not None:
        df_aug = df_aug.copy()
        # Strip the prefix up to the first '_' to recover the original file name.
        df_aug['base_file_name'] = df_aug['file_name'].apply(lambda x: x.split('_', 1)[1] if '_' in x else x)
        val = val.copy()
        val['base_file_name'] = val['file_name']
        merge_keys = ['base_file_name', 'samples_begin', 'samples_end', 'class']
        # Anti-join: keep only augmented rows with no counterpart in the validation set.
        merged = pd.merge(df_aug, val[merge_keys], on=merge_keys, how='left', indicator=True)
        df_aug_filtered = merged[merged['_merge'] == 'left_only'].drop(columns=['_merge', 'base_file_name'])
        train = pd.concat([train, df_aug_filtered], ignore_index=True)

    x_train = np.asarray(train['trillsson'].tolist())
    y_train = train['class'].tolist()
    x_val = np.asarray(val['trillsson'].tolist())
    y_val = val['class'].tolist()

    return x_train, y_train, x_val, y_val


## get Features
### Parameters:

- **x_train**:  
  Numpy array of features for the training set.

- **y_train**:  
  List of dialect labels for the training set.

- **x_test**:  
  Numpy array of features for the testing set.

- **y_test**:  
  List of dialect labels for the testing set.

- **x_val**:  
  Numpy array of features for the validation set.

- **y_val**:  
  List of dialect labels for the validation set.

- **y_test_names**:  
  List of names for the testing set.

- **y_test_speaker**:  
  List of speakers for the testing set.

- **y_test_segment_begin**:  
  List of starting sample indices for segments in the testing set.

- **y_test_segment_end**:  
  List of ending sample indices for segments in the testing set.

- **df_learn**:  
  DataFrame containing audio data used for encoding labels.

### Returns:

- **y_train**:  
  List of shuffled dialect labels for the training set.

- **x_train**:  
  Numpy array of shuffled features for the training set.

- **y_val**:  
  List of shuffled dialect labels for the validation set.

- **x_val**:  
  Numpy array of shuffled features for the validation set.

- **y_test**:  
  List of shuffled dialect labels for the testing set.

- **x_test**:  
  Numpy array of shuffled features for the testing set.

- **yy_train**:  
  Categorical labels for the training set.

- **yy_test**:  
  Categorical labels for the testing set.

- **yy_val**:  
  Categorical labels for the validation set.

- **y_test_names**:  
  List of names for the testing set.

- **y_test_speaker**:  
  List of speakers for the testing set.

- **y_test_segment_begin**:  
  List of starting sample indices for segments in the testing set.

- **y_test_segment_end**:  
  List of ending sample indices for segments in the testing set.

- **label_mapping**:  
  Mapping of original dialect labels to encoded categorical labels.


In [5]:
def features(x_train, y_train, x_test, y_test, x_val, y_val, y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end, df_learn):
    """Shuffle the three splits and one-hot encode their labels.

    Each split is shuffled with its own fixed `random_state`; the test metadata lists
    are shuffled together with the test data so they stay aligned row by row.
    Labels are encoded with a `LabelEncoder` fitted on the sorted unique values of
    `df_learn['class']` and then converted to one-hot vectors.

    Parameters:
        x_train, x_val, x_test: Feature arrays of the three splits.
        y_train, y_val, y_test: Class labels of the three splits.
        y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end:
            Per-row metadata of the test split.
        df_learn: DataFrame whose 'class' column defines the full label set.

    Returns:
        Tuple (y_train, x_train, y_val, x_val, y_test, x_test, yy_train, yy_test,
        yy_val, y_test_names, y_test_speaker, y_test_segment_begin,
        y_test_segment_end, label_mapping), where the yy_* values are the one-hot
        label matrices and label_mapping maps each label occurring in y_train to its
        one-hot vector.
    """
    y_train, x_train = shuffle(y_train, x_train, random_state=42)
    y_val, x_val = shuffle(y_val, x_val, random_state=42+1)
    y_test, x_test, y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end = shuffle(y_test, x_test, y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end, random_state=42+2)

    # Encode the classification labels
    le = LabelEncoder()
    le.fit(sorted(df_learn['class'].unique().tolist()))
    # Fix the one-hot width to the full label set. Without `num_classes`,
    # `to_categorical` infers the width from the largest label present, so a split
    # missing the last class would get a narrower matrix than the other splits.
    num_classes = len(le.classes_)
    yy_train = to_categorical(le.transform(y_train), num_classes=num_classes)
    yy_test = to_categorical(le.transform(y_test), num_classes=num_classes)
    yy_val = to_categorical(le.transform(y_val), num_classes=num_classes)

    label_mapping = dict(zip(y_train, yy_train))

    return y_train, x_train, y_val, x_val, y_test, x_test, yy_train, yy_test, yy_val, y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end, label_mapping


## get Features for Training and Validation
### Parameters:

- **x_train**:  
  Numpy array of features for the training set.

- **y_train**:  
  List of dialect labels for the training set.

- **x_val**:  
  Numpy array of features for the validation set.

- **y_val**:  
  List of dialect labels for the validation set.

- **df_learn**:  
  DataFrame containing audio data used for encoding labels.

### Returns:

- **y_train**:  
  List of shuffled dialect labels for the training set.

- **x_train**:  
  Numpy array of shuffled features for the training set.

- **y_val**:  
  List of shuffled dialect labels for the validation set.

- **x_val**:  
  Numpy array of shuffled features for the validation set.

- **yy_train**:  
  Categorical labels for the training set.

- **yy_val**:  
  Categorical labels for the validation set.

- **label_mapping**:  
  Mapping of original dialect labels to encoded categorical labels.


In [6]:
def features_train_only(x_train, y_train, x_val, y_val, df_learn):
    """Shuffle the training and validation splits and one-hot encode their labels.

    Each split is shuffled with its own fixed `random_state`. Labels are encoded with
    a `LabelEncoder` fitted on the sorted unique values of `df_learn['class']` and
    then converted to one-hot vectors.

    Parameters:
        x_train, x_val: Feature arrays of the two splits.
        y_train, y_val: Class labels of the two splits.
        df_learn: DataFrame whose 'class' column defines the full label set.

    Returns:
        Tuple (y_train, x_train, y_val, x_val, yy_train, yy_val, label_mapping), where
        the yy_* values are the one-hot label matrices and label_mapping maps each
        label occurring in y_train to its one-hot vector.
    """
    y_train, x_train = shuffle(y_train, x_train, random_state=42+3)
    y_val, x_val = shuffle(y_val, x_val, random_state=42+4)

    # Encode the classification labels
    le = LabelEncoder()
    le.fit(sorted(df_learn['class'].unique().tolist()))
    # Fix the one-hot width to the full label set. Without `num_classes`,
    # `to_categorical` infers the width from the largest label present, so a split
    # missing the last class would get a narrower matrix than the other split.
    num_classes = len(le.classes_)
    yy_train = to_categorical(le.transform(y_train), num_classes=num_classes)
    yy_val = to_categorical(le.transform(y_val), num_classes=num_classes)

    label_mapping = dict(zip(y_train, yy_train))

    return y_train, x_train, y_val, x_val, yy_train, yy_val, label_mapping


## create model
### Parameters:

- **df_learn**:  
  DataFrame containing audio data used for model training.

- **x_train**:  
  Numpy array of features for the training set.

- **lr**:  
  Learning rate for model optimization.

- **dr**:  
  Dropout rate for regularization.

- **units**:  
  Number of units/neurons in the dense layers of the model.

- **l1_val**:  
  L1 regularization parameter for the dense layers.

- **l2_val**:  
  L2 regularization parameter for the dense layers.

- **alpha_val**:  
  Alpha parameter for LeakyReLU.
  
- **finetune_only**:  
  True if the run is only for finetuning.
  

### Returns:

- **model**:  
  Compiled Keras model for classification.

- **callback**:  
  EarlyStopping callback to monitor validation loss and restore the best weights during training.


In [7]:
def create_model(df_learn, x_train, lr, dr, units, l1_val, l2_val, alpha_val, finetune_only):
    """Build and compile the dense classifier and its early-stopping callback.

    Architecture: BatchNormalization -> Dense(units*2) -> LeakyReLU -> Dropout ->
    Dense(units) -> LeakyReLU -> Dropout -> Dense(number of classes) -> softmax.
    The model is compiled with categorical cross-entropy, Adam and the single metric
    'accuracy'.

    Parameters:
        df_learn: DataFrame whose 'class' column determines the number of outputs.
        x_train: Features; only the size of the last axis is used (input width).
        lr: Learning rate for Adam.
        dr: Dropout rate of both Dropout layers.
        units: Width of the second dense layer; the first one uses `units*2`.
        l1_val: L1 activity-regularization factor of both hidden dense layers.
        l2_val: L2 kernel-regularization factor of both hidden dense layers.
        alpha_val: `alpha` passed to both LeakyReLU layers.
        finetune_only: If True, load 'model_weights_train.h5' and freeze every layer
            located before 'Dense2'.

    Returns:
        Tuple (model, callback): the compiled model and an EarlyStopping callback
        monitoring 'val_loss' with `restore_best_weights=True`.
    """
    seed_value = 42
    he_initializer = HeNormal(seed=seed_value)
    bias_initializer = Constant(value=0.0)

    classes = df_learn['class'].unique().tolist()
    num_labels = len(classes)

    def build_model_graph():
        model = Sequential()
        model.add(BatchNormalization(input_shape=(np.array(x_train).shape[-1],), name='BatchNorm'))
        model.add(Dense(units*2,
                        kernel_regularizer=l2(l2_val), activity_regularizer=l1(l1_val),
                        kernel_initializer=he_initializer, bias_initializer=bias_initializer, name='Dense1'))
        model.add(LeakyReLU(alpha=alpha_val))
        model.add(Dropout(dr, seed=seed_value, name='Dropout1'))

        model.add(Dense(units,
                        kernel_regularizer=l2(l2_val), activity_regularizer=l1(l1_val),
                        kernel_initializer=he_initializer, bias_initializer=bias_initializer, name='Dense2'))
        model.add(LeakyReLU(alpha=alpha_val))
        model.add(Dropout(dr, seed=seed_value+1, name='Dropout2'))

        model.add(Dense(num_labels, name='Output'))
        model.add(Activation('softmax'))

        if finetune_only:
            # Load the pre-trained weights and freeze the early layers BEFORE compiling:
            # Keras only takes `trainable` changes into account at the next `compile()`.
            model.load_weights('model_weights_train.h5')
            dense2_index = model.layers.index(model.get_layer("Dense2"))
            for layer in model.layers[:dense2_index]:
                layer.trainable = False

        model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                      optimizer=Adam(learning_rate = lr))

        return model

    model = build_model_graph()
    # `summary()` prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    callback = EarlyStopping(
        monitor="val_loss",
        mode='min',
        min_delta=0.005,
        patience=10,
        verbose=1,
        baseline=None,
        restore_best_weights=True,
    )

    return model, callback


## train Model
### Parameters:

- **x_train**:  
  Numpy array of features for the training set.

- **x_val**:  
  Numpy array of features for the validation set.

- **yy_train**:  
  Categorical labels for the training set.

- **yy_val**:  
  Categorical labels for the validation set.

- **class_weights**:  
  Dictionary of class weights for handling class imbalance.

- **model**:  
  Compiled Keras model for training.

- **batch_size**:  
  Batch size used for training.

- **num_epochs**:  
  Number of epochs for training.

- **callback**:  
  EarlyStopping callback for monitoring validation loss.

- **i**:  
  Index used for TensorBoard log directory.

- **lr**:  
  Learning rate used in the model.

- **dr**:  
  Dropout rate used in the model.

- **units**:  
  Number of units/neurons in the dense layers of the model.

- **l1_val**:  
  L1 regularization parameter used in the model.

- **l2_val**:  
  L2 regularization parameter used in the model.
  
- **alpha_val**:  
  Alpha parameter for LeakyReLU.

- **tb**:  
  Boolean indicating whether to enable TensorBoard logging.

- **log_dir**:  
  Directory path for TensorBoard logs.
  
- **train_only**:  
  True if the run is only for training.
  
- **finetune_only**:  
  True if the run is only for finetuning.


### Returns:

- **history**:  
  History object containing training metrics and loss values.


In [8]:
def weights(y_train):
    """Compute 'balanced' class weights for the labels in `y_train`.

    Returns a dict that maps the position of each label in the sorted unique labels
    of `y_train` (0, 1, 2, ...) to its weight. The dict is also printed.
    """
    present_labels = np.unique(y_train)
    balanced = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=present_labels,
        y=y_train,
    )
    class_weights = {position: weight for position, weight in enumerate(balanced)}
    print('Class weights:', class_weights)
    return class_weights


In [9]:
def train_model(x_train, x_val, yy_train, yy_val, class_weights, model, batch_size, num_epochs, callback, i, lr, dr, units, l1_val, l2_val, alpha_val, tb, log_dir, train_only, finetune_only):
    """Fit `model` and optionally save its weights.

    The model is fitted on (x_train, yy_train) and validated on (x_val, yy_val),
    without per-epoch shuffling and with the given class weights. If `tb` is true, a
    TensorBoard callback is added whose log directory is `log_dir` followed by the run
    index `i` and the hyperparameter values. Afterwards the weights are written to
    'model_weights_train.h5' if `train_only` is true and to
    'model_weights_finetune.h5' if `finetune_only` is true.

    Returns the History object returned by `model.fit`.
    """
    active_callbacks = [callback]
    if tb:
        # Encode the run index and all hyperparameters in the log directory name.
        run_suffix = "".join([
            str(i),
            "lr_", str(lr),
            "dr_", str(dr),
            "units_", str(units),
            "l1_", str(l1_val),
            "l2_", str(l2_val),
            "alpha_", str(alpha_val),
            "bs_", str(batch_size),
        ])
        log_dir = log_dir + run_suffix
        active_callbacks.append(
            keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        )

    train_inputs = np.array(x_train)
    validation_pair = (np.array(x_val), yy_val)
    history = model.fit(
        train_inputs,
        yy_train,
        batch_size=batch_size,
        epochs=num_epochs,
        validation_data=validation_pair,
        verbose=1,
        shuffle=False,
        class_weight=class_weights,
        callbacks=active_callbacks,
    )

    # Both flags are checked independently, so both files can be written.
    for should_save, weights_file in ((train_only, 'model_weights_train.h5'),
                                      (finetune_only, 'model_weights_finetune.h5')):
        if should_save:
            model.save_weights(weights_file)

    return history


## test Model

In [10]:
def pred(x_test, yy_test, model):
    """Predict the test set and pair predicted with true class indices.

    Parameters:
        x_test: Test features; converted to a NumPy array before prediction.
        yy_test: One-hot encoded true labels, one row per test sample.
        model: Object with a `predict` method returning one score row per sample.

    Returns:
        Tuple (df_result, classes_x, classes_true, pred_test): a DataFrame with the
        columns 'Pred' and 'True', the predicted class indices, the true class
        indices, and the raw output of `model.predict`.
    """
    pred_test = model.predict(np.array(x_test))

    # The class index is the position of the largest value in each row.
    classes_x = np.argmax(pred_test, axis=1)
    classes_true = np.argmax(yy_test, axis=1)

    paired_rows = [(predicted, actual) for predicted, actual in zip(classes_x, classes_true)]
    df_result = pd.DataFrame(paired_rows, columns=['Pred', 'True'])

    return df_result, classes_x, classes_true, pred_test


In [11]:
def get_false(y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end, df_result, sample_rate=16000):
    """Collect the metadata of every misclassified test sample.

    Parameters:
        y_test_names: File name per test sample.
        y_test_speaker: Speaker per test sample.
        y_test_segment_begin: Segment start (in samples) per test sample.
        y_test_segment_end: Segment end (in samples) per test sample.
        df_result: DataFrame with the columns 'Pred' and 'True', one row per test
            sample, in the same order as the lists above.
        sample_rate: Divisor used to convert the segment start into the value shown
            in `false_simplified`; defaults to 16000, the previously fixed value.

    Returns:
        Tuple (false_names, false_speaker, false_segments_begin, false_segments_end,
        false_simplified) restricted to rows where 'Pred' differs from 'True'.
        Each entry of false_simplified is '<file name> <segment start / sample_rate>'.
    """
    # Use row positions rather than index labels: the metadata are plain lists, so a
    # `df_result` with a non-default index would otherwise pick the wrong elements
    # or raise IndexError.
    mismatch = (df_result['Pred'] != df_result['True']).to_numpy()
    indices = np.flatnonzero(mismatch).tolist()

    false_names = [y_test_names[index] for index in indices]
    false_speaker = [y_test_speaker[index] for index in indices]
    false_segments_begin = [y_test_segment_begin[index] for index in indices]
    false_segments_end = [y_test_segment_end[index] for index in indices]
    false_simplified = [(y_test_names[index] + ' ' + str(y_test_segment_begin[index]/sample_rate)) for index in indices]
    return false_names, false_speaker, false_segments_begin, false_segments_end, false_simplified


In [12]:
def firstPictures(history, num_epochs):
    """Save the loss and accuracy curves of a training run as PNG files.

    Two figures are written to the working directory: 'ex_loss_epoch_.png' with the
    training and validation loss, and 'ex_acc_epoch.png' with the training and
    validation accuracy. The x-axis ticks are placed every 5 epochs from 0 up to (but
    not including) `num_epochs`.

    Parameters:
        history: Object whose `history` dict holds the per-epoch lists 'loss',
            'val_loss', 'accuracy' and 'val_accuracy'.
        num_epochs: Upper bound used for the x-axis tick positions.
    """
    epoch_ticks = range(0, num_epochs, 5)

    # (train key, validation key, train label, validation label, title, output file)
    figure_specs = (
        ('loss', 'val_loss', 'Training loss', 'Validation loss',
         'Training and Validation loss by Epoch', 'ex_loss_epoch_.png'),
        ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
         'Training and Validation Accuracy by Epoch', 'ex_acc_epoch.png'),
    )

    for train_key, val_key, train_label, val_label, title, out_file in figure_specs:
        train_curve = history.history[train_key]
        val_curve = history.history[val_key]

        plt.figure(figsize=(12, 8))
        plt.plot(train_curve, label=train_label, color='#185fad')
        plt.plot(val_curve, label=val_label, color='orange')
        plt.title(title, fontsize = 25)
        plt.xlabel('Epoch', fontsize = 18)
        plt.xticks(epoch_ticks, epoch_ticks)
        plt.legend(fontsize = 18)
        plt.savefig(out_file, bbox_inches='tight')
        plt.close()
    

## main function
### Parameters:

- **first_pictures**:  
  Boolean indicating whether to generate plots for the runs during training.

- **df_learn**:  
  DataFrame containing audio data used for model training.

- **df_learn_aug**:  
  Augmented DataFrame containing audio data, if available.

- **name_aug**:  
  A name identifier used for loading augmented data or specifying augmentation settings.

- **i**:  
  Index used for TensorBoard log directory.

- **lr**:  
  Learning rate used in the model.

- **dr**:  
  Dropout rate used in the model.

- **units**:  
  Number of units/neurons in the dense layers of the model.

- **l1_val**:  
  L1 regularization parameter used in the dense layers.

- **l2_val**:  
  L2 regularization parameter used in the dense layers.

- **batch_size**:  
  Batch size used for training.

- **tb**:  
  Boolean indicating whether to enable TensorBoard logging.

- **log_dir**:  
  Directory path for TensorBoard logs.

- **max_epochs**:  
  Maximum number of epochs for training.

### Returns:

- **list_row**:  
  A list containing various metrics and data for evaluation and analysis.

- **label_mapping**:  
  Mapping of original dialect labels to encoded categorical labels.

### Description:

This function orchestrates the entire workflow for training and evaluating the dialect classification model. It performs data preprocessing, model creation, training, evaluation, and result extraction. It trains the custom neural network model built by `create_model`; no DummyClassifier baseline is trained in this function, although `DummyClassifier` is imported at the top of the notebook. It returns a list of evaluation metrics and data for analysis, along with a mapping of original dialect labels to encoded categorical labels.


In [13]:
def do_all(first_pictures, df_learn, df_learn_aug, name_aug, i, lr, dr, units, l1_val, l2_val, alpha_val, batch_size, tb, log_dir, max_epochs):
    """Run one full experiment: split, encode, train, and evaluate on the test set.

    Returns:
        Tuple (list_row, label_mapping). list_row holds, in order: training accuracy,
        validation accuracy, test accuracy, training loss, validation loss, test loss
        (training/validation values are those of the last recorded epoch), the
        simplified descriptions of misclassified test samples, the predicted class
        indices, the true class indices and the raw test predictions.
    """
    # Split the data, then shuffle and encode it; `train_test` returns its values in
    # the order `features` expects them.
    raw_split = train_test(df_learn, df_learn_aug, name_aug)
    (y_train, x_train, y_val, x_val, y_test, x_test,
     yy_train, yy_test, yy_val,
     y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end,
     label_mapping) = features(*raw_split, df_learn)

    # Build and train the model (neither the train-only nor the finetune-only mode).
    class_weights = weights(y_train)
    model, callback = create_model(df_learn, x_train, lr, dr, units, l1_val, l2_val, alpha_val, False)
    history = train_model(
        x_train, x_val, yy_train, yy_val, class_weights, model, batch_size, max_epochs,
        callback, i, lr, dr, units, l1_val, l2_val, alpha_val, tb, log_dir, False, False,
    )

    # Evaluate on the held-out test set.
    df_result, classes_x, classes_true, pred_test = pred(x_test, yy_test, model)
    loss, accuracy = model.evaluate(np.array(x_test), yy_test, verbose=0)
    misclassified = get_false(y_test_names, y_test_speaker, y_test_segment_begin, y_test_segment_end, df_result)
    false_simplified = misclassified[4]

    if first_pictures:
        firstPictures(history, max_epochs)

    epoch_log = history.history
    list_row = [
        epoch_log['accuracy'][-1], epoch_log['val_accuracy'][-1], accuracy,
        epoch_log['loss'][-1], epoch_log['val_loss'][-1], loss,
        false_simplified, classes_x, classes_true, pred_test,
    ]

    return list_row, label_mapping


## main function for Training
### Parameters:

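- **model_num**:  
  The number of the model for saving the weights. *(Note: the current `do_all_train_finetune` signature does not take this parameter; the weights are saved under fixed file names.)*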

- **first_pictures**:  
  Boolean indicating whether to generate plots for the runs during training.

- **df_learn**:  
  DataFrame containing audio data used for model training.

- **df_learn_aug**:  
  Augmented DataFrame containing audio data, if available.

- **name_aug**:  
  A name identifier used for loading augmented data or specifying augmentation settings.

- **lr**:  
  Learning rate used in the model.

- **dr**:  
  Dropout rate used in the model.

- **units**:  
  Number of units/neurons in the dense layers of the model.

- **l1_val**:  
  L1 regularization parameter used in the dense layers.

- **l2_val**:  
  L2 regularization parameter used in the dense layers.

- **batch_size**:  
  Batch size used for training.

- **max_epochs**:  
  Maximum number of epochs for training.
  
- **finetune_only**:  
  True if the run is only for finetuning.
  

### Returns:

- **list_row**:  
  A list containing various metrics and data for evaluation and analysis.

- **label_mapping**:  
  Mapping of original dialect labels to encoded categorical labels.

### Description:

This function orchestrates the entire workflow for training the classification model for dialect classification. It performs data preprocessing, model creation, training and saving the model weights.


In [14]:
def do_all_train_finetune(first_pictures, df_learn, df_learn_aug, name_aug, lr, dr, units, l1_val, l2_val, alpha_val, batch_size, max_epochs, finetune_only):
    """Train or fine-tune the model on a train/validation split (no test set).

    With `finetune_only` false the run is treated as a training run, otherwise as a
    fine-tuning run; the flag is forwarded to `create_model` and `train_model`.
    TensorBoard logging is disabled.

    Returns:
        Tuple (list_row, label_mapping). list_row has the same ten slots as the row
        produced by `do_all`; the slots for test results hold empty strings.
    """
    raw_split = train_val(df_learn, df_learn_aug, name_aug)
    (y_train, x_train, y_val, x_val,
     yy_train, yy_val, label_mapping) = features_train_only(*raw_split, df_learn)

    class_weights = weights(y_train)
    model, callback = create_model(df_learn, x_train, lr, dr, units, l1_val, l2_val, alpha_val, finetune_only)

    is_training_run = not finetune_only
    history = train_model(
        x_train, x_val, yy_train, yy_val, class_weights, model, batch_size, max_epochs,
        callback, 0, lr, dr, units, l1_val, l2_val, alpha_val, False, '',
        is_training_run, finetune_only,
    )

    if first_pictures:
        firstPictures(history, max_epochs)

    epoch_log = history.history
    # Empty strings stand in for the test-set entries that this run does not produce.
    list_row = [
        epoch_log['accuracy'][-1], epoch_log['val_accuracy'][-1], '',
        epoch_log['loss'][-1], epoch_log['val_loss'][-1], '',
        '', '', '', '',
    ]

    return list_row, label_mapping


## main function for Testing
### Parameters:

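- **model_num**:  
  The number of the model for loading the right weights. *(Note: the current `do_all_test` signature does not take this parameter; the weights are loaded from a fixed file name.)*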

- **df_learn**:  
  DataFrame containing audio data used for model training.

- **df_test**:  
  DataFrame containing audio data used for model testing.

- **lr**:  
  Learning rate used in the model.

- **dr**:  
  Dropout rate used in the model.

- **units**:  
  Number of units/neurons in the dense layers of the model.

- **l1_val**:  
  L1 regularization parameter used in the dense layers.

- **l2_val**:  
  L2 regularization parameter used in the dense layers.

### Returns:

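- **predictions**:  
  A list containing all predictions.

- **x_samples_begin**:  
  Numpy array of starting sample indices of the test segments.

- **x_samples_end**:  
  Numpy array of ending sample indices of the test segments.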

### Description:

This function orchestrates the entire workflow for testing the classification model for dialect classification.


In [15]:
def do_all_test(df_learn, df_test, lr, dr, units, l1_val, l2_val, alpha_val):
    """Predict `df_test` with the fine-tuned weights.

    The model is rebuilt with `create_model` (not in fine-tuning mode), the weights
    are loaded from 'model_weights_finetune.h5', and the 'trillsson' features of
    `df_test` are predicted.

    Returns:
        Tuple (predictions, x_samples_begin, x_samples_end): the model output and the
        'samples_begin' / 'samples_end' columns of `df_test` as NumPy arrays.
    """
    def column_as_array(column_name):
        # Convert one DataFrame column to a NumPy array via a Python list.
        return np.asarray(df_test[column_name].tolist())

    x_test = column_as_array('trillsson')
    x_samples_begin = column_as_array('samples_begin')
    x_samples_end = column_as_array('samples_end')

    # The early-stopping callback returned alongside the model is not needed here.
    model, _unused_callback = create_model(df_learn, x_test, lr, dr, units, l1_val, l2_val, alpha_val, False)
    model.load_weights('model_weights_finetune.h5')

    return model.predict(x_test), x_samples_begin, x_samples_end
