# Optuna Hyperparameter Tuning Notebook

## Contains a template for hyperparameter tuning
## Set parameters in the `def objective(trial):` as appropriate

In [10]:
import warnings

# Filter deprecated feature warnings from Optuna
warnings.filterwarnings("ignore", message=".*suggest_loguniform has been deprecated in v3.0.0.*")
warnings.filterwarnings("ignore", message=".*suggest_uniform has been deprecated in v3.0.0.*")

# Filter warnings related to TensorFlow optimizers on M1/M2 Macs
warnings.filterwarnings("ignore", message=".*please use the legacy Keras optimizer instead.*")

warnings.filterwarnings("ignore", message="At this time, the v2.11+ optimizer `tf.keras.optimizers.Adamax` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adamax`.")

In [2]:
import optuna
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GroupShuffleSplit
from tensorflow.keras.layers import BatchNormalization
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, concatenate
from tensorflow.keras.optimizers import Adam, Adamax, SGD, RMSprop
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Model
from sklearn.inspection import permutation_importance
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from keras.regularizers import l2
from imblearn.over_sampling import SMOTE
from collections import Counter
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.utils.class_weight import compute_class_weight

In [4]:
# Specify the SQLite URL
storage_url = "sqlite:///vgg_baseline_study.db"

In [13]:
# Set a fixed random seed for reproducibility
random.seed(5390) 
np.random.seed(5390)
tf.random.set_seed(5390)

# Load the dataset from the CSV file
dataframe = pd.read_csv('/Users/astrid/PycharmProjects/audioset-thesis-work/audioset/vggish/embeddings/8april_looped_embeddings.csv')

dataframe.drop('mean_freq', axis=1, inplace=True)
dataframe.drop('gender', axis=1, inplace=True)

def assign_age_group(age, age_groups):
    for group_name, age_range in age_groups.items():
        if age_range[0] <= age < age_range[1]:
            return group_name
    return 'Unknown'  

# Define age groups
age_groups = {
    'kitten': (0, 0.5),
    'adult': (0.5, 10),
    'senior': (10, 20)
}

# create new column for the age group
dataframe['age_group'] = dataframe['target'].apply(assign_age_group, age_groups=age_groups)
dataframe['age_group'].value_counts()

adult     460
senior    306
kitten    171
Name: age_group, dtype: int64

In [14]:
def check_group_split(train_index, val_index, groups):
    """
    Check if any group is present in both training and validation sets.

    Parameters:
    - train_index: Indices for training data
    - val_index: Indices for validation data
    - groups: Array of group identifiers corresponding to each sample

    Returns:
    - Prints out any groups found in both sets and the count of such groups
    """
    train_groups = set(groups[train_index])
    val_groups = set(groups[val_index])
    common_groups = train_groups.intersection(val_groups)

    print(train_groups)
    print(val_groups)
    print(common_groups)

    if common_groups:
        print(f"Warning: Found {len(common_groups)} common groups in both training and validation sets: {common_groups}")
    else:
        print("No common groups found between training and validation sets.")

In [15]:
def check_initial_group_split(groups_train, groups_test):
    """
    Check if any group is present in both the train/validation and test sets.

    Parameters:
    - groups_train: Array of group identifiers for the train/validation set
    - groups_test: Array of group identifiers for the test set

    Returns:
    - Prints out any groups found in both sets and the count of such groups
    """
    train_groups = set(groups_train)
    test_groups = set(groups_test)
    common_groups = train_groups.intersection(test_groups)

    if common_groups:
        print(f"Warning: Found {len(common_groups)} common groups in both train/validation and test sets: {common_groups}")
    else:
        print("No common groups found between train/validation and test sets.")


In [16]:
def check_unique_cat_ids(df_train, df_val):
    """
    Check if the 'cat_id' column in df_train and df_val are unique.
    
    Parameters:
        df_train (pandas.DataFrame): DataFrame for training data.
        df_val (pandas.DataFrame): DataFrame for validation data.
        
    Returns:
        bool: True if 'cat_id' column in df_train and df_val are unique, False otherwise.
    """
    # Extract 'cat_id' column from both DataFrames
    train_cat_ids = set(df_train['cat_id'])
    val_cat_ids = set(df_val['cat_id'])
    
    # Check if there is any overlap between 'cat_id' in df_train and df_val
    is_unique = len(train_cat_ids.intersection(val_cat_ids)) == 0
    
    return is_unique

In [17]:
# Separate features and labels
X = dataframe.iloc[:, :-3].values  # all columns except the last three

# Encode the 'age_group' column as integers using LabelEncoder
label_encoder = LabelEncoder()
encoded_y = label_encoder.fit_transform(dataframe['age_group'].values)

# Now use the encoded labels for splitting and one-hot encoding
y = encoded_y  # This will be used in the GroupKFold

# Convert 'cat_id' column to numpy array to be used as groups array for GroupKFold
groups = dataframe['cat_id'].values

In [18]:
# Function to perform the swaps based on cat_id, ensuring swaps within the same age_group
def swap_cat_id_instances(dataframe, train_val_idx, test_idx, specific_cat_ids):
    for cat_id in specific_cat_ids:
        # Check if the specific cat_id is not in the training/validation set
        if cat_id not in dataframe.iloc[train_val_idx]['cat_id'].values:
            # Get the age_group of this cat_id
            age_group = dataframe[dataframe['cat_id'] == cat_id]['age_group'].iloc[0]
                
            # Find a different cat_id within the same age_group in the train/val set that is not in the test set
            other_cat_ids_in_age_group = dataframe[(dataframe['age_group'] == age_group) & 
                                                   (dataframe['cat_id'] != cat_id) &
                                                   (~dataframe['cat_id'].isin(dataframe.iloc[test_idx]['cat_id']))]['cat_id'].unique()
            
            # Choose one other cat_id for swapping
            if len(other_cat_ids_in_age_group) > 0:
                other_cat_id = np.random.choice(other_cat_ids_in_age_group)

                # Find all instances of the other_cat_id in the train/val set
                other_cat_id_train_val_indices = train_val_idx[dataframe.iloc[train_val_idx]['cat_id'] == other_cat_id]
                
                # Find all instances of the specific cat_id in the test set
                cat_id_test_indices = test_idx[dataframe.iloc[test_idx]['cat_id'] == cat_id]
                
                # Swap the indices
                train_val_idx = np.setdiff1d(train_val_idx, other_cat_id_train_val_indices, assume_unique=True)
                test_idx = np.setdiff1d(test_idx, cat_id_test_indices, assume_unique=True)

                train_val_idx = np.concatenate((train_val_idx, cat_id_test_indices))
                test_idx = np.concatenate((test_idx, other_cat_id_train_val_indices))
            else:
                print(f"No alternative cat_id found in the same age_group as {cat_id} for swapping.")
                
    return train_val_idx, test_idx

In [19]:
# Function to identify differences
def find_group_differences(original, new):
    # Convert numpy arrays to sets for easy difference computation
    original_set = set(original)
    new_set = set(new)
    # Find differences
    moved_to_new = new_set - original_set
    moved_to_original = original_set - new_set
    return moved_to_new, moved_to_original

In [20]:
# create custom logger function for local logs & stored in a .txt
def logger(message, file=None):
    print(message)
    if file is not None:
        with open(file, "a") as log_file:
            log_file.write(message + "\n")

log_file_path = "optuna_vggish_june_8.txt" 

# HYPEROPTIMISATION

In [21]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam, RMSprop, SGD, Adamax
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedGroupKFold
import numpy as np
import optuna

# Define the StratifiedGroupKFold splitter for outer CV
outer_cv = StratifiedGroupKFold(n_splits=4, shuffle=True, random_state=42)

# Define the StratifiedGroupKFold splitter for inner CV
inner_cv = StratifiedGroupKFold(n_splits=4, shuffle=True, random_state=42)

def objective(trial):
    # Generate a unique identifier for the current trial
    run_id = f"trial_{trial.number}_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"
    
    # Hyperparameters to tune
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    optimizer_key = trial.suggest_categorical('optimizer', ['Adam', 'Adamax', 'RMSprop'])
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    activation = trial.suggest_categorical('activation', ['relu', 'sigmoid'])
    n_layers = trial.suggest_int('n_layers', 1, 4)  # Number of layers

    # Specific hyperparameters for each layer
    units_per_layer = [trial.suggest_categorical(f'units_l{i}', [32, 64, 128, 224, 256, 480, 512]) for i in range(n_layers)]
    dropout_rate_per_layer = [trial.suggest_float(f'dropout_l{i}', 0.1, 0.5) for i in range(n_layers)]

    # inner loop metrics
    losses, accuracies, precisions, recalls, f1_scores = [], [], [], [], []

    # outer loop metrics
    outer_losses, outer_accuracies, outer_precisions, outer_recalls, outer_f1_scores = [], [], [], [], []

    # unseen test set metrics
    unseen_losses, unseen_accuracies, unseen_precisions, unseen_recalls, unseen_f1 = [], [], [], [], []

    total_inner_fold = 0
    outer_fold = 0

    try:
        # Use the splitter to generate indices for training and testing sets
        # Note: GroupShuffleSplit.split returns indices, so we use it to index the arrays
        for train_val_idx, test_idx in outer_cv.split(X, y, groups):
            outer_fold += 1
            logger(f"outer_fold {outer_fold}", file=log_file_path)

            # Convert indices back to DataFrame for easy manipulation
            df_train_val = dataframe.iloc[train_val_idx]
            df_test = dataframe.iloc[test_idx]
            
            # Get the distribution of age groups
            training_validation_age_group_distribution = df_train_val['age_group'].value_counts()
            testing_age_group_distribution = df_test['age_group'].value_counts()
            
            # Log the distribution
            logger(f"Training/Validation Set Age Group Distribution:\n{training_validation_age_group_distribution}", file=log_file_path)
            logger(f"Testing Set Age Group Distribution:\n{testing_age_group_distribution}", file=log_file_path)

            # Get the distribution of groups
            training_validation_group_distribution = df_train_val['cat_id'].value_counts()  
            testing_group_distribution = df_test['cat_id'].value_counts()  
            
            # Log the distribution
            logger(f"Training/Validation Set Group Distribution:\n{training_validation_group_distribution}", file=log_file_path)
            logger(f"Testing Set Group Distribution:\n{testing_group_distribution}", file=log_file_path)

            # Group by 'age_group' and then list unique 'cat_id' within each age group
            unique_cat_ids_per_age_group_train_val = df_train_val.groupby('age_group')['cat_id'].unique()
            unique_cat_ids_per_age_group_test = df_test.groupby('age_group')['cat_id'].unique()
            
            # Log the results
            logger(f"Unique Cat IDs per Age Group in Training/Validation Set:\n{unique_cat_ids_per_age_group_train_val}", file=log_file_path)
            logger(f"Unique Cat IDs per Age Group in Testing Set:\n{unique_cat_ids_per_age_group_test}", file=log_file_path)

            # Calculate the count of unique identifiers per age group for training/validation and testing set
            counts_train_val = {age_group: len(identifiers) for age_group, identifiers in unique_cat_ids_per_age_group_train_val.items()}
            counts_test = {age_group: len(identifiers) for age_group, identifiers in unique_cat_ids_per_age_group_test.items()}

            # Log the counts of unique identifiers per age group
            logger(f"Count of Unique Cat IDs per Age Group in Training/Validation Set:\n{counts_train_val}", file=log_file_path)
            logger(f"Count of Unique Cat IDs per Age Group in Testing Set:\n{counts_test}", file=log_file_path)
            
            X_train_val, X_test = X[train_val_idx], X[test_idx]
            y_train_val, y_test = y[train_val_idx], y[test_idx]
            groups_train_val, groups_test = groups[train_val_idx], groups[test_idx]

            # logging identifier splits 
            unique_train_val_groups = np.unique(groups_train_val)
            unique_test_groups = np.unique(groups_test)
            
            logger(f"Unique Training/Validation Group IDs:\n{unique_train_val_groups}", file=log_file_path)
            logger(f"Unique Test Group IDs:\n{unique_test_groups}", file=log_file_path)

            # check group splits
            check_initial_group_split(groups_train_val, groups_test)

            # Specify the cat_ids that must be in the training/validation set
            specific_cat_ids = ['000A', '046A']
            
            # Perform the swapping operation
            train_val_idx, test_idx = swap_cat_id_instances(dataframe, train_val_idx, test_idx, specific_cat_ids)
            
            # Re-assign the sets based on the updated indices
            X_train_val, X_test = X[train_val_idx], X[test_idx]
            y_train_val, y_test = y[train_val_idx], y[test_idx]
            new_groups_train_val, new_groups_test = groups[train_val_idx], groups[test_idx]

            # Find differences for training/validation and test sets
            moved_to_train_val, removed_from_train_val = find_group_differences(groups_train_val, new_groups_train_val)
            moved_to_test, removed_from_test = find_group_differences(groups_test, new_groups_test)
            
            # Display the results
            logger(f"Moved to Training/Validation Set:\n{moved_to_train_val}", file=log_file_path)
            logger(f"Removed from Training/Validation Set:\n{removed_from_train_val}", file=log_file_path)
            logger(f"Moved to Test Set:\n{moved_to_test}", file=log_file_path)
            logger(f"Removed from Test Set\n{removed_from_test}", file=log_file_path)

            # Update X_train_val, X_test, y_train_val, y_test, groups_train_val, groups_test based on updated indices
            X_train_val = X[train_val_idx]
            y_train_val = y[train_val_idx]
            groups_train_val = groups[train_val_idx]
            
            X_test = X[test_idx]
            y_test = y[test_idx]
            groups_test = groups[test_idx]

            # logging identifier splits again after potential swaps
            unique_train_val_groups = np.unique(groups_train_val)
            unique_test_groups = np.unique(groups_test)
            
            logger(f"AFTER SWAP - Unique Training/Validation Group IDs:\n{unique_train_val_groups}", file=log_file_path)
            logger(f"AFTER SWAP - Unique Test Group IDs:\n{unique_test_groups}", file=log_file_path)
            
            # Verify the lengths are consistent
            logger(f"Length of X_train_val:\n{len(X_train_val)}", file=log_file_path)
            logger(f"Length of y_train_val:\n{len(y_train_val)}", file=log_file_path)
            logger(f"Length of groups_train_val:\n{len(groups_train_val)}", file=log_file_path)

            # check group splits once more
            check_initial_group_split(groups_train_val, groups_test)

            # Convert the modified indices back to a DataFrame representing the updated df_train_val
            df_train_val_updated = dataframe.iloc[train_val_idx].copy()
            
            for train_index, val_index in inner_cv.split(X_train_val, y_train_val, groups=groups_train_val):
                
                total_inner_fold += 1
                logger(f"\n\nStarting total inner fold nr {total_inner_fold}:", file=log_file_path)
                
                # Perform the group split check
                check_group_split(train_index, val_index, groups_train_val)

                # Data preparation steps 
                df_train = df_train_val_updated.iloc[train_index]
                df_val = df_train_val_updated.iloc[val_index]

                unique = check_unique_cat_ids(df_train, df_val) 
        
                if unique:
                    logger(f"", file=log_file_path)
                    logger(f"NO OVERLAP IN INNER LOOP SPLIT", file=log_file_path)
                    logger(f"", file=log_file_path)
                else:
                    logger(f"", file=log_file_path)
                    logger(f"THERE IS OVERLAP IN INNER LOOP SPLIT", file=log_file_path)
                    logger(f"", file=log_file_path)
        
                df_train_cat_ids = df_train['cat_id'].sort_values().unique()
                df_val_cat_ids = df_val['cat_id'].sort_values().unique()

                logger(f"df_train_cat_ids:\n{df_train_cat_ids}", file=log_file_path)
                logger(f"df_val_cat_ids:\n{df_val_cat_ids}", file=log_file_path)
        
                # Check the distribution
                training_age_group_counts = df_train['age_group'].value_counts()
                validation_age_group_counts = df_val['age_group'].value_counts()
                logger(f"Training set age group distribution:\n{training_age_group_counts}", file=log_file_path)
                logger(f"Validation set age group distribution:\n{validation_age_group_counts}", file=log_file_path)

                X_train = df_train.iloc[:, :-3].values
                X_val = df_val.iloc[:, :-3].values

                y_train = label_encoder.transform(df_train['age_group'].values)
                y_val = label_encoder.transform(df_val['age_group'].values)
                
                # perform one final shuffle before training
                shuffle_idx = np.random.permutation(len(X_train))
                X_train = X_train[shuffle_idx]
                y_train = y_train[shuffle_idx]
                
                y_train_encoded = to_categorical(y_train)
                y_val_encoded = to_categorical(y_val)
                
                scaler = StandardScaler().fit(X_train)
                X_train_scaled = scaler.transform(X_train)
                X_val_scaled = scaler.transform(X_val)

                # Optimizer selection
                optimizers = {
                    'Adam': Adam(learning_rate=learning_rate),
                    'RMSprop': RMSprop(learning_rate=learning_rate),
                    'SGD': SGD(learning_rate=learning_rate),
                    'Adamax': Adamax(learning_rate=learning_rate)
                }

                class_weights = compute_class_weight(
                    class_weight='balanced',
                    classes=np.unique(y_train),
                    y=y_train
                )
                weight_dict = {i: class_weights[i] for i in range(len(class_weights))}
                print(f"Class Weights: {weight_dict}")
                
                # Model definition with dynamic number of layers
                model = Sequential()
                for i in range(n_layers):
                    if i == 0:
                        model.add(Dense(units_per_layer[i], activation=activation, input_shape=(X_train_scaled.shape[1],)))
                    else:
                        model.add(Dense(units_per_layer[i], activation=activation))
                    model.add(BatchNormalization())
                    model.add(Dropout(dropout_rate_per_layer[i]))
                model.add(Dense(3, activation='softmax'))
                
                optimizer = optimizers[optimizer_key]
        
                model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
                
                # EarlyStopping function
                early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=100, verbose=1, restore_best_weights=True)

                # Training with EarlyStopping and silent verbose
                history = model.fit(X_train_scaled, y_train_encoded, validation_data=(X_val_scaled, y_val_encoded), epochs=1500,
                                    batch_size=batch_size, callbacks=[early_stopping], verbose=0, class_weight=weight_dict)
                
                # Log the number of epochs trained and the last epoch's metrics
                epochs_trained = len(history.history['loss'])
                last_loss = history.history['loss'][-1]
                last_accuracy = history.history['accuracy'][-1]
                last_val_loss = history.history['val_loss'][-1]
                last_val_accuracy = history.history['val_accuracy'][-1]
                logger(f"Epochs Trained: {epochs_trained}", file=log_file_path)
                logger(f"Last Training Loss: {last_loss}, Last Training Accuracy: {last_accuracy}", file=log_file_path)
                logger(f"Last Validation Loss: {last_val_loss}, Last Validation Accuracy: {last_val_accuracy}", file=log_file_path)
                
                # Evaluation
                val_loss, val_accuracy = model.evaluate(X_val_scaled, y_val_encoded, verbose=0)
                y_val_pred = model.predict(X_val_scaled)
                y_val_pred_class = np.argmax(y_val_pred, axis=1)
                y_val_true_class = np.argmax(y_val_encoded, axis=1)
                
                # Metric calculation
                accuracy = accuracy_score(y_val_true_class, y_val_pred_class)
                precision = precision_score(y_val_true_class, y_val_pred_class, average='macro', zero_division=0)
                recall = recall_score(y_val_true_class, y_val_pred_class, average='macro', zero_division=0)
                f1 = f1_score(y_val_true_class, y_val_pred_class, average='macro', zero_division=0)

                logger(f"\nrecall for total inner fold nr {total_inner_fold}:\n{recall}\n", file=log_file_path)
                
                # Storing metrics
                losses.append(val_loss)
                accuracies.append(accuracy)
                precisions.append(precision)
                recalls.append(recall)
                f1_scores.append(f1)
        
                # Calculate the average of each metric
                avg_loss = np.mean(losses)
                avg_accuracy = np.mean(accuracies)
                avg_precision = np.mean(precisions)
                avg_recall = np.mean(recalls)
                avg_f1 = np.mean(f1_scores)
            
                # At the end of your objective function
                trial.set_user_attr('average_loss', avg_loss)
                trial.set_user_attr('average_accuracy', avg_accuracy)
                trial.set_user_attr('average_precision', avg_precision)
                trial.set_user_attr('average_recall', avg_recall)
    
                logger(f"average_loss:\n{avg_loss}", file=log_file_path)
                logger(f"average_accuracy:\n{avg_accuracy}", file=log_file_path)
                logger(f"average_precision:\n{avg_precision}", file=log_file_path)
                logger(f"average_recall:\n{avg_recall}", file=log_file_path)

            # After inner loop, calculate and store the average metrics of the inner folds for the current outer split
            inner_average_loss = np.mean(losses)
            inner_average_accuracies = np.mean(accuracies)
            inner_average_precisions = np.mean(precisions)
            inner_average_recalls = np.mean(recalls)
            inner_average_f1_scores = np.mean(f1_scores)

            outer_losses.append(inner_average_loss)
            outer_accuracies.append(inner_average_accuracies)
            outer_precisions.append(inner_average_precisions)
            outer_recalls.append(inner_average_recalls)
            outer_f1_scores.append(inner_average_f1_scores)

            # Log the final averages
            logger(f"\nInner Average Loss for outer fold {outer_fold}: {inner_average_loss}", file=log_file_path)
            logger(f"Inner Average Accuracy for outer fold {outer_fold}: {inner_average_accuracies}", file=log_file_path)
            logger(f"Inner Average Precision for outer fold {outer_fold}: {inner_average_precisions}", file=log_file_path)
            logger(f"Inner Average Recall for outer fold {outer_fold}: {inner_average_recalls}", file=log_file_path)
            logger(f"Inner Average F1-Score for outer fold {outer_fold}: {inner_average_f1_scores}", file=log_file_path)
            
            logger(f"\n Starting training on unseen test set\n", file=log_file_path)

            # EarlyStopping callback modification: monitor 'loss' instead of 'val_loss'
            early_stopping = EarlyStopping(
                monitor='loss',  
                min_delta=0.001, 
                patience=100,  
                verbose=1,  
                restore_best_weights=True  
            )

            # one final shuffle
            outer_shuffle_idx = np.random.permutation(len(X_train_val))
            X_train_val = X_train_val[outer_shuffle_idx]
            y_train_val = y_train_val[outer_shuffle_idx]
            
            # Assuming X_train_val and X_test have been defined earlier along with y_train_val and y_test
            # Scale the features
            scaler_full = StandardScaler().fit(X_train_val)
            X_train_full_scaled = scaler_full.transform(X_train_val)
            X_test_scaled = scaler_full.transform(X_test)
            
            # Encode the labels
            y_train_full_encoded = to_categorical(y_train_val)
            y_test_encoded = to_categorical(y_test)

            # Optimizer selection
            optimizers = {
                'Adam': Adam(learning_rate=learning_rate),
                'RMSprop': RMSprop(learning_rate=learning_rate),
                'SGD': SGD(learning_rate=learning_rate),
                'Adamax': Adamax(learning_rate=learning_rate)
            }

            class_weights = compute_class_weight(
                class_weight='balanced',
                classes=np.unique(y_train_val),
                y=y_train_val
            )
            weight_dict = {i: class_weights[i] for i in range(len(class_weights))}
            print(f"Class Weights: {weight_dict}")

            # full model definition
            model_full = Sequential()
            for i in range(n_layers):
                if i == 0:
                    model_full.add(Dense(units_per_layer[i], activation=activation, input_shape=(X_train_scaled.shape[1],)))
                else:
                    model_full.add(Dense(units_per_layer[i], activation=activation))
                model_full.add(BatchNormalization())
                model_full.add(Dropout(dropout_rate_per_layer[i]))
            model_full.add(Dense(3, activation='softmax'))
            
            # Use the selected optimizer
            optimizer = optimizers[optimizer_key]
            model_full.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
            
            # Train the model on the full training set
            history_full = model_full.fit(X_train_full_scaled, y_train_full_encoded, epochs=1500, batch_size=batch_size,
                                          verbose=1, callbacks=[early_stopping], class_weight=weight_dict)
            
            # Evaluate the model on the held-out test set
            test_loss, test_accuracy = model_full.evaluate(X_test_scaled, y_test_encoded)

            y_test_pred_prob = model_full.predict(X_test_scaled)
            y_test_pred = np.argmax(y_test_pred_prob, axis=1)  # Convert probabilities to class labels
            y_test_true = np.argmax(y_test_encoded, axis=1)    # Ensure y_test is in the correct format for comparison
            
            # Calculate additional metrics
            test_precision = precision_score(y_test_true, y_test_pred, average='macro') 
            test_recall = recall_score(y_test_true, y_test_pred, average='macro')
            test_f1 = f1_score(y_test_true, y_test_pred, average='macro')

            # prepare average of outer f1 scores for Optuna optimisation
            unseen_f1.append(test_f1)
            # add remaining metrics for logging
            unseen_losses.append(test_loss)
            unseen_accuracies.append(test_accuracy)
            unseen_precisions.append(test_precision)
            unseen_recalls.append(test_recall)

            # Print final test results
            logger(f"Final Test Results - Loss: {test_loss}, Accuracy: {test_accuracy}, Precision: {test_precision}, Recall: {test_recall}, F1 Score: {test_f1}", file=log_file_path)

            # Generate the confusion matrix
            cm = confusion_matrix(y_test, y_test_pred)
            logger(f"Confusion Matrix:\n {cm}", file=log_file_path)

        # After the outer loop, calculate the overall average metrics across all outer splits
        final_avg_loss = np.mean(outer_losses)
        final_avg_accuracy = np.mean(outer_accuracies)
        final_avg_precision = np.mean(outer_precisions)
        final_avg_recall = np.mean(outer_recalls)
        final_avg_f1 = np.mean(outer_f1_scores)
        
        # Log the averages from all folds in this trial (5 outer splits * 5 inner cvs = 25) 
        logger(f"\nFinal Total Averages over all folds in current trial (5 outer splits * 5 inner cvs = 25):", file=log_file_path)
        logger(f"\nFinal Average Loss: {final_avg_loss}", file=log_file_path)
        logger(f"Final Average Accuracy: {final_avg_accuracy}", file=log_file_path)
        logger(f"Final Average Precision: {final_avg_precision}", file=log_file_path)
        logger(f"Final Average Recall: {final_avg_recall}", file=log_file_path)
        logger(f"Final Average F1-Score: {final_avg_f1}", file=log_file_path)

        # After evaluating all outer folds, calculate the overall average F1 score across all outer folds for Optuna
        unseen_set_avg_f1 = np.mean(unseen_f1)
        # And the remaining metrics for logging
        unseen_set_avg_loss = np.mean(unseen_losses)
        unseen_set_avg_acc = np.mean(unseen_accuracies)
        unseen_set_avg_precision = np.mean(unseen_precisions)
        unseen_set_avg_recall = np.mean(unseen_recalls)

        logger(f"\nFinal Average F1-Score across all OUTER UNSEEN TEST sets in current trial {trial.number}: {unseen_set_avg_f1}", file=log_file_path)
        logger(f"\nFinal Average Loss across all OUTER UNSEEN TEST sets in current trial {trial.number}: {unseen_set_avg_loss}", file=log_file_path)
        logger(f"\nFinal Average Accuracy across all OUTER UNSEEN TEST sets in current trial {trial.number}: {unseen_set_avg_acc}", file=log_file_path)
        logger(f"\nFinal Average Precision across all OUTER UNSEEN TEST sets in current trial {trial.number}: {unseen_set_avg_precision}", file=log_file_path)
        logger(f"\nFinal Average Recall across all OUTER UNSEEN TEST sets in current trial {trial.number}: {unseen_set_avg_recall}\n", file=log_file_path)
        
        # Return the final average F1-score for Optuna optimisation 
        # Return the average F1 from the INNER folds to optimise and aiming to generalise well on unseen outer data as well
        # We dont want to optimise on the outer data as this might just learn the 'unseen' data then 
        return final_avg_f1

    except Exception as e:
        print(f"!EXCEPTION ERROR!: {e}")
        logger(f"!EXCEPTION ERROR!: {e}", file=log_file_path)
        return 0


In [22]:
def custom_callback(study, trial):
    # Access the latest completed trial
    latest_trial = study.trials[-1]

    logger(f"Finished trial # {latest_trial.number} with value: {latest_trial.value}.", file=log_file_path)
    logger(f"Best so far #:\n{study.best_value:.4f}", file=log_file_path)

    if study.best_trial:
        logger(f"Best parameters: #:\n{study.best_value:.4f}", file=log_file_path)
        for key, value in study.best_trial.params.items():
            logger(f"    {key}: {value}\n", file=log_file_path)

In [23]:
tf.keras.backend.clear_session()

In [None]:
# Enable logging to display information about the optimization process
optuna.logging.set_verbosity(optuna.logging.INFO)

# Create or load the study
study = optuna.create_study(study_name="vggish_9_june", storage=storage_url, direction="maximize", load_if_exists=True)

study.enqueue_trial({
    'activation': 'relu',
    'learning_rate': 0.00038188800331973483,
    'optimizer': 'Adamax',
    'batch_size': 32,
    'n_layers': 1,  
    'units_l0': 480, 
    'dropout_l0': 0.27188281261238406,  
})

# Pass the callback to the optimize method
study.optimize(objective, callbacks=[custom_callback])

# Display the information about the best trial
logger(f"Number of finished trials: {len(study.trials)}", file=log_file_path)

logger(f"Best trial:", file=log_file_path)
trial = study.best_trial

logger(f"  Value: {trial.value}", file=log_file_path)
logger(f"  Params: ", file=log_file_path)
for key, value in trial.params.items():
    logger(f"    {key}: {value}", file=log_file_path)

[I 2024-06-10 09:27:25,619] A new study created in RDB with name: vggish_9_june


outer_fold 1
Training/Validation Set Age Group Distribution:
adult     327
senior    261
kitten    153
Name: age_group, dtype: int64
Testing Set Age Group Distribution:
adult     133
senior     45
kitten     18
Name: age_group, dtype: int64
Training/Validation Set Group Distribution:
046A    63
000A    39
103A    33
047A    28
057A    27
        ..
041A     1
092A     1
049A     1
043A     1
090A     1
Name: cat_id, Length: 86, dtype: int64
Testing Set Group Distribution:
002B    32
074A    25
020A    23
106A    14
059A    14
068A    11
014B    10
072A     9
095A     8
117A     7
099A     7
044A     5
025C     5
021A     5
062A     4
104A     4
014A     3
032A     2
076A     1
048A     1
004A     1
115A     1
110A     1
100A     1
024A     1
026B     1
Name: cat_id, dtype: int64
Unique Cat IDs per Age Group in Training/Validation Set:
age_group
adult     [006A, 000A, 033A, 071A, 097B, 019A, 067A, 022...
kitten    [111A, 040A, 046A, 047A, 042A, 109A, 050A, 043...
senior    [093A, 015A, 

In [5]:
# Create or load the study
study = optuna.create_study(study_name="vggish_9_june", storage=storage_url, direction="maximize", load_if_exists=True)

# Get the number of trials
num_trials = len(study.trials)

print("Number of trials:", num_trials)

[I 2024-06-10 09:41:53,955] Using an existing study with name 'vggish_9_june' instead of creating a new one.


Number of trials: 3


In [74]:
# Access the best trial
best_trial = study.best_trial

print("Best trial ID:", best_trial.number)
print("Best trial value:", best_trial.value)
print("Best trial parameters:", best_trial.params)

Best trial ID: 42
Best trial value: 0.6505760166987988
Best trial parameters: {'learning_rate': 0.00854117044509888, 'optimizer': 'Adam', 'batch_size': 32, 'activation': 'relu', 'n_layers': 1, 'units_l0': 32, 'dropout_l0': 0.17219006828082012}


In [75]:
trial_index = 44
particular_trial = study.trials[trial_index]

# Access trial attributes
trial_params = particular_trial.params
trial_value = particular_trial.value

print("Parameters of the particular trial:", trial_params)
print("Value of the particular trial:", trial_value)

Parameters of the particular trial: {'learning_rate': 0.004757171053968943, 'optimizer': 'Adam', 'batch_size': 32, 'activation': 'relu', 'n_layers': 1, 'units_l0': 512, 'dropout_l0': 0.20956287952922775}
Value of the particular trial: 0.622956389506926
