In [None]:
# Mount Google Drive at /content/drive; the dataset, cached features and helper
# modules used below are all read from paths under that mount point.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
!pip install tensorflow



In [None]:
!pip install bokeh



In [None]:
import glob
import os
import warnings
import numpy as np

from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

import sys
sys.path.append('/content/drive/MyDrive/MasterThesis/utils')
from sound_utils import extract_log_mel_windows, generate_dataset_from_list, load_sound_file
from misc import build_files_list, dump_pickle, load_pickle
from eval_perf import (
    get_prediction,
    plot_confusion_matrix,
    plot_histogram_by_class,
    plot_loss_per_epoch,
    plot_pr_curve,
    plot_roc_curve,
)

np.random.seed(42)

In [None]:
import tensorflow as tf

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.python.client import device_lib

tf.random.set_seed(42)

In [None]:
from bokeh.io import export_svgs, output_notebook, reset_output
from bokeh.models import BoxAnnotation, ColumnDataSource, HoverTool
from bokeh.plotting import figure, show
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    average_precision_score,
    precision_recall_curve,
    roc_auc_score,
    roc_curve,
)

output_notebook()

In [None]:
#random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Feature-extraction settings forwarded to generate_dataset_from_list below.
n_fft = 1024  # forwarded as n_fft (presumably the FFT size in samples — confirm in sound_utils)
hop_length = 512  # forwarded as hop_length (presumably the hop between FFT frames — confirm in sound_utils)
n_mels = 80  # equals the last axis of the cached training array, shape (734400, 5, 80)
frames = 5  # equals the middle axis of the cached training array, shape (734400, 5, 80)

In [None]:
# Report the total RAM of the runtime and whether it clears the 20 GB mark.
from psutil import virtual_memory

ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

runtime_message = (
    'Not using a high-RAM runtime' if ram_gb < 20 else 'You are using a high-RAM runtime!'
)
print(runtime_message)

Your runtime has 89.6 gigabytes of available RAM

You are using a high-RAM runtime!


# Processing pipeline


1.   Load data
2.   Split into training, test and validation sets
3.   Extract log-Mel spectrograms
4.   Save the spectrograms



In [None]:
# Locations on the mounted Drive.
root_dir = "/content/drive/MyDrive/ToyCar_data"  # scanned by build_files_list for the sound files
DATA_PATH = "/content/drive/MyDrive/ToyCar_data/ToyADMOS-anomaly-detection"  # file lists and cached features go to <DATA_PATH>/dataset
MODEL_PATH = "/content/drive/MyDrive/ToyCar_data/ToyADMOS-anomaly-detection"  # the trained model is saved here (same folder as DATA_PATH)

In [None]:
def build_files_list(root_dir):
    """Collect the normal and anomalous sound file paths found under ``root_dir``.

    Only files located directly inside ``<root_dir>/NormalSound`` and
    ``<root_dir>/AnomalousSound`` are collected; files in any other directory
    (including sub-directories of those two folders) are ignored.

    Note: this notebook-local definition replaces the ``build_files_list``
    imported from ``misc`` earlier in the notebook.

    Args:
        root_dir: Dataset root that contains the two class folders.

    Returns:
        Tuple ``(normal_files, abnormal_files)``; each is a list of full paths
        in the order produced by ``os.walk``.
    """
    # Derive the class folders from the argument instead of hard-coding one
    # absolute location, so the function works for any dataset root.
    normal_dir = os.path.normpath(os.path.join(root_dir, "NormalSound"))
    abnormal_dir = os.path.normpath(os.path.join(root_dir, "AnomalousSound"))

    normal_files = []
    abnormal_files = []

    for root, _, files in os.walk(top=root_dir):
        current_dir = os.path.normpath(root)
        if current_dir == normal_dir:
            target = normal_files
        elif current_dir == abnormal_dir:
            target = abnormal_files
        else:
            continue
        target.extend(os.path.join(root, name) for name in files)

    return normal_files, abnormal_files

In [None]:
# Train and validation sets
#
# Half of the normal and half of the abnormal files are drawn at random.
# 80% of the drawn normal files become the training set; the remaining 20%
# plus all drawn abnormal files become the validation set.
# The statements below consume the seeded NumPy RNG in a fixed order, so
# reordering them changes the resulting split.

np.random.seed(42)

normal_files, abnormal_files = build_files_list(root_dir)

# Randomly sample 50% of each class (without replacement).
# The index arrays are kept because the test-set cell selects the complementary half.
normal_sample_indices = np.random.choice(len(normal_files), size=len(normal_files) // 2, replace=False)
abnormal_sample_indices = np.random.choice(len(abnormal_files), size=len(abnormal_files) // 2, replace=False)

normal_files_sampled = [normal_files[i] for i in normal_sample_indices]
abnormal_files_sampled = [abnormal_files[i] for i in abnormal_sample_indices]

# Create labels for the sampled files: 0 = normal, 1 = abnormal
normal_labels = np.zeros(len(normal_files_sampled))
abnormal_labels = np.ones(len(abnormal_files_sampled))

# Split normal files into train/val (the training set therefore holds normal files only)
train_files, val_files, train_labels, val_labels = train_test_split(
    normal_files_sampled, normal_labels, train_size=0.8, random_state=42, shuffle=True
)

# Add abnormal files to val set
val_files = np.concatenate((val_files, abnormal_files_sampled), axis=0)
val_labels = np.concatenate((val_labels, abnormal_labels), axis=0)

# Shuffle val set; one permutation is applied to files and labels so they stay aligned
val_indices = np.arange(len(val_files))
np.random.shuffle(val_indices)

val_files = val_files[val_indices]
val_labels = val_labels[val_indices]

# Print dataset stats
print(
    f"Train set has {train_labels.shape[0]} signals including abnormal {train_labels.sum():.0f} signals. "
    f"Validation set has {val_labels.shape[0]} signals including abnormal {val_labels.sum():.0f} signals."
)


Train set has 2160 signals including abnormal 0 signals. Validation set has 1069 signals including abnormal 529 signals.


In [None]:
# Test set
#
# Built from the files that were NOT drawn for the train/validation cell.
# Requires normal_sample_indices / abnormal_sample_indices from that cell, so
# it must run after it.

np.random.seed(42)

# NOTE(review): the index arrays above refer to positions in the lists returned by
# the earlier build_files_list call; this assumes the second call returns the files
# in the same order — TODO confirm.
normal_files, abnormal_files = build_files_list(root_dir)

# Get the other half (the unselected indices)
other_normal_sample_indices = np.setdiff1d(np.arange(len(normal_files)), normal_sample_indices)
other_abnormal_sample_indices = np.setdiff1d(np.arange(len(abnormal_files)), abnormal_sample_indices)

other_normal_files_sampled = [normal_files[i] for i in other_normal_sample_indices]
other_abnormal_files_sampled = [abnormal_files[i] for i in other_abnormal_sample_indices]

# Create labels for the sampled files: 0 = normal, 1 = abnormal
other_normal_labels = np.zeros(len(other_normal_files_sampled))
other_abnormal_labels = np.ones(len(other_abnormal_files_sampled))

# Split normal files into train/test; only the 20% test part is used below.
# NOTE(review): other_train_files / other_train_labels are not referenced again in
# the visible notebook, i.e. 80% of these normal files appear to go unused.
other_train_files, test_files, other_train_labels, test_labels = train_test_split(
    other_normal_files_sampled, other_normal_labels, train_size=0.8, random_state=42, shuffle=True
)

# Add abnormal files to test set
test_files = np.concatenate((test_files, other_abnormal_files_sampled), axis=0)
test_labels = np.concatenate((test_labels, other_abnormal_labels), axis=0)

# Shuffle test set; one permutation is applied to files and labels so they stay aligned
test_indices = np.arange(len(test_files))
np.random.shuffle(test_indices)

test_files = test_files[test_indices]
test_labels = test_labels[test_indices]

# Print dataset stats
print(
    f"Test set has {test_labels.shape[0]} signals including abnormal {test_labels.sum():.0f} signals."
)


Test set has 1070 signals including abnormal 530 signals.


In [None]:
# Persist the file lists and labels of every split as plain-text files
# (<DATA_PATH>/dataset/<key>_AE.txt, one item per line).
dataset = {
    "train_files": train_files,
    "val_files": val_files,
    "test_files": test_files,
    "train_labels": train_labels,
    "val_labels": val_labels,
    "test_labels": test_labels,
}

# Create the target folder first: open(..., "w") does not create missing
# directories and would fail on a fresh Drive.
dataset_dir = os.path.join(DATA_PATH, "dataset")
os.makedirs(dataset_dir, exist_ok=True)

for key, values in dataset.items():
    file_name = os.path.join(dataset_dir, key + "_AE.txt")
    with open(file_name, "w") as f:
        f.writelines(str(item) + "\n" for item in values)

In [None]:
# Training features: reuse the pickle cached on Drive when it exists,
# otherwise extract the windows from the training files and cache them.

train_data_path = os.path.join(DATA_PATH, "dataset", "train_data_LSTM_0605" + ".pkl")

if not os.path.exists(train_data_path):
    train_data = generate_dataset_from_list(
        train_files,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
        frames=frames,
    )
    print("Saving train data to disk...")
    dump_pickle(train_data_path, train_data)
    print("Done.")
else:
    print("Train data already exists, loading from file...")
    train_data = load_pickle(train_data_path)

print(f"Train data has a {train_data.shape} shape.")

Extracting features: 100%|██████████| 2160/2160 [31:55<00:00,  1.13it/s]


Saving train data to disk...
Done.
Train data has a (734400, 5, 80) shape.


In [None]:
# Validation features: reuse the pickle cached on Drive when it exists,
# otherwise extract the windows from the validation files and cache them.

val_data_path = os.path.join(DATA_PATH, "dataset", "val_data_LSTM_0605" + ".pkl")

if not os.path.exists(val_data_path):
    val_data = generate_dataset_from_list(
        val_files,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
        frames=frames,
    )
    print("Saving validation data to disk...")
    dump_pickle(val_data_path, val_data)
    print("Done.")
else:
    print("Validation data already exists, loading from file...")
    val_data = load_pickle(val_data_path)

print(f"Validation data has a {val_data.shape} shape.")

Extracting features: 100%|██████████| 1069/1069 [15:28<00:00,  1.15it/s]


Saving validation data to disk...
Done.
Validation data has a (363460, 5, 80) shape.


# Model 3: LSTM Autoencoder

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, RepeatVector, TimeDistributed, Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam

# Model parameters (input windows have shape (timesteps, features))
timesteps = 5  # same value as `frames` in the feature-extraction settings
features = 80  # same value as `n_mels` in the feature-extraction settings
latent_dim =10  # width of the bottleneck LSTM, i.e. the compressed representation

def LSTM_autoencoder(timesteps, features, latent_dim):
    """Assemble the (uncompiled) LSTM autoencoder.

    The encoder stacks four sequence-returning LSTM layers (80, 40, 20, 10
    units), each followed by batch normalisation, and ends in a bottleneck
    LSTM of ``latent_dim`` units that returns only its last output. That
    vector is repeated ``timesteps`` times and decoded by the mirrored stack
    (10, 20, 40, 80 units); a time-distributed dense layer maps every step
    back to ``features`` values.

    Args:
        timesteps: Number of steps in each input window.
        features: Number of values per step.
        latent_dim: Number of units of the bottleneck LSTM.

    Returns:
        A Keras ``Model`` mapping ``(timesteps, features)`` windows to
        reconstructions of the same shape.
    """
    encoder_widths = (80, 40, 20, 10)
    decoder_widths = (10, 20, 40, 80)

    input_layer = Input(shape=(timesteps, features), name='encoder_input')

    # Encoder: progressively narrower LSTM + BatchNorm pairs
    tensor = input_layer
    for position, width in enumerate(encoder_widths, start=1):
        tensor = LSTM(
            width, activation='relu', return_sequences=True, name=f'encoder_LSTM_{position}'
        )(tensor)
        tensor = BatchNormalization(name=f'encoder_BN_{position}')(tensor)

    # Bottleneck: collapse the sequence into a single latent vector
    latent = LSTM(
        latent_dim, activation='relu', return_sequences=False, name='encoder_bottleneck'
    )(tensor)

    # Feed the latent vector to the decoder once per time step
    tensor = RepeatVector(timesteps, name='repeat_vector')(latent)

    # Decoder: mirrored, progressively wider LSTM + BatchNorm pairs
    for position, width in enumerate(decoder_widths, start=1):
        tensor = LSTM(
            width, activation='relu', return_sequences=True, name=f'decoder_LSTM_{position}'
        )(tensor)
        tensor = BatchNormalization(name=f'decoder_BN_{position}')(tensor)

    # Output: one dense projection per time step
    output = TimeDistributed(Dense(features), name='decoder_output')(tensor)

    return Model(inputs=input_layer, outputs=output)

In [None]:
# Create model
autoencoder = LSTM_autoencoder(timesteps=timesteps, features=features, latent_dim=latent_dim)

# Compile model
# NOTE: the training cell further down compiles the model again with
# learning_rate=1e-02, so the optimizer configured here is not the one used for fitting.
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# Summary
autoencoder.summary()

In [None]:
# Define global constants to be used in this notebook
%%time
batch_size = 128
epochs = 100

autoencoder.compile(
    optimizer=Adam(learning_rate=1e-02),
    loss="mean_squared_error"
)

history = autoencoder.fit(
    train_data,train_data,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    callbacks=[EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)],
    validation_data=(val_data, val_data),
    shuffle=True
)

Epoch 1/100
[1m5738/5738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 12ms/step - loss: 77.3338 - val_loss: 48.7187
Epoch 2/100
[1m5738/5738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 9ms/step - loss: 14.3520 - val_loss: 65.0842
Epoch 3/100
[1m5738/5738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 9ms/step - loss: 13.0405 - val_loss: 80.3787
Epoch 4/100
[1m5738/5738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 9ms/step - loss: 13.2367 - val_loss: 155.2551
Epoch 5/100
[1m5738/5738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 9ms/step - loss: 16.4704 - val_loss: 32.6575
Epoch 6/100
[1m5738/5738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 9ms/step - loss: 18.7942 - val_loss: 12608013.0000
Epoch 7/100
[1m5738/5738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 9ms/step - loss: 18.8352 - val_loss: 55.2850
Epoch 8/100
[1m5738/5738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 9ms/step - loss: 18.3362 - va

In [None]:
# Name used for the saved model file and passed as model_name to the plotting helpers.
MODEL_NAME = "Model3_LSTM_AutoEncoder"

# Save the trained model to <MODEL_PATH>/<MODEL_NAME>.h5
autoencoder.save(os.path.join(MODEL_PATH, MODEL_NAME + ".h5"))



In [None]:
# NOTE(review): identical to the save call in the previous cell (same model, same path);
# this cell looks redundant — TODO confirm and remove.
autoencoder.save(os.path.join(MODEL_PATH, MODEL_NAME + ".h5"))



# Performance Evaluation for Validation set

---



In [None]:
# Visualise the fit history returned by autoencoder.fit with the eval_perf helper.
plot_loss_per_epoch(
    history, model_name=MODEL_NAME
)

In [None]:
# Computes one anomaly score per validation file: the MSE between the original
# and reconstructed windows, averaged per window and then over all windows of the file.
# windows = original input
# reconstructions = reconstructed version

from tqdm import tqdm
import numpy as np

recon_errors = []

for file_path in tqdm(val_files, desc="Evaluating validation files"):
    # Extract the windows with the same settings that produced the training data.
    # The result is deliberately NOT stored in `features`: that name holds the
    # model hyper-parameter (number of values per step) used to build the network.
    windows = extract_log_mel_windows(
        file_path,
        sr=16000,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
        frames=frames,
    )

    if windows.size == 0:
        # Keep the position aligned with val_labels.
        # NOTE(review): a NaN score will presumably be rejected by the sklearn
        # metrics used later — TODO decide how empty files should be handled.
        recon_errors.append(np.nan)
        continue

    # Predict reconstructed windows from the model
    reconstructions = autoencoder.predict(windows, verbose=0)

    # Compute mean squared error per window and average over all windows
    mse_per_window = np.mean(np.square(windows - reconstructions), axis=(1, 2))  # shape: (num_windows,)
    recon_errors.append(np.mean(mse_per_window))

Evaluating validation files: 100%|██████████| 1069/1069 [03:15<00:00,  5.45it/s]


In [None]:
# Preview only the first scores instead of dumping one value per file.
recon_errors[:10]

[np.float32(10.314348),
 np.float32(9.47672),
 np.float32(11.957479),
 np.float32(9.501906),
 np.float32(16.229378),
 np.float32(14.393224),
 np.float32(15.689124),
 np.float32(15.110542),
 np.float32(10.731136),
 np.float32(13.96105),
 np.float32(9.703925),
 np.float32(15.462543),
 np.float32(11.498368),
 np.float32(10.960139),
 np.float32(9.697742),
 np.float32(9.537986),
 np.float32(10.103869),
 np.float32(15.046904),
 np.float32(10.257968),
 np.float32(13.786778),
 np.float32(10.052109),
 np.float32(14.465373),
 np.float32(10.44505),
 np.float32(14.794964),
 np.float32(15.326661),
 np.float32(9.810251),
 np.float32(15.484571),
 np.float32(17.490261),
 np.float32(14.873617),
 np.float32(16.695889),
 np.float32(10.877016),
 np.float32(17.447931),
 np.float32(11.363983),
 np.float32(21.1636),
 np.float32(9.733339),
 np.float32(9.86822),
 np.float32(14.917253),
 np.float32(12.516428),
 np.float32(10.799885),
 np.float32(10.627755),
 np.float32(14.450826),
 np.float32(9.747266),
 np.flo

In [None]:
# Column 0 = position of the file in the validation list, column 1 = its score.
stack = np.column_stack((range(len(recon_errors)), recon_errors))

# Split the scores by ground-truth label (0 = normal, 1 = abnormal).
normal_rows = stack[val_labels == 0]
abnormal_rows = stack[val_labels == 1]
score_false = normal_rows[:, 1]
score_true = abnormal_rows[:, 1]

plot_histogram_by_class(score_false, score_true, bins=[20, 30], model_name=MODEL_NAME)

In [None]:
# Scatter the per-file reconstruction errors of the validation set, coloured by
# class, and highlight the band [THRESHOLD_MIN, THRESHOLD_MAX] that is being
# considered for the decision threshold.
THRESHOLD_MIN = 0.0
THRESHOLD_MAX = 10

p = figure(
    width=600,
    height=400,
    title=f"{MODEL_NAME}: Threshold Range Exploration",
    x_axis_label="Samples",
    y_axis_label="Reconstruction Error",
)

# One scatter layer per class (label value, marker colour, legend entry).
for label_value, color, legend in (
    (0, "crimson", "Normal Signals"),
    (1, "indigo", "Abnormal Signals"),
):
    class_rows = stack[val_labels == label_value]
    source = ColumnDataSource(dict(index=class_rows[:, 0], error=class_rows[:, 1]))
    p.scatter(
        "index",
        "error",
        fill_alpha=0.6,
        fill_color=color,
        line_color=None,
        legend_label=legend,
        source=source,
    )

# Shaded band marking the candidate threshold range.
box = BoxAnnotation(
    bottom=THRESHOLD_MIN,
    top=THRESHOLD_MAX,
    fill_alpha=0.1,
    fill_color="magenta",
    line_color="darkmagenta",
    line_width=1.0,
)
p.add_layout(box)

p.legend.label_text_font_size = "8pt"
p.legend.location = "top_right"
p.title.align = "center"
p.title.text_font_size = "12pt"

p.add_tools(HoverTool(tooltips=[("index", "@index"), ("error", "@error")]))

show(p)

In [None]:
# Sweep the decision threshold over [THRESHOLD_MIN, THRESHOLD_MAX] and plot how
# many validation files are misclassified in each direction.
THRESHOLD_MIN = 0.00
THRESHOLD_MAX = 100
THRESHOLD_STEP = 0.2

# linspace gives an exact, inclusive end point; arange with a float step does not
# guarantee the number of generated values.
n_thresholds = int(round((THRESHOLD_MAX - THRESHOLD_MIN) / THRESHOLD_STEP)) + 1
thresholds = np.linspace(THRESHOLD_MIN, THRESHOLD_MAX, n_thresholds)
errors = []

for threshold in thresholds:
    predictions = get_prediction(stack[:, 1], threshold=threshold)
    conf_mat = confusion_matrix(val_labels, predictions)
    # Rows are true labels, columns predicted labels:
    # [1, 0] = abnormal predicted normal (FN), [0, 1] = normal predicted abnormal (FP).
    errors.append([threshold, conf_mat[1, 0], conf_mat[0, 1]])

errors = np.array(errors)

p = figure(
    width=600,
    height=400,
    title=f"{MODEL_NAME}: Best Threshold Exploration",
    # The threshold is expressed in reconstruction-error (MSE) units, not percent.
    x_axis_label="Reconstruction Error Threshold",
    y_axis_label="# Samples",
)

source = ColumnDataSource(
    data=dict(
        threshold=errors[:, 0], false_negative=errors[:, 1], false_positive=errors[:, 2]
    )
)

p.line(
    x="threshold",
    y="false_negative",
    color="crimson",
    legend_label="False Negative",
    source=source,
)

p.line(
    x="threshold",
    y="false_positive",
    color="indigo",
    legend_label="False Positive",
    source=source,
)

p.legend.label_text_font_size = "8pt"
p.legend.location = "top_left"
p.legend.click_policy = "hide"
p.title.align = "center"
p.title.text_font_size = "12pt"

p.add_tools(
    HoverTool(
        tooltips=[
            ("threshold", "@threshold"),
            ("false_negative", "@false_negative"),
            ("false_positive", "@false_positive"),
        ]
    )
)
show(p)


In [None]:
# Classify the validation files with a fixed threshold on the reconstruction
# error and report the confusion matrix and summary metrics.
# NOTE(review): the recorded output of this cell (100% recall, precision equal
# to the abnormal share 529/1069 = 49.49%) indicates that every file is flagged
# as abnormal at this threshold; a value taken from the sweep above is
# probably intended — TODO confirm.
THRESHOLD = 10
predictions = get_prediction(stack[:, 1], threshold=THRESHOLD)

plot_confusion_matrix(
    confusion_matrix(val_labels, predictions),
    #model_name=MODEL_NAME,
)

print(
    f"Accuracy: {accuracy_score(val_labels, predictions):.2%}, \
Precision: {precision_score(val_labels, predictions):.2%}, \
Recall: {recall_score(val_labels, predictions):.2%}, \
F1: {f1_score(val_labels, predictions):.2%}"
)

Accuracy: 49.49%, Precision: 49.49%, Recall: 100.00%, F1: 66.21%


In [None]:
# ROC curve of the validation set, using the per-file reconstruction error as the anomaly score.
plot_roc_curve(
    roc_curve(val_labels, recon_errors),
    roc_auc_score(val_labels, recon_errors),

    model_name=MODEL_NAME
)

In [None]:
# Stored as `roc_auc` rather than `auc`: the name `auc` is needed for
# sklearn.metrics.auc, which the partial-AUC helper in this notebook calls.
roc_auc = roc_auc_score(val_labels, recon_errors)

print(f"AUC score: {roc_auc:.4f}")

AUC score: 0.9424


In [None]:
# Precision-recall curve of the validation set, using the per-file reconstruction error as the anomaly score.
plot_pr_curve(
    precision_recall_curve(val_labels, recon_errors),
    average_precision_score(val_labels, recon_errors),
    model_name=MODEL_NAME
)



In [None]:
from sklearn.metrics import roc_curve, auc
import numpy as np

def compute_partial_auc(y_true, y_scores, max_fpr=0.1):
    """Area under the ROC curve restricted to ``FPR <= max_fpr`` (not normalised).

    The ROC curve is cut at ``max_fpr``; when no ROC point lies exactly on the
    cut, the TPR at ``max_fpr`` is obtained by linear interpolation between the
    two neighbouring points. The area is integrated with the trapezoidal rule,
    so a perfect classifier scores ``max_fpr``.

    Args:
        y_true: Binary ground-truth labels.
        y_scores: Anomaly scores, higher meaning more likely positive.
        max_fpr: Upper false-positive-rate bound, in ``(0, 1]``.

    Returns:
        The unnormalised partial AUC.

    Raises:
        ValueError: If ``max_fpr`` is outside ``(0, 1]``.
    """
    if not 0 < max_fpr <= 1:
        raise ValueError(f"max_fpr must be in (0, 1], got {max_fpr!r}")

    fpr, tpr, _ = roc_curve(y_true, y_scores)

    # Keep only points where FPR <= max_fpr
    mask = fpr <= max_fpr
    fpr_partial = fpr[mask]
    tpr_partial = tpr[mask]

    # Interpolate to add (max_fpr, interpolated_tpr) if needed
    if fpr_partial[-1] < max_fpr:
        # First ROC point beyond max_fpr and its left neighbour
        idx = np.searchsorted(fpr, max_fpr)
        fpr_left, fpr_right = fpr[idx - 1], fpr[idx]
        tpr_left, tpr_right = tpr[idx - 1], tpr[idx]

        # Linear interpolation
        slope = (tpr_right - tpr_left) / (fpr_right - fpr_left)
        tpr_interp = tpr_left + slope * (max_fpr - fpr_left)

        fpr_partial = np.append(fpr_partial, max_fpr)
        tpr_partial = np.append(tpr_partial, tpr_interp)

    # Trapezoidal rule written out with NumPy so the helper does not depend on
    # the module-level name `auc`, which this notebook also uses for a float score.
    segment_widths = np.diff(fpr_partial)
    segment_heights = (tpr_partial[1:] + tpr_partial[:-1]) / 2.0
    return np.sum(segment_widths * segment_heights)

# Usage
pauc = compute_partial_auc(val_labels, recon_errors, max_fpr=0.1)
print(f"Unnormalized Partial AUC (FPR ≤ 0.1): {pauc:.4f} or the model performs {(pauc/0.1):.1%} as well as a perfect classifier in the region where FPR ≤ 0.1.")

#pauc/0.1 * 100

Unnormalized Partial AUC (FPR ≤ 0.1): 0.0809 or the model performs 80.9% as well as a perfect classifier in the region where FPR ≤ 0.1.


# Performance Evaluation for Test set

In [None]:
# Computes one anomaly score per test file: the MSE between the original and
# reconstructed windows, averaged per window and then over all windows of the file.
from tqdm import tqdm
import numpy as np

recon_errors = []

for file_path in tqdm(test_files, desc="Evaluating test files"):
    # Extract the windows with the same settings that produced the training data.
    # The result is deliberately NOT stored in `features`: that name holds the
    # model hyper-parameter (number of values per step) used to build the network.
    windows = extract_log_mel_windows(
        file_path,
        sr=16000,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
        frames=frames,
    )

    if windows.size == 0:
        # Keep the position aligned with test_labels.
        # NOTE(review): a NaN score will presumably be rejected by the sklearn
        # metrics used later — TODO decide how empty files should be handled.
        recon_errors.append(np.nan)
        continue

    # Predict reconstructed windows from the model
    reconstructions = autoencoder.predict(windows, verbose=0)

    # Compute mean squared error per window and average over all windows
    mse_per_window = np.mean(np.square(windows - reconstructions), axis=(1, 2))  # shape: (num_windows,)
    recon_errors.append(np.mean(mse_per_window))

Evaluating test files: 100%|██████████| 1070/1070 [18:33<00:00,  1.04s/it]


In [None]:
# Column 0 = position of the file in the test list, column 1 = its score.
stack = np.column_stack((range(len(recon_errors)), recon_errors))

# Split the scores by ground-truth label (0 = normal, 1 = abnormal).
normal_rows = stack[test_labels == 0]
abnormal_rows = stack[test_labels == 1]
score_false = normal_rows[:, 1]
score_true = abnormal_rows[:, 1]

plot_histogram_by_class(score_false, score_true, bins=[20, 30], model_name=MODEL_NAME)

In [None]:
# Scatter the per-file reconstruction errors of the test set, coloured by
# class, and highlight the band [THRESHOLD_MIN, THRESHOLD_MAX].
THRESHOLD_MIN = 8.0
THRESHOLD_MAX = 10.0

p = figure(
    width=600,
    height=400,
    title=f"{MODEL_NAME}: Threshold Range Exploration",
    x_axis_label="Samples",
    y_axis_label="Reconstruction Error",
)

# One scatter layer per class (label value, marker colour, legend entry).
for label_value, color, legend in (
    (0, "crimson", "Normal Signals"),
    (1, "indigo", "Abnormal Signals"),
):
    class_rows = stack[test_labels == label_value]
    source = ColumnDataSource(dict(index=class_rows[:, 0], error=class_rows[:, 1]))
    p.scatter(
        "index",
        "error",
        fill_alpha=0.6,
        fill_color=color,
        line_color=None,
        legend_label=legend,
        source=source,
    )

# Shaded band marking the threshold range.
box = BoxAnnotation(
    bottom=THRESHOLD_MIN,
    top=THRESHOLD_MAX,
    fill_alpha=0.1,
    fill_color="magenta",
    line_color="darkmagenta",
    line_width=1.0,
)
p.add_layout(box)

p.legend.label_text_font_size = "8pt"
p.legend.location = "top_right"
p.title.align = "center"
p.title.text_font_size = "12pt"

p.add_tools(HoverTool(tooltips=[("index", "@index"), ("error", "@error")]))

show(p)

In [None]:
# Sweep the decision threshold over [THRESHOLD_MIN, THRESHOLD_MAX] and plot how
# many test files are misclassified in each direction.
THRESHOLD_MIN = 0.0
THRESHOLD_MAX = 100.0
THRESHOLD_STEP = 0.5

# linspace gives an exact, inclusive end point; arange with a float step does not
# guarantee the number of generated values.
n_thresholds = int(round((THRESHOLD_MAX - THRESHOLD_MIN) / THRESHOLD_STEP)) + 1
thresholds = np.linspace(THRESHOLD_MIN, THRESHOLD_MAX, n_thresholds)
errors = []

for threshold in thresholds:
    predictions = get_prediction(stack[:, 1], threshold=threshold)
    conf_mat = confusion_matrix(test_labels, predictions)
    # Rows are true labels, columns predicted labels:
    # [1, 0] = abnormal predicted normal (FN), [0, 1] = normal predicted abnormal (FP).
    errors.append([threshold, conf_mat[1, 0], conf_mat[0, 1]])

errors = np.array(errors)

p = figure(
    width=600,
    height=400,
    title=f"{MODEL_NAME}: Best Threshold Exploration",
    # The threshold is expressed in reconstruction-error (MSE) units, not percent.
    x_axis_label="Reconstruction Error Threshold",
    y_axis_label="# Samples",
)

source = ColumnDataSource(
    data=dict(
        threshold=errors[:, 0], false_negative=errors[:, 1], false_positive=errors[:, 2]
    )
)

p.line(
    x="threshold",
    y="false_negative",
    color="crimson",
    legend_label="False Negative",
    source=source,
)

p.line(
    x="threshold",
    y="false_positive",
    color="indigo",
    legend_label="False Positive",
    source=source,
)

p.legend.label_text_font_size = "8pt"
p.legend.location = "top_left"
p.legend.click_policy = "hide"
p.title.align = "center"
p.title.text_font_size = "12pt"

p.add_tools(
    HoverTool(
        tooltips=[
            ("threshold", "@threshold"),
            ("false_negative", "@false_negative"),
            ("false_positive", "@false_positive"),
        ]
    )
)
show(p)


In [None]:
# Classify the test files with the same fixed threshold that was applied to the
# validation set and report the confusion matrix and summary metrics.
# NOTE(review): the recorded output of this cell (100% recall, precision equal
# to the abnormal share 530/1070 = 49.53%) indicates that every file is flagged
# as abnormal at this threshold.
THRESHOLD = 10
predictions = get_prediction(stack[:, 1], threshold=THRESHOLD)

plot_confusion_matrix(
    confusion_matrix(test_labels, predictions),
    model_name=MODEL_NAME,
)

print(
    f"Accuracy: {accuracy_score(test_labels, predictions):.2%}, \
Precision: {precision_score(test_labels, predictions):.2%}, \
Recall: {recall_score(test_labels, predictions):.2%}, \
F1: {f1_score(test_labels, predictions):.2%}"
)

Accuracy: 49.53%, Precision: 49.53%, Recall: 100.00%, F1: 66.25%


In [None]:
# Repeat the test-set classification with a second threshold value.
# NOTE(review): this value differs from the one applied to the validation set (10).
# If it was picked by looking at the test-set plots above, the metrics printed
# here are optimistic; the threshold should be selected on validation data
# only — TODO confirm how 29 was chosen.
THRESHOLD = 29
predictions = get_prediction(stack[:, 1], threshold=THRESHOLD)

plot_confusion_matrix(
    confusion_matrix(test_labels, predictions),
    model_name=MODEL_NAME,
)

print(
    f"Accuracy: {accuracy_score(test_labels, predictions):.2%}, \
Precision: {precision_score(test_labels, predictions):.2%}, \
Recall: {recall_score(test_labels, predictions):.2%}, \
F1: {f1_score(test_labels, predictions):.2%}"
)

Accuracy: 92.24%, Precision: 95.71%, Recall: 88.30%, F1: 91.85%


In [None]:
# ROC curve of the test set, using the per-file reconstruction error as the anomaly score.
plot_roc_curve(
    roc_curve(test_labels, recon_errors),
    roc_auc_score(test_labels, recon_errors),

    model_name=MODEL_NAME
)

In [None]:
# Stored as `roc_auc` rather than `auc`: the name `auc` is needed for
# sklearn.metrics.auc, which the partial-AUC helper in this notebook calls.
roc_auc = roc_auc_score(test_labels, recon_errors)

print(f"AUC score: {roc_auc:.4f}")

AUC score: 0.9467


In [None]:
from sklearn.metrics import roc_curve, auc
import numpy as np

def compute_partial_auc(y_true, y_scores, max_fpr=0.1):
    """Area under the ROC curve restricted to ``FPR <= max_fpr`` (not normalised).

    The ROC curve is cut at ``max_fpr``; when no ROC point lies exactly on the
    cut, the TPR at ``max_fpr`` is obtained by linear interpolation between the
    two neighbouring points. The area is integrated with the trapezoidal rule,
    so a perfect classifier scores ``max_fpr``.

    Args:
        y_true: Binary ground-truth labels.
        y_scores: Anomaly scores, higher meaning more likely positive.
        max_fpr: Upper false-positive-rate bound, in ``(0, 1]``.

    Returns:
        The unnormalised partial AUC.

    Raises:
        ValueError: If ``max_fpr`` is outside ``(0, 1]``.
    """
    if not 0 < max_fpr <= 1:
        raise ValueError(f"max_fpr must be in (0, 1], got {max_fpr!r}")

    fpr, tpr, _ = roc_curve(y_true, y_scores)

    # Keep only points where FPR <= max_fpr
    mask = fpr <= max_fpr
    fpr_partial = fpr[mask]
    tpr_partial = tpr[mask]

    # Interpolate to add (max_fpr, interpolated_tpr) if needed
    if fpr_partial[-1] < max_fpr:
        # First ROC point beyond max_fpr and its left neighbour
        idx = np.searchsorted(fpr, max_fpr)
        fpr_left, fpr_right = fpr[idx - 1], fpr[idx]
        tpr_left, tpr_right = tpr[idx - 1], tpr[idx]

        # Linear interpolation
        slope = (tpr_right - tpr_left) / (fpr_right - fpr_left)
        tpr_interp = tpr_left + slope * (max_fpr - fpr_left)

        fpr_partial = np.append(fpr_partial, max_fpr)
        tpr_partial = np.append(tpr_partial, tpr_interp)

    # Trapezoidal rule written out with NumPy so the helper does not depend on
    # the module-level name `auc`, which this notebook also uses for a float score.
    segment_widths = np.diff(fpr_partial)
    segment_heights = (tpr_partial[1:] + tpr_partial[:-1]) / 2.0
    return np.sum(segment_widths * segment_heights)

# Usage
pauc = compute_partial_auc(test_labels, recon_errors, max_fpr=0.1)
print(f"Unnormalized Partial AUC (FPR ≤ 0.1): {pauc:.4f} or the model performs {(pauc/0.1):.1%} as well as a perfect classifier in the region where FPR ≤ 0.1.")

#pauc/0.1 * 100

Unnormalized Partial AUC (FPR ≤ 0.1): 0.0793 or the model performs 79.3% as well as a perfect classifier in the region where FPR ≤ 0.1.


# CV - to delete


In [None]:
def build_autoencoder(input_dim, model_name=None):
    """Build and compile a fresh, untrained LSTM autoencoder.

    Used as the model factory for cross-validation so that every fold starts
    from newly initialised weights. The network is created with
    ``LSTM_autoencoder`` from the notebook-level settings ``frames``,
    ``n_mels`` and ``latent_dim``.

    Args:
        input_dim: Accepted for compatibility with the cross-validation
            runner; the model shape comes from the notebook settings, so the
            value is not used.
        model_name: Accepted for compatibility with the cross-validation
            runner; not used.

    Returns:
        A compiled Keras model (Adam, learning rate 1e-2, MSE loss).
    """
    # `autoencoder` is the already trained model instance in this notebook, not
    # a factory, so a new network has to be created through LSTM_autoencoder.
    model = LSTM_autoencoder(timesteps=frames, features=n_mels, latent_dim=latent_dim)
    model.compile(optimizer=Adam(learning_rate=1e-2), loss='mse')

    return model

# Cross-Validation (5 folds)

In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
import time

In [None]:
# === Your partial AUC calculator ===
def calculate_pauc(y_true, scores, max_fpr=0.1):
    """Partial AUC over ``FPR <= max_fpr``, divided by ``max_fpr``.

    The ROC curve is cut at ``max_fpr``; when no ROC point lies exactly on the
    cut, the TPR at ``max_fpr`` is linearly interpolated so the integrated
    range really extends to ``max_fpr`` (the value it is normalised by).

    Args:
        y_true: Binary ground-truth labels.
        scores: Anomaly scores, higher meaning more likely positive.
        max_fpr: Upper false-positive-rate bound.

    Returns:
        The partial area divided by ``max_fpr`` (1.0 for a perfect
        classifier), or ``0.0`` when fewer than two ROC points are available.
    """
    from sklearn.metrics import roc_curve, auc
    fpr, tpr, _ = roc_curve(y_true, scores)

    # fpr is non-decreasing, so the points with fpr <= max_fpr form a prefix.
    n_kept = int(np.count_nonzero(fpr <= max_fpr))
    fpr_part, tpr_part = fpr[:n_kept], tpr[:n_kept]

    # Close the gap between the last kept point and max_fpr by interpolation.
    if 0 < n_kept < len(fpr) and fpr_part[-1] < max_fpr:
        fraction = (max_fpr - fpr[n_kept - 1]) / (fpr[n_kept] - fpr[n_kept - 1])
        tpr_at_cut = tpr[n_kept - 1] + fraction * (tpr[n_kept] - tpr[n_kept - 1])
        fpr_part = np.append(fpr_part, max_fpr)
        tpr_part = np.append(tpr_part, tpr_at_cut)

    if len(fpr_part) < 2:
        return 0.0
    return auc(fpr_part, tpr_part) / max_fpr

In [None]:
def run_repeated_kfold_file_level(train_data, train_labels, val_data, val_labels,
                                  build_autoencoder, model_name=None,
                                  n_splits=5, n_repeats=1, epochs=100, batch_size=256):
    """Train one model per K-fold split of the training files and score it on the validation set.

    The folds are formed over training *files*. For every fold a new model is
    built with ``build_autoencoder`` and fitted on the windows of that fold's
    training files; it is then scored on the complete external validation set
    (the fold's held-out training files are not used for scoring). Window
    errors are averaged per file and the file-level ROC AUC is recorded.

    Assumes every file contributes the same number of consecutive rows to
    ``train_data`` / ``val_data`` in the order of the corresponding labels —
    only the even divisibility is actually checked.

    Args:
        train_data: Training windows, first axis = windows of all files.
        train_labels: One entry per training file (only its length is used).
        val_data: Validation windows, first axis = windows of all files.
        val_labels: One binary label per validation file.
        build_autoencoder: Callable ``(input_dim, model_name=...)`` returning
            a compiled model; it receives ``X_train.shape[1]``.
        model_name: Forwarded to ``build_autoencoder``.
        n_splits: Number of folds per repetition.
        n_repeats: Number of repetitions; repetition ``r`` uses ``random_state=r``.
        epochs: Training epochs per fold.
        batch_size: Batch size for fitting and prediction.

    Returns:
        Dict with key ``"val_auc_scores"``: the list of file-level AUCs, one
        per fold and repetition.
    """
    file_count = len(train_labels)
    total_frames = len(train_data)
    frames_per_file = total_frames // file_count
    assert frames_per_file * file_count == total_frames, \
        "train_data must be evenly divisible by number of files"

    val_file_count = len(val_labels)
    val_frames_per_file = val_data.shape[0] // val_file_count
    assert val_frames_per_file * val_file_count == val_data.shape[0], \
        "val_data must be evenly divisible by number of val files"

    # Reduce the squared error over every non-batch axis so that both flat
    # (N, D) inputs and windowed (N, T, F) inputs yield one error per window.
    error_axes = tuple(range(1, val_data.ndim))

    all_val_auc = []

    for repeat in range(n_repeats):
        print(f"\n======== Repetition {repeat + 1}/{n_repeats} ========")
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=repeat)

        # The held-out file indices of each split are intentionally unused:
        # scoring happens on the external validation set.
        for fold, (train_idx, _) in enumerate(kf.split(np.arange(file_count)), start=1):
            print(f"\n--- Fold {fold} ---")
            start_time = time.time()

            # Gather frames for the training files of this fold
            train_frame_idx = np.concatenate([
                np.arange(i * frames_per_file, (i + 1) * frames_per_file) for i in train_idx
            ])
            X_train = train_data[train_frame_idx]

            # Fresh model for every fold
            model = build_autoencoder(X_train.shape[1], model_name=model_name)

            model.fit(X_train, X_train,
                      epochs=epochs,
                      batch_size=batch_size,
                      verbose=0)

            # Predict reconstruction on the full validation set
            val_recon = model.predict(val_data, batch_size=batch_size, verbose=0)

            # Window-level reconstruction errors (MSE), one value per window
            val_errors = np.mean(np.square(val_data - val_recon), axis=error_axes)

            # Aggregate window errors to file-level by averaging the windows of each file
            val_file_scores = np.mean(val_errors.reshape(-1, val_frames_per_file), axis=1)

            # Compute AUC on validation files
            val_auc = roc_auc_score(val_labels, val_file_scores)
            all_val_auc.append(val_auc)

            print(f"Fold {fold} - File-level AUC: {val_auc:.4f}")
            print(f"Elapsed: {time.time() - start_time:.2f} seconds")

    # Report the actual number of repetitions instead of a fixed figure.
    print(f"\n======== Final Summary Over {n_repeats} Repeat(s) ========")
    print(f"Validation AUC: mean={np.mean(all_val_auc):.4f}, std={np.std(all_val_auc):.4f}")

    return {
        "val_auc_scores": all_val_auc
    }

In [None]:
%%time
results = run_repeated_kfold_file_level(
    train_data, train_labels,
    val_data, val_labels,
    build_autoencoder,
    model_name=MODEL_NAME)





--- Fold 1 ---
Fold 1 - File-level AUC: 0.9721
Elapsed: 1714.26 seconds

--- Fold 2 ---
Fold 2 - File-level AUC: 0.9947
Elapsed: 1736.53 seconds

--- Fold 3 ---
Fold 3 - File-level AUC: 0.9924
Elapsed: 1762.93 seconds

--- Fold 4 ---
Fold 4 - File-level AUC: 0.9898
Elapsed: 1753.74 seconds

--- Fold 5 ---
