In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import glob
import os
import warnings
import numpy as np

from sklearn.model_selection import train_test_split
from collections import defaultdict
from tqdm.notebook import tqdm

from sound_utils import extract_signal_features, generate_dataset, load_sound_file
from misc import build_files_list, dump_pickle, load_pickle
from eval_perf import (
    get_prediction,
    plot_confusion_matrix,
    plot_histogram_by_class,
    plot_loss_per_epoch,
    plot_pr_curve,
    plot_roc_curve,
)

# Seed NumPy's global RNG so that the np.random.choice / np.random.shuffle
# calls used later for sampling and shuffling give the same result on every run.
np.random.seed(42)

In [None]:
import tensorflow as tf

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.python.client import device_lib

# Fix TensorFlow's global random seed so model training is repeatable across runs.
tf.random.set_seed(42)

In [None]:
from bokeh.io import export_svgs, output_notebook, reset_output
from bokeh.models import BoxAnnotation, ColumnDataSource, HoverTool
from bokeh.plotting import figure, show
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    average_precision_score,
    precision_recall_curve,
    roc_auc_score,
    roc_curve,
)

# Send Bokeh output to the notebook so that show(...) renders figures inline.
output_notebook()

# Processing pipeline


1.   Load data
2.   Split into training, test and validation sets
3.   Extract log-Mel spectrograms
4.   Save the spectrograms



In [None]:
# Root folder of the ToyCar data; the dataset and model folders are sub-folders of it.
root_dir = "/.../ToyCar_data"
DATA_PATH = f"{root_dir}/datasets"
MODEL_PATH = f"{root_dir}/models"

In [None]:
def build_files_list(root_dir, normal_dir="NormalSound", abnormal_dir="AnomalousSound"):
    """
    Collect the normal and anomalous recordings found under ``root_dir``.

    The tree below ``root_dir`` is walked recursively. Files that sit directly
    inside a folder named ``normal_dir`` are returned as normal recordings, and
    files directly inside a folder named ``abnormal_dir`` as abnormal ones.
    Files in any other folder are ignored.

    NOTE: this definition replaces the ``build_files_list`` imported from
    ``misc`` at the top of the notebook.

    Parameters
    ----------
    root_dir : str
        Top-level folder to search.
    normal_dir : str, optional
        Name of the folder(s) holding normal recordings.
    abnormal_dir : str, optional
        Name of the folder(s) holding anomalous recordings.

    Returns
    -------
    tuple[list[str], list[str]]
        ``(normal_files, abnormal_files)`` as full paths, each list sorted so
        the result does not depend on filesystem enumeration order (which keeps
        the seeded random sampling downstream reproducible).
    """
    normal_files = []
    abnormal_files = []

    for current_dir, _, file_names in os.walk(top=root_dir):
        # Classify by the folder's own name rather than by a hard-coded
        # absolute path, so the function honours whatever root_dir is passed.
        folder_name = os.path.basename(os.path.normpath(current_dir))
        if folder_name == normal_dir:
            bucket = normal_files
        elif folder_name == abnormal_dir:
            bucket = abnormal_files
        else:
            continue
        bucket.extend(os.path.join(current_dir, name) for name in file_names)

    normal_files.sort()
    abnormal_files.sort()
    return normal_files, abnormal_files

In [None]:
# Enumerate every recording under the data root, grouped by class.
normal_files, abnormal_files = build_files_list(root_dir)

# Keep a random half of each class: draw indices without replacement,
# then pick the corresponding paths.
normal_sample_indices = np.random.choice(
    len(normal_files), size=len(normal_files) // 2, replace=False
)
abnormal_sample_indices = np.random.choice(
    len(abnormal_files), size=len(abnormal_files) // 2, replace=False
)
normal_files_sampled = [normal_files[idx] for idx in normal_sample_indices]
abnormal_files_sampled = [abnormal_files[idx] for idx in abnormal_sample_indices]

# Label convention: 0 = normal, 1 = abnormal.
normal_labels = np.zeros(len(normal_files_sampled))
abnormal_labels = np.ones(len(abnormal_files_sampled))

# Only the normal recordings are split; 80% of them become the training set.
train_files, test_files, train_labels, test_labels = train_test_split(
    normal_files_sampled,
    normal_labels,
    train_size=0.8,
    random_state=42,
    shuffle=True,
)

# Every sampled abnormal recording goes into the test set.
test_files = np.concatenate((test_files, abnormal_files_sampled), axis=0)
test_labels = np.concatenate((test_labels, abnormal_labels), axis=0)

# Shuffle files and labels with one shared permutation so they stay aligned.
test_indices = np.arange(len(test_files))
np.random.shuffle(test_indices)
test_files, test_labels = test_files[test_indices], test_labels[test_indices]

# Report the size and abnormal count of each split.
print(
    f"Train set has {train_labels.shape[0]} signals including abnormal {train_labels.sum():.0f} signals, "
    f"but test set has {test_labels.shape[0]} signals including abnormal {test_labels.sum():.0f} signals."
)


Train set has 2160 signals including abnormal 0 signals, but test set has 1069 signals including abnormal 529 signals.


In [None]:
# Read the file lists and labels back from text files, one entry per line.
# Every entry (labels included) is kept as a string.
dataset = defaultdict(list)

for key in ['train_files', 'test_files', 'train_labels', 'test_labels']:
    file_name = os.path.join(DATA_PATH, 'dataset', key + '.txt')
    with open(file_name, 'r') as f:
        # Strip only the line terminator. Slicing with [:-1] would cut a real
        # character off the last entry when the file has no trailing newline.
        dataset[key].extend(line.rstrip('\n') for line in f)

In [None]:
# Load the pickled training data and report its shape.
train_data_path = os.path.join(DATA_PATH, "dataset", "train_data.pkl")
train_data = load_pickle(train_data_path)
print(f"Train data has a {train_data.shape} shape.")

Train data has a (2220480, 400) shape.


# Model 1: Dense Autoencoder

In [None]:
def autoencoder(input_dims, model_name=None):
    """
    Build a symmetric fully-connected autoencoder.

    Layout: input -> Dense 80/40/20 (ReLU) -> BatchNorm -> Dense 8 (linear
    bottleneck) -> BatchNorm -> Dense 20/40/80 (ReLU) -> Dense ``input_dims``
    (linear output).

    Parameters
    ----------
    input_dims : int
        Width of the input vector; the output layer has the same width.
    model_name : str, optional
        Name given to the returned Keras model.

    Returns
    -------
    Model
        Uncompiled Keras model mapping the input to its reconstruction.
    """
    encoder_input = Input(shape=(input_dims,), name='encoder_input')

    # Encoder: progressively narrower ReLU layers, then batch normalisation.
    x = encoder_input
    for position, units in enumerate((80, 40, 20), start=1):
        x = Dense(units, name=f'encoder_dense_{position}', activation='relu')(x)
    x = BatchNormalization(name='encoder_BN')(x)

    # Latent space: 8 units, no activation.
    x = Dense(8, name='bottleneck_dense')(x)

    # Decoder: mirror image of the encoder.
    x = BatchNormalization(name='decoder_BN')(x)
    for position, units in enumerate((20, 40, 80), start=1):
        x = Dense(units, name=f'decoder_dense_{position}', activation='relu')(x)

    # Linear output layer restoring the input width.
    reconstruction = Dense(input_dims, activation=None, name='decoder_output')(x)

    return Model(inputs=encoder_input, outputs=reconstruction, name=model_name)

In [None]:
# Feature-extraction settings; the same values are passed to
# extract_signal_features when the test files are scored.
n_fft = 1024
hop_length = 512
n_mels = 80
frames = 5

MODEL_NAME = "Model1_Dense_AutoEncoder"
# The network input width is n_mels * frames.
model = autoencoder(n_mels * frames, model_name=MODEL_NAME)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
model.summary()

None


In [None]:
# Train the autoencoder
%%time
batch_size = 512
epochs = 100

model.compile(
    optimizer=Adam(learning_rate=1e-03),
    loss="mean_squared_error"
)


history = model.fit(
    train_data,train_data,
    batch_size=batch_size,
    epochs=epochs,
    verbose=False,
    callbacks=[EarlyStopping(monitor="val_loss", patience=10)],
    validation_split=0.2,
    shuffle=True
)

CPU times: user 9min 15s, sys: 33 s, total: 9min 48s
Wall time: 6min 9s


In [None]:
# Save the trained autoencoder under MODEL_PATH, named after the model.
model_file = os.path.join(MODEL_PATH, f"{MODEL_NAME}.h5")
model.save(model_file)



# Performance Evaluation - report

In [None]:
# Visualise the training history returned by model.fit.
plot_loss_per_epoch(history, model_name=MODEL_NAME)

In [None]:
# Score every test file by its reconstruction error (MSE):
#   features    = original input extracted from the audio file
#   predictions = the autoencoder's reconstruction of that input
# The squared error is averaged within each row (axis=1) and then across rows,
# giving one scalar per file.
recon_errors = []

for file_path in tqdm(test_files):
    signal, sr = load_sound_file(file_path)

    features = extract_signal_features(
        signal, sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, frames=frames
    )

    # verbose=0: predict() otherwise prints its own progress bar for every
    # file and buries the tqdm bar under hundreds of log lines.
    predictions = model.predict(features, verbose=0)
    mse = np.mean(np.mean(np.square(features - predictions), axis=1))
    recon_errors.append(mse)

  0%|          | 0/1069 [00:00<?, ?it/s]

[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 69ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [None]:
# Two-column table: column 0 = position in the test set, column 1 = reconstruction error.
stack = np.column_stack((np.arange(len(recon_errors)), recon_errors))

# Split the error column by ground-truth class (0 = normal, 1 = abnormal).
score_false = stack[test_labels == 0, 1]
score_true = stack[test_labels == 1, 1]

plot_histogram_by_class(score_false, score_true, bins=[20, 30], model_name=MODEL_NAME)

In [None]:
THRESHOLD_MIN = 0.0
THRESHOLD_MAX = 20.0

p = figure(
    width=600,
    height=400,
    title=f"{MODEL_NAME}: Threshold Range Exploration",
    x_axis_label="Samples",
    y_axis_label="Reconstruction Error",
)

source = ColumnDataSource(
    dict(index=stack[test_labels == 0][:, 0], error=stack[test_labels == 0][:, 1])
)

p.scatter(
    "index",
    "error",
    fill_alpha=0.6,
    fill_color="crimson",
    line_color=None,
    legend_label="Normal Signals",
    source=source,
)

source = ColumnDataSource(
    dict(index=stack[test_labels == 1][:, 0], error=stack[test_labels == 1][:, 1])
)

p.scatter(
    "index",
    "error",
    fill_alpha=0.6,
    fill_color="indigo",
    line_color=None,
    legend_label="Abnormal Signals",
    source=source,
)

source = ColumnDataSource(
    data=dict(
        index=stack[:, 0],
        threshold_min=np.repeat(THRESHOLD_MIN, stack.shape[0]),
        threshold_max=np.repeat(THRESHOLD_MAX, stack.shape[0]),
    )
)

box = BoxAnnotation(
    bottom=THRESHOLD_MIN,
    top=THRESHOLD_MAX,
    fill_alpha=0.1,
    fill_color="magenta",
    line_color="darkmagenta",
    line_width=1.0,
)
p.add_layout(box)

p.legend.label_text_font_size = "8pt"
p.legend.location = "top_right"
p.title.align = "center"
p.title.text_font_size = "12pt"

p.add_tools(HoverTool(tooltips=[("index", "@index"), ("error", "@error")]))

show(p)

In [None]:
THRESHOLD_MIN =9.0
THRESHOLD_MAX = 12.0

p = figure(
    width=600,
    height=400,
    title=f"{MODEL_NAME}: Threshold Range Exploration",
    x_axis_label="Samples",
    y_axis_label="Reconstruction Error",
)

source = ColumnDataSource(
    dict(index=stack[test_labels == 0][:, 0], error=stack[test_labels == 0][:, 1])
)

p.scatter(
    "index",
    "error",
    fill_alpha=0.6,
    fill_color="crimson",
    line_color=None,
    legend_label="Normal Signals",
    source=source,
)

source = ColumnDataSource(
    dict(index=stack[test_labels == 1][:, 0], error=stack[test_labels == 1][:, 1])
)

p.scatter(
    "index",
    "error",
    fill_alpha=0.6,
    fill_color="indigo",
    line_color=None,
    legend_label="Abnormal Signals",
    source=source,
)

source = ColumnDataSource(
    data=dict(
        index=stack[:, 0],
        threshold_min=np.repeat(THRESHOLD_MIN, stack.shape[0]),
        threshold_max=np.repeat(THRESHOLD_MAX, stack.shape[0]),
    )
)

box = BoxAnnotation(
    bottom=THRESHOLD_MIN,
    top=THRESHOLD_MAX,
    fill_alpha=0.1,
    fill_color="magenta",
    line_color="darkmagenta",
    line_width=1.0,
)
p.add_layout(box)

p.legend.label_text_font_size = "8pt"
p.legend.location = "top_right"
p.title.align = "center"
p.title.text_font_size = "12pt"

p.add_tools(HoverTool(tooltips=[("index", "@index"), ("error", "@error")]))

show(p)

In [None]:
THRESHOLD_MIN = 9.0
THRESHOLD_MAX = 12.0
THRESHOLD_STEP = 0.2

# Evenly spaced candidate thresholds that include both end points. linspace is
# used instead of arange because arange with a non-integer step can gain or
# lose the last element through floating-point round-off.
n_thresholds = int(round((THRESHOLD_MAX - THRESHOLD_MIN) / THRESHOLD_STEP)) + 1
thresholds = np.linspace(THRESHOLD_MIN, THRESHOLD_MAX, num=n_thresholds)
errors = []

for threshold in thresholds:
    predictions = get_prediction(stack[:, 1], threshold=threshold)
    conf_mat = confusion_matrix(test_labels, predictions)
    # Rows are true classes, columns are predicted classes:
    # [1, 0] = false negatives, [0, 1] = false positives.
    errors.append([threshold, conf_mat[1, 0], conf_mat[0, 1]])

# Columns: threshold, false negatives, false positives.
errors = np.array(errors)

# NOTE(review): the x-axis label says "(%)" but the thresholds are raw
# reconstruction-error values — confirm the intended unit.
p = figure(
    width=600,
    height=400,
    title=f"{MODEL_NAME}: Best Threshold Exploration",
    x_axis_label="Reconstruction Error Threshold (%)",
    y_axis_label="# Samples",
)

source = ColumnDataSource(
    data=dict(
        threshold=errors[:, 0], false_negative=errors[:, 1], false_positive=errors[:, 2]
    )
)

p.line(
    x="threshold",
    y="false_negative",
    color="crimson",
    legend_label="False Negative",
    source=source,
)

p.line(
    x="threshold",
    y="false_positive",
    color="indigo",
    legend_label="False Positive",
    source=source,
)

p.legend.label_text_font_size = "8pt"
p.legend.location = "top_left"
p.legend.click_policy = "hide"
p.title.align = "center"
p.title.text_font_size = "12pt"

p.add_tools(
    HoverTool(
        tooltips=[
            ("threshold", "@threshold"),
            ("false_negative", "@false_negative"),
            ("false_positive", "@false_positive"),
        ]
    )
)
show(p)


In [None]:
# Decision threshold on the reconstruction error, chosen from the sweep.
THRESHOLD = 11
predictions = get_prediction(stack[:, 1], threshold=THRESHOLD)

conf_mat = confusion_matrix(test_labels, predictions)
plot_confusion_matrix(conf_mat, model_name=MODEL_NAME)

# Summary classification metrics at the chosen threshold.
print(
    f"Accuracy: {accuracy_score(test_labels, predictions):.2%}, "
    f"Precision: {precision_score(test_labels, predictions):.2%}, "
    f"Recall: {recall_score(test_labels, predictions):.2%}, "
    f"F1: {f1_score(test_labels, predictions):.2%}"
)

Accuracy: 93.45%, Precision: 99.35%, Recall: 87.33%, F1: 92.96%


In [None]:
# ROC curve, using the raw reconstruction error as the anomaly score.
roc_points = roc_curve(test_labels, recon_errors)
roc_area = roc_auc_score(test_labels, recon_errors)

plot_roc_curve(roc_points, roc_area, model_name=MODEL_NAME)

In [None]:
# Area under the full ROC curve.
auc = roc_auc_score(test_labels, recon_errors)
print(f"AUC score: {auc:.4f}")

AUC score: 0.9647


In [None]:
# Precision-recall curve and average precision for the same anomaly score.
pr_points = precision_recall_curve(test_labels, recon_errors)
avg_precision = average_precision_score(test_labels, recon_errors)

plot_pr_curve(pr_points, avg_precision, model_name=MODEL_NAME)



In [None]:
# pAUC score
def compute_partial_auc(y_true, y_score, max_fpr=0.1):
    """
    Unnormalized area under the ROC curve restricted to FPR in [0, max_fpr].

    A perfect classifier scores ``max_fpr``, so dividing the result by
    ``max_fpr`` gives the fraction of the ideal area that was achieved.

    NOTE(review): no definition or import of ``compute_partial_auc`` is visible
    in this notebook, so a fresh kernel raised NameError here. If the project
    already ships a helper with this name, import that instead.

    Parameters
    ----------
    y_true : array-like
        Binary ground-truth labels (1 = positive / abnormal).
    y_score : array-like
        Scores where a higher value means "more likely positive".
    max_fpr : float, optional
        Upper false-positive-rate bound, in (0, 1].

    Returns
    -------
    float
        Trapezoidal area under the ROC curve between FPR 0 and ``max_fpr``.

    Raises
    ------
    ValueError
        If ``max_fpr`` is not in (0, 1].
    """
    if not 0 < max_fpr <= 1:
        raise ValueError(f"max_fpr must be in (0, 1], got {max_fpr!r}")

    fpr, tpr, _ = roc_curve(y_true, y_score)

    if max_fpr < 1:
        # Cut the curve at max_fpr, linearly interpolating the TPR at the cut.
        stop = np.searchsorted(fpr, max_fpr, side="right")
        tpr_at_cut = np.interp(
            max_fpr, fpr[stop - 1:stop + 1], tpr[stop - 1:stop + 1]
        )
        fpr = np.append(fpr[:stop], max_fpr)
        tpr = np.append(tpr[:stop], tpr_at_cut)

    # Trapezoidal rule over the (possibly truncated) curve.
    return float(np.sum(np.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2.0))


pauc = compute_partial_auc(test_labels, recon_errors, max_fpr=0.1)
print(f"Unnormalized Partial AUC (FPR ≤ 0.1): {pauc:.4f} or the model performs {(pauc/0.1):.1%} as well as a perfect classifier in the region where FPR ≤ 0.1.")

Unnormalized Partial AUC (FPR ≤ 0.1): 0.0943 or the model performs 94.3% as well as a perfect classifier in the region where FPR ≤ 0.1.
