In [1]:
# Import libraries
import os
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.regularizers import l2
from tqdm import tqdm
import gc

from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.utils import shuffle

# Memory growth has to be requested BEFORE anything initializes the GPU
# runtime; once the devices exist the setting can no longer be applied.
# Listing physical devices is safe, but tf.test.gpu_device_name() creates the
# device, so that call is deliberately made last.
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available:", len(gpus))
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth enabled.")
    except RuntimeError as e:
        # Raised when the GPUs were already initialized (for example when this
        # cell is re-run without restarting the kernel); report and continue.
        print(e)

# Initializes the GPU as a side effect -- keep after the memory-growth setup.
print("TensorFlow is using:", tf.test.gpu_device_name())

2025-03-29 20:41:36.588448: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743306096.677244     686 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743306096.702745     686 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-29 20:41:36.936657: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Num GPUs Available: 1
TensorFlow is using: /device:GPU:0
GPU memory growth enabled.


I0000 00:00:1743306100.936156     686 gpu_process_state.cc:201] Using CUDA malloc Async allocator for GPU: 0
I0000 00:00:1743306100.937898     686 gpu_device.cc:2022] Created device /device:GPU:0 with 5564 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3070, pci bus id: 0000:06:00.0, compute capability: 8.6


In [2]:
import os
import numpy as np
from tqdm import tqdm

def load_data(spectrogram_dir, label_dir):
    """Load paired spectrogram/label files and stack them into two arrays.

    Files in both directories are listed, sorted by name, and paired by
    position, so the i-th spectrogram file is matched with the i-th label
    file. Each label array is transposed after loading so that samples lie
    along the first axis, matching the spectrograms.

    Args:
        spectrogram_dir: Directory of arrays readable by ``np.load``; each is
            indexed by sample along axis 0 (e.g. ``(n, 32, 42)``).
        label_dir: Directory of label arrays stored with samples along the
            LAST axis (``(num_classes, n)``); they are transposed to
            ``(n, num_classes)``.

    Returns:
        Tuple ``(X, Y)`` of the vertically stacked spectrograms and labels.
        Files whose spectrogram array is empty are skipped. If nothing was
        loaded, two empty arrays are returned.

    Raises:
        ValueError: If the directories hold a different number of files, or
            if a spectrogram file and its label file disagree on the number
            of samples. Either case would otherwise silently produce
            truncated or misaligned data.
    """
    spectrogram_files = sorted(os.listdir(spectrogram_dir))
    label_files = sorted(os.listdir(label_dir))

    # zip() would quietly stop at the shorter listing, dropping data and
    # hiding a likely mis-pairing, so fail loudly instead.
    if len(spectrogram_files) != len(label_files):
        raise ValueError(
            f"File count mismatch: {len(spectrogram_files)} files in "
            f"{spectrogram_dir!r} vs {len(label_files)} files in {label_dir!r}"
        )

    spectrograms = []
    labels = []

    for spec_file, label_file in tqdm(zip(spectrogram_files, label_files),
                                      total=len(spectrogram_files),
                                      desc="Loading Data",
                                      unit="file"):
        spec_data = np.load(os.path.join(spectrogram_dir, spec_file))
        # Labels are stored class-major; transpose to one row per sample.
        label_data = np.load(os.path.join(label_dir, label_file)).T

        # Skip files that contain no samples.
        if len(spec_data) == 0:
            continue

        # Every spectrogram needs exactly one label row; otherwise the
        # stacked X and Y would no longer line up sample by sample.
        if len(spec_data) != len(label_data):
            raise ValueError(
                f"Sample count mismatch: {spec_file!r} has {len(spec_data)} "
                f"samples but {label_file!r} has {len(label_data)} label rows"
            )

        spectrograms.append(spec_data)
        labels.append(label_data)

    if not spectrograms:
        return np.array([]), np.array([])

    return np.vstack(spectrograms), np.vstack(labels)

In [3]:
# Read every training file, then append a trailing channel axis so each
# spectrogram becomes a single-channel image for the Conv2D layers
X_train, Y_train = load_data(spectrogram_dir="spectrograms_train", label_dir="labels_train")
X_train = X_train[..., np.newaxis]
print("Final X_train:", X_train.shape, "Final Y_train:", Y_train.shape)

Loading Data: 100%|██████████| 1289/1289 [00:03<00:00, 407.67file/s]


Final X_train: (319508, 32, 42, 1) Final Y_train: (319508, 6)


In [4]:
# Class balance: summing the label matrix over the sample axis yields the
# number of positive samples for each label column
total_samples = Y_train.shape[0]
class_counts = Y_train.sum(axis=0)

for i, count in enumerate(class_counts):
    print(f"Class {i}: {count} occurrences ({count / total_samples:.2%} of the data)")

Class 0: 128695 occurrences (40.28% of the data)
Class 1: 125932 occurrences (39.41% of the data)
Class 2: 171444 occurrences (53.66% of the data)
Class 3: 257671 occurrences (80.65% of the data)
Class 4: 155212 occurrences (48.58% of the data)
Class 5: 271620 occurrences (85.01% of the data)


In [7]:
# Small CNN for multi-label classification of (32, 42, 1) spectrogram patches.
L2_WEIGHT = 0.0001  # L2 penalty shared by every regularized layer


def _conv_pool(filters):
    """Return a 3x3 same-padded ReLU convolution followed by 2x2 max pooling."""
    return [
        Conv2D(filters, (3, 3), activation='relu', padding='same',
               kernel_regularizer=l2(L2_WEIGHT)),
        MaxPooling2D((2, 2)),
    ]


model = Sequential(
    [Input(shape=(32, 42, 1))]
    # Three conv/pool stages with a doubling number of filters
    + _conv_pool(16)
    + _conv_pool(32)
    + _conv_pool(64)
    + [
        Flatten(),
        Dense(64, activation='relu', kernel_regularizer=l2(L2_WEIGHT)),
        Dropout(0.3),
        # One sigmoid unit per label, so each class is scored independently
        Dense(6, activation='sigmoid', dtype='float32'),
    ]
)

# Binary cross-entropy pairs with the per-label sigmoid outputs
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['binary_accuracy'],
)
model.summary()

In [8]:
# Fit the network; Keras holds out the last 10% of the arrays for validation
history = model.fit(
    X_train,
    Y_train,
    validation_split=0.1,
    epochs=10,
    batch_size=64,
    verbose=1,
)

# Training-set loss and accuracy recorded for the last epoch
train_log = history.history
final_loss = train_log['loss'][-1]
final_accuracy = train_log['binary_accuracy'][-1]
print(f"Final Loss: {final_loss:.4f}, Final Accuracy: {final_accuracy:.4f}")

Epoch 1/10


I0000 00:00:1743306228.697191    1184 service.cc:148] XLA service 0x7fdeb0007630 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743306228.699140    1184 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 3070, Compute Capability 8.6
2025-03-29 20:43:48.753920: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1743306228.939147    1184 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  44/4494[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15s[0m 4ms/step - binary_accuracy: 0.6307 - loss: 0.7811

I0000 00:00:1743306231.004794    1184 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m4494/4494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 4ms/step - binary_accuracy: 0.7934 - loss: 0.4418 - val_binary_accuracy: 0.8583 - val_loss: 0.3310
Epoch 2/10
[1m4494/4494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 4ms/step - binary_accuracy: 0.8661 - loss: 0.3261 - val_binary_accuracy: 0.8686 - val_loss: 0.3210
Epoch 3/10
[1m4494/4494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 3ms/step - binary_accuracy: 0.8773 - loss: 0.3074 - val_binary_accuracy: 0.8782 - val_loss: 0.3072
Epoch 4/10
[1m4494/4494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 4ms/step - binary_accuracy: 0.8824 - loss: 0.2997 - val_binary_accuracy: 0.8786 - val_loss: 0.3049
Epoch 5/10
[1m4494/4494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 4ms/step - binary_accuracy: 0.8864 - loss: 0.2940 - val_binary_accuracy: 0.8777 - val_loss: 0.3055
Epoch 6/10
[1m4494/4494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 4ms/step - binary_accuracy: 0.8884 -

In [9]:
# Score the network on the full X_train array
Y_pred = model.predict(X_train)

# Binarize: a label counts as predicted when its probability exceeds the cut-off
threshold = 0.5
Y_pred_binary = (Y_pred > threshold).astype(int)

# Sample-averaged multi-label metrics, all computed with identical settings
precision, recall, f1 = (
    scorer(Y_train, Y_pred_binary, average='samples', zero_division=0)
    for scorer in (precision_score, recall_score, f1_score)
)

# Report
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

[1m9985/9985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1ms/step
Precision: 0.8873
Recall: 0.8663
F1-Score: 0.8659


In [None]:
# The test data gets the same loading and channel-axis step as the training data
X_test, Y_test = load_data(spectrogram_dir="spectrograms_test", label_dir="labels_test")
X_test = X_test[..., np.newaxis]
Y_pred_test = model.predict(X_test)

In [56]:
# --- Threshold Adjustment ---
num_classes = Y_test.shape[1]
thresholds = np.full(num_classes, 0.5)  # every class starts at 0.5
# Hand-picked per-class overrides (class index -> decision threshold)
for class_idx, class_threshold in {0: 0.3, 1: 0.3, 2: 0.6, 4: 0.4}.items():
    thresholds[class_idx] = class_threshold

# Broadcasting compares each probability column with its own threshold
Y_pred_test_binary = (Y_pred_test >= thresholds).astype(int)
# --- End Threshold Adjustment ---

# Sample-averaged metrics over the whole test set
sample_avg = dict(average='samples', zero_division=0)
precision_test = precision_score(Y_test, Y_pred_test_binary, **sample_avg)
recall_test = recall_score(Y_test, Y_pred_test_binary, **sample_avg)
f1_test = f1_score(Y_test, Y_pred_test_binary, **sample_avg)

print(f"Test Precision: {precision_test:.4f}")
print(f"Test Recall: {recall_test:.4f}")
print(f"Test F1-Score: {f1_test:.4f}")

# Per-class breakdown; average=None returns one value per label column
precision_per_class, recall_per_class, f1_per_class, _ = precision_recall_fscore_support(
    y_true=Y_test, y_pred=Y_pred_test_binary, average=None, zero_division=0
)

# Class frequencies are taken from the TRAINING labels (Y_train must exist)
total_samples = Y_train.shape[0]
class_counts = Y_train.sum(axis=0)

for i in range(num_classes):
    print(f"Class {i} | Precision={precision_per_class[i]:.4f}, Recall={recall_per_class[i]:.4f}, "
          f"F1={f1_per_class[i]:.4f} | Thresh={thresholds[i]:.1f}, Occurrences={int(class_counts[i])} ({class_counts[i] / total_samples:.2%})")

Test Precision: 0.8618
Test Recall: 0.8607
Test F1-Score: 0.8476
Class 0 | Precision=0.7362, Recall=0.8181, F1=0.7750 | Thresh=0.3, Occurrences=128695 (40.28%)
Class 1 | Precision=0.8428, Recall=0.7902, F1=0.8157 | Thresh=0.3, Occurrences=125932 (39.41%)
Class 2 | Precision=0.8439, Recall=0.8080, F1=0.8255 | Thresh=0.6, Occurrences=171444 (53.66%)
Class 3 | Precision=0.9498, Recall=0.9681, F1=0.9588 | Thresh=0.5, Occurrences=257671 (80.65%)
Class 4 | Precision=0.9281, Recall=0.8164, F1=0.8687 | Thresh=0.4, Occurrences=155212 (48.58%)
Class 5 | Precision=0.9502, Recall=0.9863, F1=0.9679 | Thresh=0.5, Occurrences=271620 (85.01%)


In [19]:
# Print the model's layer-by-layer summary again before exporting it
model.summary()

In [20]:
# Persist the trained network to disk in HDF5 format.
model.save("instrument_classifier.h5")

# Reload the saved file under a SEPARATE name for export. Rebinding `model`
# to this uncompiled copy (compile=False) would leave the notebook with a
# model that can no longer be trained if an earlier cell is re-run.
export_model = tf.keras.models.load_model("instrument_classifier.h5", compile=False)

# Convert the reloaded network to a TensorFlow Lite flatbuffer.
converter = tf.lite.TFLiteConverter.from_keras_model(export_model)
tflite_model = converter.convert()

# convert() returns the serialized model as bytes; write them out verbatim.
with open("instrument_classifier.tflite", "wb") as f:
    f.write(tflite_model)



INFO:tensorflow:Assets written to: /tmp/tmpuvpmf_09/assets


INFO:tensorflow:Assets written to: /tmp/tmpuvpmf_09/assets


Saved artifact at '/tmp/tmpuvpmf_09'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 32, 40, 1), dtype=tf.float32, name='input_layer_3')
Output Type:
  TensorSpec(shape=(None, 5), dtype=tf.float32, name=None)
Captures:
  140103781968256: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140103781973360: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140103781960688: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140104423013040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140103782011952: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140103782012656: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140103782009840: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140103782015120: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140103782069552: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140103782067440: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140104423461408

W0000 00:00:1742875013.770337     647 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1742875013.770383     647 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-03-24 20:56:53.770793: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpuvpmf_09
2025-03-24 20:56:53.771591: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-03-24 20:56:53.771602: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpuvpmf_09
I0000 00:00:1742875013.777529     647 mlir_graph_optimization_pass.cc:401] MLIR V1 optimization pass is not enabled
2025-03-24 20:56:53.778843: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-03-24 20:56:53.816662: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpuvpmf_09
2025-03-24 20:56:53.828498: I tensorflow/cc/saved_model/loader.cc:466] SavedModel 