In [1]:
# Import libraries
import os
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K

from tqdm import tqdm

from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import precision_recall_fscore_support

print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))
print("TensorFlow is using:", tf.test.gpu_device_name())
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth enabled.")
    except RuntimeError as e:
        print(e)

2025-03-30 16:31:34.942468: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743377494.966391    2754 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743377494.973623    2754 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-30 16:31:34.995806: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Num GPUs Available: 1
TensorFlow is using: /device:GPU:0
GPU memory growth enabled.


I0000 00:00:1743377497.513609    2754 gpu_process_state.cc:201] Using CUDA malloc Async allocator for GPU: 0
I0000 00:00:1743377497.513910    2754 gpu_device.cc:2022] Created device /device:GPU:0 with 5564 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3070, pci bus id: 0000:06:00.0, compute capability: 8.6


In [2]:
def load_data(spectrogram_dir, label_dir):
    spectrograms = []
    labels = []

    # Load spectrograms
    spectrogram_files = sorted(os.listdir(spectrogram_dir))
    label_files = sorted(os.listdir(label_dir))

    total_files = len(spectrogram_files)

    for spec_file, label_file in tqdm(zip(spectrogram_files, label_files), 
                                        total=total_files, 
                                        desc="Loading Data", 
                                        unit="file"):
        # Load data
        spec_data = np.load(os.path.join(spectrogram_dir, spec_file)) # (n, 64, 42)
        label_data = np.load(os.path.join(label_dir, label_file)).T   # (n, 8)

        label_data = label_data[:, 2].reshape(-1, 1)
        
        # Append if data is valid
        if len(spec_data) > 0:
            spectrograms.append(spec_data)
            labels.append(label_data)

    # Convert to numpy arrays 
    if spectrograms:
        X = np.vstack(spectrograms)
        Y = np.vstack(labels)
        return X, Y
    else:
        return np.array([]), np.array([])

In [3]:
X_train, Y_train = load_data("spectrograms_train", "labels_train")
X_train = np.expand_dims(X_train, axis=-1)
print("Final X_train:", X_train.shape, "Final Y_train:", Y_train.shape)

Loading Data: 100%|██████████| 1289/1289 [00:01<00:00, 671.92file/s]


Final X_train: (319508, 64, 42, 1) Final Y_train: (319508, 1)


In [4]:
# Occurrences
class_counts = np.sum(Y_train, axis=0)
total_samples = len(Y_train)

for i, count in enumerate(class_counts):
    print(f"Class {i}: {count} occurrences ({count / total_samples:.2%} of the data)")

Class 0: 27686 occurrences (8.67% of the data)


In [5]:
model = Sequential([
    Input(shape=(64, 42, 1)),
    Conv2D(32, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.0001)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.0001)),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.0001)),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu', kernel_regularizer=l2(0.0001)),
    Dropout(0.4),  
    Dense(1, activation='sigmoid', dtype='float32')  
])

def weighted_binary_crossentropy(beta=5.0):
    def loss(y_true, y_pred):
        bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
        weight = tf.where(y_true == 1, beta, 1.0)  # Apply beta to true positives
        return tf.reduce_mean(weight * bce)
    return loss

# Compile model with custom loss
model.compile(optimizer='adam', loss=weighted_binary_crossentropy(beta=3.0), metrics=['binary_accuracy'])

model.summary()

I0000 00:00:1743377508.614114    2754 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5564 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3070, pci bus id: 0000:06:00.0, compute capability: 8.6


In [7]:
# Train model
history = model.fit(X_train, Y_train, batch_size=64, epochs=2, verbose=1)

# Final loss & accuracy
final_loss = history.history['loss'][-1]
final_accuracy = history.history['binary_accuracy'][-1]
print(f"Final Loss: {final_loss:.4f}, Final Accuracy: {final_accuracy:.4f}")

2025-03-30 16:35:43.775349: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3435350016 exceeds 10% of free system memory.
2025-03-30 16:35:45.081432: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3435350016 exceeds 10% of free system memory.


Epoch 1/2
[1m4993/4993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 5ms/step - binary_accuracy: 0.9657 - loss: 0.1532
Epoch 2/2
[1m4993/4993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 5ms/step - binary_accuracy: 0.9670 - loss: 0.1489
Final Loss: 0.1492, Final Accuracy: 0.9670


In [8]:
# Get predictions
Y_pred = model.predict(X_train)

# Convert probabilities to binary labels using thresholding (for each time step)
threshold = 0.5
Y_pred_binary = (Y_pred > threshold).astype(int)  # 1 if piano is on, 0 if piano is off

# Ground truth labels for training set
Y_train_labels = Y_train  # Assuming Y_train is already in binary form (0 or 1)

# Compute metrics for binary classification
precision = precision_score(Y_train_labels, Y_pred_binary, average='macro', zero_division=0)
recall = recall_score(Y_train_labels, Y_pred_binary, average='macro', zero_division=0)
f1 = f1_score(Y_train_labels, Y_pred_binary, average='macro', zero_division=0)

# Print the metrics
print(f"Train Precision: {precision:.4f}")
print(f"Train Recall: {recall:.4f}")
print(f"Train F1-Score: {f1:.4f}")

2025-03-30 16:36:42.458328: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3435350016 exceeds 10% of free system memory.


[1m9985/9985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step
Train Precision: 0.9651
Train Recall: 0.8534
Train F1-Score: 0.9000


In [10]:
# --- Evaluation on Test Set ---
X_test, Y_test = load_data("spectrograms_test", "labels_test")
X_test = np.expand_dims(X_test, axis=-1)

Y_pred_test = model.predict(X_test)

# Convert probabilities to binary labels for the test set
Y_pred_test_binary = (Y_pred_test > 0.3).astype(int)

# Compute test metrics
precision_test = precision_score(Y_test, Y_pred_test_binary, average='macro', zero_division=0)
recall_test = recall_score(Y_test, Y_pred_test_binary, average='macro', zero_division=0)
f1_test = f1_score(Y_test, Y_pred_test_binary, average='macro', zero_division=0)

print(f"Test Precision: {precision_test:.4f}")
print(f"Test Recall: {recall_test:.4f}")
print(f"Test F1-Score: {f1_test:.4f}")

Loading Data: 100%|██████████| 151/151 [00:00<00:00, 1401.63file/s]


[1m1234/1234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
Test Precision: 0.8838
Test Recall: 0.8628
Test F1-Score: 0.8729


In [11]:
model.summary()

In [12]:
model.save("clavinet.h5")
model = tf.keras.models.load_model("clavinet.h5", compile=False)

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open("clavinet.tflite", "wb") as f:
    f.write(tflite_model)



INFO:tensorflow:Assets written to: /tmp/tmppv4olvns/assets


INFO:tensorflow:Assets written to: /tmp/tmppv4olvns/assets


Saved artifact at '/tmp/tmppv4olvns'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 64, 42, 1), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  139862596030128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139862596035760: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139862787166576: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139862787175728: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139862787163760: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139863582018544: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139862595560272: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139862595557984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139862595566784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139862595559568: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1743377983.324782    2754 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1743377983.324822    2754 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-03-30 16:39:43.325082: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmppv4olvns
2025-03-30 16:39:43.325594: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-03-30 16:39:43.325603: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmppv4olvns
I0000 00:00:1743377983.329078    2754 mlir_graph_optimization_pass.cc:401] MLIR V1 optimization pass is not enabled
2025-03-30 16:39:43.329807: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-03-30 16:39:43.355524: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmppv4olvns
2025-03-30 16:39:43.363303: I tensorflow/cc/saved_model/loader.cc:466] SavedModel 