In [1]:
import sys
import os
import essentia.standard as es
import pandas as pd
from pprint import pprint
%matplotlib inline

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Hide INFO, WARNING y ERROR of TensorFlow

# Determine the project root (assuming this notebook is in the notebooks/ folder)
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
print("Project root:", project_root)
print("Current PYTHONPATH (first few entries):", sys.path[:3])

# Define paths to the raw audio folder, sample audio file, and the TempoCNN model file.
raw_dir = os.path.join(project_root, "data", "raw")
sample_audio = os.path.join(raw_dir, "example.mp3")  # Ensure this file exists
tempo_model_file = os.path.join(project_root, "src", "deeptemp-k16-3.pb")

print("Sample audio file:", sample_audio)
print("Tempo model file:", tempo_model_file)


2025-02-12 21:04:34.488473: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
2025-02-12 21:04:35.688679: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-02-12 21:04:35.691676: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2025-02-12 21:04:37.013706: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:37.013783: I tensorflow/co

Project root: /home/cepatinog/amplab
Current PYTHONPATH (first few entries): ['/home/cepatinog/amplab', '/home/cepatinog/miniconda3/envs/amplab_essentia/lib/python310.zip', '/home/cepatinog/miniconda3/envs/amplab_essentia/lib/python3.10']
Sample audio file: /home/cepatinog/amplab/data/raw/example.mp3
Tempo model file: /home/cepatinog/amplab/src/deeptemp-k16-3.pb


Test the Audio Loading Helper
This cell tests the load_audio.py module to verify that the audio is loaded correctly into the three required versions.

In [2]:
# Pull in the audio-loading helper that lives in src/load_audio.py.
from src.load_audio import load_audio_file

# Run the helper on the sample file. The result is expected to be a dictionary with
# the keys 'stereo_audio', 'mono_audio', 'mono_tempo', 'sampleRate', and 'numChannels'.
audio_dict = load_audio_file(
    sample_audio,
    targetMonoSampleRate=44100,
    targetTempoSampleRate=11025,
)

# Show which keys actually came back.
returned_keys = list(audio_dict.keys())
print("Returned keys:")
pprint(returned_keys)

# Report a short summary of every loaded signal.
print("\nDetails of loaded audio:")
print("Stereo audio (first 5 samples):")
stereo_head = audio_dict['stereo_audio'][:5]  # Expected to be a 2D NumPy array: [[L, R], [L, R], ...]
pprint(stereo_head)
mono_key_length = len(audio_dict['mono_audio'])
print("Mono audio length (for key extraction):", mono_key_length)
mono_tempo_length = len(audio_dict['mono_tempo'])
print("Mono audio length (for tempo extraction):", mono_tempo_length)
mono_sample_rate = audio_dict['sampleRate']
print("Sample rate used for mono audio:", mono_sample_rate)
channel_count = audio_dict['numChannels']
print("Number of channels in original file:", channel_count)


Returned keys:
['stereo_audio', 'mono_audio', 'mono_tempo', 'sampleRate', 'numChannels']

Details of loaded audio:
Stereo audio (first 5 samples):
array([[-0.10971069,  0.09204102],
       [-0.17526245,  0.07278442],
       [-0.29336548, -0.0501709 ],
       [-0.28570557,  0.00180054],
       [-0.22473145,  0.12976074]], dtype=float32)
Mono audio length (for key extraction): 1323001
Mono audio length (for tempo extraction): 330750
Sample rate used for mono audio: 44100.0
Number of channels in original file: 2


Cell 3: Test Individual Feature Extractors
In this cell, we test each extractor separately:

- Tempo extraction, using the mono audio resampled for tempo analysis.
- Key extraction, using the mono audio.
- Loudness extraction, using the stereo audio.

In [3]:
# Bring in one extractor function per feature family.
from src.extract_tempo import extract_tempo_features
from src.key_extraction import extract_key_features
from src.extract_loudness import extract_loudness_features

# Tempo: runs on the 'mono_tempo' signal with the TempoCNN model file.
tempo_signal = audio_dict['mono_tempo']
tempo_features = extract_tempo_features(
    tempo_signal,
    method='tempocnn',
    model_file=tempo_model_file,
)
print("Tempo Features:")
pprint(tempo_features)

# Key: runs on the 'mono_audio' signal.
key_signal = audio_dict['mono_audio']
key_features = extract_key_features(key_signal)
print("\nKey Features:")
pprint(key_features)

# Loudness: runs on the 'stereo_audio' signal.
# NOTE(review): hopSize is presumably expressed in seconds (1024 samples at 44.1 kHz) -
# confirm against extract_loudness_features.
stereo_signal = audio_dict['stereo_audio']
loudness_features = extract_loudness_features(
    stereo_signal,
    hopSize=1024/44100,
    sampleRate=44100,
    startAtZero=True,
)
print("\nLoudness Features:")
pprint(loudness_features)


2025-02-12 21:04:40.993323: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:40.993421: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1650 computeCapability: 7.5
coreClock: 1.515GHz coreCount: 14 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 178.84GiB/s
2025-02-12 21:04:40.993520: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:40.993552: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:40.993560: I tensor

Tempo Features:
{'global_bpm': 128.0,
 'local_bpms': [128.0, 128.0, 128.0, 128.0],
 'local_probs': [0.9546993374824524,
                 0.9014107584953308,
                 0.8928115367889404,
                 0.8102704882621765]}

Key Features:
{'edma_key': 'F#',
 'edma_scale': 'major',
 'edma_strength': 0.9310499429702759,
 'krumhansl_key': 'F#',
 'krumhansl_scale': 'major',
 'krumhansl_strength': 0.985231876373291,
 'temperley_key': 'F#',
 'temperley_scale': 'major',
 'temperley_strength': 0.9225367903709412}


00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:41.030583: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:41.030589: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0
2025-02-12 21:04:41.030613: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:
2025-02-12 21:04:41.030617: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264]      0 
2025-02-12 21:04:41.030620: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0:   N 
2025-02-12 21:04:41.030948: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:41.030961: I t


Loudness Features:
{'integrated_loudness': -7.263375759124756,
 'loudness_range': 1.1281824111938477,
 'momentary_loudness': array([-9.38477 , -9.104578, -8.783603, ..., -8.875216, -9.161896,
       -9.573801], dtype=float32),
 'short_term_loudness': array([-9.571363, -9.482447, -9.383096, ..., -9.565043, -9.629582,
       -9.684465], dtype=float32)}


Cell 4: Test the Integrated Pipeline
Here we use the unified function from audio_analysis.py to extract all features at once.

In [4]:
# The unified extractor lives in src/audio_analysis.py.
from src.audio_analysis import extract_all_features

# Run every extractor in one call on the already-loaded audio dictionary,
# selecting TempoCNN (with its model file) for the tempo part.
all_features = extract_all_features(
    audio_dict,
    tempo_method='tempocnn',
    tempo_model_file=tempo_model_file,
)

# Dump the merged feature dictionary.
print("\nAll Integrated Extracted Features:")
pprint(all_features)



2025-02-12 21:04:46.794754: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:46.794883: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1650 computeCapability: 7.5
coreClock: 1.515GHz coreCount: 14 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 178.84GiB/s
2025-02-12 21:04:46.794987: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:46.795032: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:46.795047: I tensor


All Integrated Extracted Features:
{'edma_key': 'F#',
 'edma_scale': 'major',
 'edma_strength': 0.9310499429702759,
 'krumhansl_key': 'F#',
 'krumhansl_scale': 'major',
 'krumhansl_strength': 0.985231876373291,
 'loudness_integrated_loudness': -7.263375759124756,
 'loudness_loudness_range': 1.1281824111938477,
 'loudness_momentary_loudness': array([-9.38477 , -9.104578, -8.783603, ..., -8.875216, -9.161896,
       -9.573801], dtype=float32),
 'loudness_short_term_loudness': array([-9.571363, -9.482447, -9.383096, ..., -9.565043, -9.629582,
       -9.684465], dtype=float32),
 'temperley_key': 'F#',
 'temperley_scale': 'major',
 'temperley_strength': 0.9225367903709412,
 'tempo_global_bpm': 128.0,
 'tempo_local_bpms': [128.0, 128.0, 128.0, 128.0],
 'tempo_local_probs': [0.9546993374824524,
                       0.9014107584953308,
                       0.8928115367889404,
                       0.8102704882621765]}


PU:0 with 2128 MB memory) -> physical GPU (device: 0, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5)
2025-02-12 21:04:46.803455: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:46.803510: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1650 computeCapability: 7.5
coreClock: 1.515GHz coreCount: 14 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 178.84GiB/s
2025-02-12 21:04:46.803576: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:04:46.803642: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node

In [5]:
# Wrap the feature dictionary in a one-element list so that it becomes a
# single-row DataFrame (one column per feature key), then display it.
feature_rows = [all_features]
df = pd.DataFrame(feature_rows)
df

Unnamed: 0,tempo_global_bpm,tempo_local_bpms,tempo_local_probs,temperley_key,temperley_scale,temperley_strength,krumhansl_key,krumhansl_scale,krumhansl_strength,edma_key,edma_scale,edma_strength,loudness_momentary_loudness,loudness_short_term_loudness,loudness_integrated_loudness,loudness_loudness_range
0,128.0,"[128.0, 128.0, 128.0, 128.0]","[0.9546993374824524, 0.9014107584953308, 0.892...",F#,major,0.922537,F#,major,0.985232,F#,major,0.93105,"[-9.38477, -9.104578, -8.783603, -8.4422655, -...","[-9.571363, -9.482447, -9.383096, -9.306011, -...",-7.263376,1.128182


In [None]:
import os
import essentia.standard as es
from pprint import pprint
import numpy as np
import sys
%matplotlib inline

# Set up project root and PYTHONPATH as before
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
print("Project root:", project_root)

# Define path to a sample audio file.
raw_dir = os.path.join(project_root, "data", "raw")
sample_audio = os.path.join(raw_dir, "example.mp3")
print("Sample audio file:", sample_audio)

# Load audio with MonoLoader at 16000 Hz (the expected rate for embeddings).
audio = es.MonoLoader(filename=sample_audio, sampleRate=16000, resampleQuality=4)()
print("Loaded audio length:", len(audio))

# Import embedding extraction functions.
from src.extract_embeddings import extract_discogs_effnet_embeddings, extract_msd_musicnn_embeddings

# Define model paths (update these as needed).
discogs_model_file = os.path.join(project_root, "src", "discogs-effnet-bs64-1.pb")  # Update file name if necessary
musicnn_model_file = os.path.join(project_root, "src", "msd-musicnn-1.pb")           # Update file name if necessary

# Extract embeddings.
discogs_embedding = extract_discogs_effnet_embeddings(audio, model_file=discogs_model_file)
musicnn_embedding = extract_msd_musicnn_embeddings(audio, model_file=musicnn_model_file)

print("Discogs-Effnet embedding shape:", discogs_embedding.shape)
print("MSD-MusicCNN embedding shape:", musicnn_embedding.shape)

# Optionally, print or visualize the embeddings.
pprint(discogs_embedding)
pprint(musicnn_embedding)


Project root: /home/cepatinog/amplab
Sample audio file: /home/cepatinog/amplab/data/raw/example.mp3
Loaded audio length: 480001


2025-02-12 21:28:38.937114: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:28:38.937182: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1650 computeCapability: 7.5
coreClock: 1.515GHz coreCount: 14 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 178.84GiB/s
2025-02-12 21:28:38.937227: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:28:38.937251: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-12 21:28:38.937258: I tensor

RuntimeError: In TensorflowPredictEffnetDiscogs.compute: TensorflowPredict: Error running the Tensorflow session. 2 root error(s) found.
  (0) Internal: Blas xGEMM launch failed : a.shape=[1,64,1280], b.shape=[1,1280,400], m=64, n=400, k=1280
	 [[{{node MatMul}}]]
  (1) Internal: Blas xGEMM launch failed : a.shape=[1,64,1280], b.shape=[1,1280,400], m=64, n=400, k=1280
	 [[{{node MatMul}}]]
	 [[PartitionedCall/_3]]
0 successful operations.
0 derived errors ignored.

In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = ''
import essentia.standard as es
from pprint import pprint
import numpy as np
import sys
%matplotlib inline

# Set up project root and PYTHONPATH as before
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
print("Project root:", project_root)

# Define path to a sample audio file.
raw_dir = os.path.join(project_root, "data", "raw")
sample_audio = os.path.join(raw_dir, "example.mp3")
print("Sample audio file:", sample_audio)

# Load audio with MonoLoader at 16000 Hz (the expected rate for embeddings).
audio = es.MonoLoader(filename=sample_audio, sampleRate=16000, resampleQuality=4)()
print("Loaded audio length:", len(audio))


# Take only the first 10 seconds (if the audio is longer than 10 seconds)
duration = 10  # seconds
audio_clip = audio[:duration * 16000]

# Import embedding extraction functions.
from src.extract_embeddings import extract_discogs_effnet_embeddings

# Define model paths (update these as needed).
discogs_model_file = os.path.join(project_root, "src", "discogs-effnet-bs64-1.pb")  # Update file name if necessary

discogs_embedding = extract_discogs_effnet_embeddings(audio_clip, model_file=discogs_model_file)


2025-02-12 21:33:46.593325: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
2025-02-12 21:33:47.680324: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-02-12 21:33:47.684951: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2025-02-12 21:33:49.493086: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2025-02-12 21:33:49.493136: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does 

Project root: /home/cepatinog/amplab
Sample audio file: /home/cepatinog/amplab/data/raw/example.mp3
Loaded audio length: 480001


[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/home/cepatinog/amplab/src/discogs-effnet-bs64-1.pb`
2025-02-12 21:33:51.174574: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2496005000 Hz


In [2]:
# Report the dimensionality of the embedding computed in the previous cell.
embedding_shape = discogs_embedding.shape
print("Discogs-Effnet embedding shape:", embedding_shape)


Discogs-Effnet embedding shape: (1280,)
