### Cell 1: Environment Setup

In [1]:
import tensorflow as tf
# Enable GPU memory growth
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Memory growth enabled on GPU.")
    except RuntimeError as e:
        print(e)

import os
# Hide unnecessary TensorFlow messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import sys
import essentia.standard as es
import pandas as pd
from pprint import pprint

import numpy as np
%matplotlib inline


# Determine project root (assuming the notebook is in the notebooks/ folder)
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
print("Project root:", project_root)
print("Current PYTHONPATH (first few entries):", sys.path[:3])

# Define paths
raw_dir = os.path.join(project_root, "data", "raw")
sample_audio = os.path.join(raw_dir, "example.mp3")  # Ensure this file exists
tempo_model_file = os.path.join(project_root, "src", "deeptemp-k16-3.pb")
print("Sample audio file:", sample_audio)
print("Tempo model file:", tempo_model_file)


2025-02-13 12:34:01.138402: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-13 12:34:01.158601: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-13 12:34:01.158657: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


Memory growth enabled on GPU.


[   INFO   ] MusicExtractorSVM: no classifier models were configured by default


Project root: /home/cepatinog/amplab
Current PYTHONPATH (first few entries): ['/home/cepatinog/amplab', '/home/cepatinog/miniconda3/envs/amplab_essentia/lib/python310.zip', '/home/cepatinog/miniconda3/envs/amplab_essentia/lib/python3.10']
Sample audio file: /home/cepatinog/amplab/data/raw/example.mp3
Tempo model file: /home/cepatinog/amplab/src/deeptemp-k16-3.pb


### Cell 2: Test Audio Loading

In [2]:
from src.load_audio import load_audio_file

audio_dict = load_audio_file(sample_audio, targetMonoSampleRate=44100, targetTempoSampleRate=11025)

print("Returned keys:")
pprint(list(audio_dict.keys()))

print("\nDetails of loaded audio:")
print("Stereo audio (first 5 samples):")
pprint(audio_dict['stereo_audio'][:5])
print("Mono audio length (for key extraction):", len(audio_dict['mono_audio']))
print("Mono audio length (for tempo extraction):", len(audio_dict['mono_tempo']))
print("Sample rate used for mono audio:", audio_dict['sampleRate'])
print("Number of channels in original file:", audio_dict['numChannels'])


Returned keys:
['stereo_audio', 'mono_audio', 'mono_tempo', 'sampleRate', 'numChannels']

Details of loaded audio:
Stereo audio (first 5 samples):
array([[-0.10971069,  0.09204102],
       [-0.17526245,  0.07278442],
       [-0.29336548, -0.0501709 ],
       [-0.28570557,  0.00180054],
       [-0.22473145,  0.12976074]], dtype=float32)
Mono audio length (for key extraction): 1323001
Mono audio length (for tempo extraction): 330750
Sample rate used for mono audio: 44100.0
Number of channels in original file: 2


### Cell 3: Test Individual Feature Extractors

In [3]:
from src.extract_tempo import extract_tempo_features
from src.key_extraction import extract_key_features
from src.extract_loudness import extract_loudness_features

tempo_features = extract_tempo_features(audio_dict['mono_tempo'], method='tempocnn', model_file=tempo_model_file)
print("Tempo Features:")
pprint(tempo_features)

key_features = extract_key_features(audio_dict['mono_audio'])
print("\nKey Features:")
pprint(key_features)

loudness_features = extract_loudness_features(audio_dict['stereo_audio'], hopSize=1024/44100, sampleRate=44100, startAtZero=True)
print("\nLoudness Features:")
pprint(loudness_features)


Tempo Features:
{'global_bpm': 128.0,
 'local_bpms': [128.0, 128.0, 128.0, 128.0],
 'local_probs': [0.9546993374824524,
                 0.9014108777046204,
                 0.8928115367889404,
                 0.8102707266807556]}


[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/home/cepatinog/amplab/src/deeptemp-k16-3.pb`



Key Features:
{'edma_key': 'F#',
 'edma_scale': 'major',
 'edma_strength': 0.9310499429702759,
 'krumhansl_key': 'F#',
 'krumhansl_scale': 'major',
 'krumhansl_strength': 0.985231876373291,
 'temperley_key': 'F#',
 'temperley_scale': 'major',
 'temperley_strength': 0.9225367903709412}

Loudness Features:
{'integrated_loudness': -7.263375759124756,
 'loudness_range': 1.1281824111938477,
 'momentary_loudness': array([-9.38477 , -9.104578, -8.783603, ..., -8.875216, -9.161896,
       -9.573801], dtype=float32),
 'short_term_loudness': array([-9.571363, -9.482447, -9.383096, ..., -9.565043, -9.629582,
       -9.684465], dtype=float32)}


### Cell 4: Test Integrated Pipeline

In [4]:
from src.audio_analysis import extract_all_features

all_features = extract_all_features(audio_dict, tempo_method='tempocnn', tempo_model_file=tempo_model_file)
print("\nAll Integrated Extracted Features:")
pprint(all_features)

df = pd.DataFrame([all_features])
df


[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/home/cepatinog/amplab/src/deeptemp-k16-3.pb`



All Integrated Extracted Features:
{'edma_key': 'F#',
 'edma_scale': 'major',
 'edma_strength': 0.9310499429702759,
 'krumhansl_key': 'F#',
 'krumhansl_scale': 'major',
 'krumhansl_strength': 0.985231876373291,
 'loudness_integrated_loudness': -7.263375759124756,
 'loudness_loudness_range': 1.1281824111938477,
 'loudness_momentary_loudness': array([-9.38477 , -9.104578, -8.783603, ..., -8.875216, -9.161896,
       -9.573801], dtype=float32),
 'loudness_short_term_loudness': array([-9.571363, -9.482447, -9.383096, ..., -9.565043, -9.629582,
       -9.684465], dtype=float32),
 'temperley_key': 'F#',
 'temperley_scale': 'major',
 'temperley_strength': 0.9225367903709412,
 'tempo_global_bpm': 128.0,
 'tempo_local_bpms': [128.0, 128.0, 128.0, 128.0],
 'tempo_local_probs': [0.9546993374824524,
                       0.9014108777046204,
                       0.8928115367889404,
                       0.8102707266807556]}


Unnamed: 0,tempo_global_bpm,tempo_local_bpms,tempo_local_probs,temperley_key,temperley_scale,temperley_strength,krumhansl_key,krumhansl_scale,krumhansl_strength,edma_key,edma_scale,edma_strength,loudness_momentary_loudness,loudness_short_term_loudness,loudness_integrated_loudness,loudness_loudness_range
0,128.0,"[128.0, 128.0, 128.0, 128.0]","[0.9546993374824524, 0.9014108777046204, 0.892...",F#,major,0.922537,F#,major,0.985232,F#,major,0.93105,"[-9.38477, -9.104578, -8.783603, -8.4422655, -...","[-9.571363, -9.482447, -9.383096, -9.306011, -...",-7.263376,1.128182


### Cell 5: Test Embedding Extraction

In [5]:
# Import embedding extraction functions.
from src.extract_embeddings import extract_discogs_effnet_embeddings, extract_msd_musicnn_embeddings

# Load audio for embeddings: using MonoLoader at 16 kHz.
audio_embeddings = es.MonoLoader(filename=sample_audio, sampleRate=16000, resampleQuality=4)()
print("Loaded audio for embeddings length:", len(audio_embeddings))

# Define model paths.
discogs_model_file = os.path.join(project_root, "src", "discogs-effnet-bs64-1.pb")  # Update filename if necessary
musicnn_model_file = os.path.join(project_root, "src", "msd-musicnn-1.pb")           # Update filename if necessary

# Extract Discogs-Effnet embeddings.
discogs_embedding = extract_discogs_effnet_embeddings(audio_embeddings, model_file=discogs_model_file)
print("Discogs-Effnet embedding shape:", discogs_embedding.shape)
print("Discogs-Effnet embedding:")
pprint(discogs_embedding)

# Extract MSD-MusicCNN embeddings.
musicnn_embedding = extract_msd_musicnn_embeddings(audio_embeddings, model_file=musicnn_model_file)
print("MSD-MusicCNN embedding shape:", musicnn_embedding.shape)
print("MSD-MusicCNN embedding:")
pprint(musicnn_embedding)



Loaded audio for embeddings length: 480001


[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/home/cepatinog/amplab/src/discogs-effnet-bs64-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/home/cepatinog/amplab/src/msd-musicnn-1.pb`


Discogs-Effnet embedding shape: (1280,)
Discogs-Effnet embedding:
array([-0.00818077,  0.04617089,  0.08795194, ..., -0.0115188 ,
        0.22933842, -0.0307178 ], dtype=float32)
MSD-MusicCNN embedding shape: (200,)
MSD-MusicCNN embedding:
array([ 1.6210680e+00,  3.7390992e-01, -1.4761760e+00, -1.8138838e+00,
       -1.9977241e+00,  3.1101890e+00,  2.1713927e+00, -1.4830803e+00,
       -5.1325660e+00,  1.1591920e-01,  2.4978249e+00,  2.1131213e+00,
       -2.4963405e+00, -1.3414558e+00,  7.5094149e-02,  3.0526948e+00,
        1.6317570e-01, -4.4245654e-01, -2.4706023e+00, -2.8656778e+00,
        1.4657718e+00,  5.5882281e-01, -2.9023263e+00, -1.3423108e+00,
        7.5930852e-01, -2.9156735e+00,  1.9484429e+00,  3.3846447e+00,
        2.6429994e+00, -2.7658646e+00,  2.5999832e-01, -2.6172974e+00,
        7.8670716e-01, -3.4677293e+00,  3.1758797e+00,  3.5159130e+00,
       -2.7254109e+00,  3.6064587e+00, -2.2145446e-02,  3.6668515e-01,
        2.1951363e+00, -1.8875448e-01, -1.8936421e