# Audio Embedding and Tagging

## Import Packages

In [1]:
import os
# These two variables are set before torch / openl3 are imported below.
# NOTE(review): "-1" presumably hides every CUDA device so inference runs on
# CPU — confirm this is intended for the target machine.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# NOTE(review): presumably meant to silence TensorFlow's native (C++) logging;
# verify it has the desired effect in this environment.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" 
import json
import torch
import librosa
import openl3
import pandas as pd
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed

E0000 00:00:1745174555.903219  804636 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745174555.909119  804636 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1745174555.923913  804636 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745174555.923939  804636 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745174555.923941  804636 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745174555.923942  804636 computation_placer.cc:177] computation placer already registered. Please check linka

## Get Embeddings and Tags

In [None]:
# Directory that is listed for input audio; embed_file() joins file names onto it.
AUDIO_DIR = "../data/song_audio_files"
# Destination CSV for the per-file embedding table.
OUTPUT_CSV = "openl3_embeddings.csv"
# Thread count handed to ThreadPoolExecutor(max_workers=...).
NUM_WORKERS = 10

# Load the OpenL3 audio embedding model once at module level so every
# embed_file() call reuses the same instance instead of reloading it.
# Configuration: mel-spectrogram input ("mel256"), "music" content type,
# 512-dimensional embeddings.
model = openl3.models.load_audio_embedding_model(
    input_repr="mel256",
    content_type="music",
    embedding_size=512
)

def embed_file(file, window_s=30.0):
    """Return the time-averaged OpenL3 embedding of one MP3 as a flat row dict.

    A window of ``window_s`` seconds centred on the middle of the track is
    decoded at 48 kHz mono, embedded with the module-level ``model``, and the
    resulting embedding frames are averaged along axis 0.

    Args:
        file: File name (not a full path) relative to ``AUDIO_DIR``.
        window_s: Length in seconds of the centred excerpt to embed.
            Defaults to 30 seconds.

    Returns:
        ``{"file": file, "dim_0": ..., "dim_1": ..., ...}`` with plain Python
        floats, or ``None`` when the file is not an MP3 or processing failed.
        Failures are reported on stdout rather than raised, so one bad file
        does not abort a batch run.
    """
    try:
        # Case-insensitive match: a file named "TRACK.MP3" is still an MP3 and
        # must not be silently dropped from the output.
        if not file.lower().endswith(".mp3"):
            return None
        path = os.path.join(AUDIO_DIR, file)

        # Centre the excerpt on the track midpoint; tracks shorter than the
        # window clamp to offset 0 and are loaded from the start.
        duration = librosa.get_duration(path=path)
        offset = max(0.0, (duration - window_s) / 2)

        audio, sr = librosa.load(
            path, sr=48000, mono=True, offset=offset, duration=window_s
        )

        # Timestamps (second return value) are not needed, only the frames.
        emb, _ = openl3.get_audio_embedding(audio, sr, model=model)
        avg_emb = emb.mean(axis=0)

        row = {"file": file}
        row.update((f"dim_{i}", float(val)) for i, val in enumerate(avg_emb))
        return row

    except Exception as e:
        # Deliberate best-effort boundary: log and skip so the batch continues.
        print(f"{file} failed: {e}")
        return None

# === Parallel loop ===
# os.listdir() returns entries in arbitrary order; sorting makes the run
# (and therefore the CSV row order) reproducible.
files = sorted(os.listdir(AUDIO_DIR))

with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
    # executor.map yields results in input order regardless of which worker
    # finishes first. embed_file returns None for skipped or failed files,
    # and those entries are dropped here.
    results = [row for row in executor.map(embed_file, files) if row]

# One row per successfully embedded file: "file" plus the dim_* columns.
df = pd.DataFrame(results)
df.to_csv(OUTPUT_CSV, index=False)
print(f"\nDone. {len(df)} files embedded and saved to {OUTPUT_CSV}")

	This alias will be removed in version 1.0.
  duration = librosa.get_duration(filename=path)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 9s/step
[1m 9/10[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m9s[0m 10s/step 

	This alias will be removed in version 1.0.
  duration = librosa.get_duration(filename=path)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 8s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0



[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 8s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 8s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 8s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 8s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 8s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 8s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 8s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 8s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 8s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 8s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 9s/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0