# Audio Embedding and Tagging

## Import Packages

In [None]:
import os
import torch
import openl3
import librosa
import numpy as np
from musicnn.tagger import top_tags
from multiprocessing import Pool

## Get Embeddings and Tags

Running our audio files through the OpenL3 model to extract vector embeddings that describe the music's mood and characteristics, and using Musicnn to generate top descriptive tags.

In [None]:
# === Settings ===
AUDIO_DIR = "downloads"  # directory scanned for input audio files
SAMPLE_RATE = 48000  # sample rate (Hz) requested when loading audio
DURATION = 30  # length, in seconds, of the snippet taken from each track
EMBED_SIZE = 512  # embedding_size passed to OpenL3

# === Get files ===
# os.listdir() returns entries in arbitrary order; sorting keeps the
# file -> GPU pairing and the order of the saved results reproducible.
# The extension check is case-insensitive so "track.MP3" is not skipped.
audio_files = sorted(
    os.path.join(AUDIO_DIR, f)
    for f in os.listdir(AUDIO_DIR)
    if f.lower().endswith(".mp3")
)

# === GPU assignment ===
def assign_gpu(index, num_gpus=2):
    """Return the GPU id for the item at position ``index`` (round-robin).

    Args:
        index: Zero-based position of the item in the work list.
        num_gpus: Number of GPUs to spread the work over. Defaults to 2,
            which yields 0 for even indices and 1 for odd ones.

    Returns:
        An integer GPU id in ``range(num_gpus)``.

    Raises:
        ValueError: If ``num_gpus`` is smaller than 1.
    """
    if num_gpus < 1:
        raise ValueError(f"num_gpus must be at least 1, got {num_gpus}")
    return index % num_gpus

# === Processing logic ===
def process_audio(args):
    """Compute an OpenL3 embedding and the top musicnn tags for one file.

    Args:
        args: ``(path, gpu_id)`` tuple -- the audio file path and the index
            of the CUDA device to select for this call.

    Returns:
        On success, a dict with the keys ``"file"`` (base name of ``path``),
        ``"embedding"`` (the OpenL3 embedding averaged over its first axis,
        converted to a list) and ``"tags"`` (``(tag, score)`` pairs, highest
        score first). If any step fails, a dict with ``"file"`` and
        ``"error"`` (the exception message) is returned instead, so one bad
        file does not abort the whole batch.
    """
    # Local import: extractor() exposes the per-tag likelihoods (taggram),
    # whereas top_tags() only returns the tag names without scores.
    from musicnn.extractor import extractor

    path, gpu_id = args
    fname = os.path.basename(path)
    top_n = 3  # number of tags kept; matches the top_tags() default

    try:
        # Selecting the device inside the try block turns an invalid GPU id
        # into a per-file error entry instead of crashing the worker pool.
        # NOTE(review): OpenL3 and musicnn are TensorFlow-based, so this
        # torch call may not influence which GPU they run on -- confirm.
        if torch.cuda.is_available():
            torch.cuda.set_device(gpu_id)

        # Load the track and keep a DURATION-second snippet from its middle
        # (the whole track when it is shorter than DURATION).
        y, sr = librosa.load(path, sr=SAMPLE_RATE)
        dur = librosa.get_duration(y=y, sr=sr)
        start = max(0, (dur - DURATION) / 2)
        y = y[int(start * sr):int((start + DURATION) * sr)]

        # OpenL3 embedding. input_repr selects the spectrogram type and must
        # be one of "linear", "mel128" or "mel256"; "music" is only valid
        # for content_type.
        emb, _ = openl3.get_audio_embedding(
            y, sr,
            input_repr="mel256",
            content_type="music",
            embedding_size=EMBED_SIZE,
            frontend="librosa"
        )
        # Average over the frame axis to get one vector per track.
        emb_mean = emb.mean(axis=0)

        # musicnn tags: average the taggram over time and keep the top_n
        # most likely tags together with their mean likelihood.
        taggram, labels = extractor(
            path,
            model='MSD_musicnn',
            input_length=DURATION,
            extract_features=False,
        )
        likelihoods = np.mean(taggram, axis=0)
        best = likelihoods.argsort()[-top_n:][::-1]
        tags = [(labels[i], float(likelihoods[i])) for i in best]

        return {
            "file": fname,
            "embedding": emb_mean.tolist(),
            "tags": tags
        }
    except Exception as e:
        # Best-effort batch processing: report the failure for this file.
        return {
            "file": fname,
            "error": str(e)
        }

# === Run parallel across 2 GPUs ===
# Pair each audio file with the GPU id derived from its position in the list.
file_gpu_pairs = list(zip(audio_files, map(assign_gpu, range(len(audio_files)))))

# Two worker processes consume the (file, gpu) pairs; map() returns the
# results in the same order as the inputs.
pool = Pool(processes=2)
with pool:
    results = pool.map(process_audio, file_gpu_pairs)

# === Optional: Save or view
# Write every per-file result (features or error entry) to one JSON file.
import json
with open("audio_features.json", "w") as out_file:
    json.dump(results, out_file, indent=2)