# Exploitation Zone Audio

This notebook builds an audio embedding pipeline. It loads the S3/MinIO credentials, connects to MinIO and to a ChromaDB HTTP server, and loads a pretrained CLAP audio model and its processor from HuggingFace. It then creates a ChromaDB collection, iterates over the audio objects in the trusted-zone MinIO bucket, downloads each one, and runs it through the CLAP processor and model to obtain an embedding. Finally, these embeddings are stored in ChromaDB, and the audio files are copied to the exploitation-zone bucket, so they can be used for similarity search.

**TODO: Explain that we need a TARGET_SAMPLE_RATE of 48000 Hz to use the CLAP processor. Therefore, this should be done in other zones.**

In [2]:
import boto3
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

access_key_id = os.getenv("ACCESS_KEY_ID")
secret_access_key = os.getenv("SECRET_ACCESS_KEY")

# Fail early with a readable message: concatenating "http://" with a missing
# (None) endpoint would otherwise raise an opaque TypeError.
s3_api_endpoint = os.getenv("S3_API_ENDPOINT")
if not s3_api_endpoint:
    raise RuntimeError(
        "Environment variable S3_API_ENDPOINT is not set; "
        "define it (e.g. in .env) as host:port of the S3/MinIO API."
    )
minio_url = "http://" + s3_api_endpoint


# boto3 S3 client pointed at the MinIO endpoint instead of AWS.
minio_client = boto3.client(
    "s3",
    aws_access_key_id=access_key_id,
    aws_secret_access_key=secret_access_key,
    endpoint_url=minio_url
)

# Make sure the destination bucket exists; an already-existing bucket is not an error.
new_bucket = "exploitation-zone"
try:
    minio_client.create_bucket(Bucket=new_bucket)
except (minio_client.exceptions.BucketAlreadyExists, minio_client.exceptions.BucketAlreadyOwnedByYou):
    print(f"Bucket '{new_bucket}' already exists")

Bucket 'exploitation-zone' already exists


In [3]:
import chromadb
from transformers import ClapModel, ClapProcessor
import librosa
import io
import torch

# --- Clients and bucket names -------------------------------------------------
# ChromaDB is accessed over HTTP on localhost:8000; `minio_client` comes from the
# setup cell above.
client = chromadb.HttpClient(host="localhost", port=8000)
paginator = minio_client.get_paginator('list_objects_v2')
exploitation_zone = "exploitation-zone"
trusted_zone = "trusted-zone"

# --- Model --------------------------------------------------------------------
model_id = "laion/clap-htsat-unfused"
model = ClapModel.from_pretrained(model_id)
processor = ClapProcessor.from_pretrained(model_id)
collection_name = "exploitation_zone-audio"
# Audio is resampled to this rate when loaded, and the same rate is passed to
# the CLAP processor.
TARGET_SAMPLE_RATE = 48000

# --- Collection ---------------------------------------------------------------
# Start from an empty collection so re-running this cell does not hit duplicate ids.
try:
    client.delete_collection(name=collection_name)
except Exception as e:
    # Best-effort: the collection may simply not exist yet. Report instead of
    # hiding the reason; a real connectivity problem will surface right below.
    print(f"Collection '{collection_name}' was not deleted: {e}")

try:
    collection = client.get_or_create_collection(name=collection_name)
except Exception as e:
    print(f"Error accessing or creating collection: {e}")
    # Re-raise so the cell stops here with a traceback; `exit()` is not a
    # reliable way to abort a notebook cell and would leave `collection` undefined.
    raise

# --- Ingestion ----------------------------------------------------------------
# For every object under "audio/" in the trusted zone: embed it, store the
# embedding in ChromaDB, and copy the file to the exploitation bucket.
for page in paginator.paginate(Bucket=trusted_zone, Prefix="audio/"):
    for obj in page.get("Contents", []):
        key = obj.get("Key", "")
        # Skip entries that cannot be audio files: missing keys and
        # folder-marker objects (keys ending in "/").
        if not key or key.endswith("/"):
            continue

        response = minio_client.get_object(Bucket=trusted_zone, Key=key)
        try:
            audio_bytes = response['Body'].read()
        finally:
            # Release the underlying HTTP connection held by the streaming body.
            response['Body'].close()

        # Decode from memory, resampling to TARGET_SAMPLE_RATE and mixing down to mono.
        audio_waveform, _ = librosa.load(
            io.BytesIO(audio_bytes),
            sr=TARGET_SAMPLE_RATE,
            mono=True
        )

        inputs = processor(
            audio=audio_waveform,
            sampling_rate=TARGET_SAMPLE_RATE,
            return_tensors="pt"
        )
        # Inference only: no gradients needed.
        with torch.no_grad():
            audio_features = model.get_audio_features(**inputs)

        # A single waveform was passed, so take the first entry of the result.
        embedding = audio_features[0].numpy().tolist()

        # The object key doubles as the ChromaDB id, so a query hit can be
        # mapped straight back to the stored file.
        collection.add(
            embeddings=[embedding],
            metadatas=[{"source": trusted_zone, "audio_path": key}],
            ids=[key]
        )

        # Copy the audio file under the same key into the exploitation bucket.
        minio_client.copy_object(
            Bucket=exploitation_zone,
            CopySource={'Bucket': trusted_zone, 'Key': key},
            Key=key
        )


# Sanity check: show which fields ChromaDB returns for the stored entries.
result = collection.get()
print("returned keys:", list(result.keys()))

returned keys: ['ids', 'embeddings', 'metadatas', 'documents', 'data', 'uris', 'included']


In [None]:
import os
import io
import librosa
import torch
from IPython.display import Audio, display

# Prerequisites: the cells above must already have run in this kernel, because
# this cell reuses the following names from them:
#   model, processor    - the loaded CLAP model and its processor
#   client              - the chromadb.HttpClient
#   minio_client        - the boto3 S3 client pointed at MinIO
#   TARGET_SAMPLE_RATE  - 48000
#   exploitation_zone   - "exploitation-zone"
#   collection_name     - "exploitation_zone-audio"

# Step 1: path of the local query clip (the file must exist).
QUERY_AUDIO_KEY = os.path.join(os.getcwd(), "../../query_audio.wav") 

try:
    # Load the raw bytes of the query clip from disk.
    with open(QUERY_AUDIO_KEY, "rb") as query_file:
        query_audio_data = query_file.read()

    print(f"Fetching query audio: {QUERY_AUDIO_KEY}")
    display(Audio(query_audio_data))

    # Step 2: embed the query with the same load -> processor -> model steps
    # that the ingestion cell uses.
    print("Generating query embedding...")
    query_waveform, _ = librosa.load(io.BytesIO(query_audio_data), sr=TARGET_SAMPLE_RATE, mono=True)
    inputs = processor(audio=query_waveform, sampling_rate=TARGET_SAMPLE_RATE, return_tensors="pt")
    with torch.no_grad():
        audio_features = model.get_audio_features(**inputs)
    query_embedding = audio_features[0].numpy().tolist()
    print("Query embedding generated.")

    # Step 3: open the collection filled by the ingestion cell.
    collection = client.get_collection(name=collection_name)

    # Step 4: nearest-neighbour lookup for the single closest stored audio.
    print("Querying ChromaDB for 1 similar audio...")
    results = collection.query(query_embeddings=[query_embedding], n_results=1)

    # Step 5: report each hit and play it back.
    if results['ids'][0]:
        for i, audio_id in enumerate(results['ids'][0]):
            # The ChromaDB id is the object key used in MinIO.
            distance = results['distances'][0][i]

            print(f"\nMatch {i+1}:")
            print(f"  ID (Minio Key): {audio_id}")
            print(f"  Similarity Distance: {distance:.4f}")

            # Step 6: download the matched file from the exploitation bucket.
            response = minio_client.get_object(Bucket=exploitation_zone, Key=audio_id)
            matched_audio_data = response['Body'].read()

            print("Matched Audio:")
            display(Audio(matched_audio_data))
    else:
        print("No similar audios found.")

except FileNotFoundError:
    print(f"Error: Query audio file not found at {QUERY_AUDIO_KEY}")
except Exception as e:
    # Any other failure is reported as a one-line message rather than a traceback.
    print(f"An error occurred: {e}")

{'ids': ['audio/answer_0.mp3', 'audio/answer_1.mp3', 'audio/answer_10.mp3', 'audio/answer_11.mp3', 'audio/answer_12.mp3', 'audio/answer_13.mp3', 'audio/answer_14.mp3', 'audio/answer_15.mp3', 'audio/answer_16.mp3', 'audio/answer_17.mp3', 'audio/answer_18.mp3', 'audio/answer_19.mp3', 'audio/answer_2.mp3', 'audio/answer_20.mp3', 'audio/answer_21.mp3', 'audio/answer_22.mp3', 'audio/answer_23.mp3', 'audio/answer_24.mp3', 'audio/answer_25.mp3', 'audio/answer_26.mp3', 'audio/answer_27.mp3', 'audio/answer_28.mp3', 'audio/answer_29.mp3', 'audio/answer_3.mp3', 'audio/answer_30.mp3', 'audio/answer_31.mp3', 'audio/answer_32.mp3', 'audio/answer_33.mp3', 'audio/answer_34.mp3', 'audio/answer_35.mp3', 'audio/answer_36.mp3', 'audio/answer_37.mp3', 'audio/answer_38.mp3', 'audio/answer_39.mp3', 'audio/answer_4.mp3', 'audio/answer_40.mp3', 'audio/answer_41.mp3', 'audio/answer_42.mp3', 'audio/answer_43.mp3', 'audio/answer_44.mp3', 'audio/answer_45.mp3', 'audio/answer_46.mp3', 'audio/answer_47.mp3', 'audio/