In [1]:
!pip install speechbrain torchaudio torch pinecone

Collecting speechbrain
  Downloading speechbrain-1.0.3-py3-none-any.whl.metadata (24 kB)
Collecting pinecone
  Downloading pinecone-7.0.2-py3-none-any.whl.metadata (9.5 kB)
Collecting hyperpyyaml (from speechbrain)
  Downloading HyperPyYAML-1.2.2-py3-none-any.whl.metadata (7.6 kB)
Collecting pinecone-plugin-assistant<2.0.0,>=1.6.0 (from pinecone)
  Downloading pinecone_plugin_assistant-1.6.1-py3-none-any.whl.metadata (27 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Collecting packaging (from speechbrain)
  Downloading packaging-24.2-py3-none-any.whl.metadata (3.2 kB)
Collecting ruamel.yaml>=0.17.28 (from hyperpyyaml->speechbrain)
  Downloading ruamel.yaml-0.18.12-py3-none-any.whl.metadata (24 kB)
Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml>=0.17.28->hyperpyyaml->speechbrain)
  Downloading ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.m

In [2]:
import os
import numpy as np
import torch
import torchaudio
from speechbrain.pretrained import EncoderClassifier
import pinecone
import uuid
from typing import List, Union

# ---------------- PINECONE CONFIGURATION ----------------
PINECONE_API_KEY = "YOUR_API_KEY"         # 🔁 Replace this
PINECONE_ENV = "YOUR_ENVIRONMENT"         # 🔁 Replace this
INDEX_NAME = "audio-embedding-index"
EMBEDDING_DIM = 192
UPSERT_BATCH_SIZE = 100

# ---------------- AUDIO TO EMBEDDING FUNCTION ----------------

def audio_to_embedding(audio_path):
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    try:
        model = EncoderClassifier.from_hparams(
            source="speechbrain/spkrec-ecapa-voxceleb",
            savedir="pretrained_models/spkrec-ecapa-voxceleb"
        )

        waveform, sample_rate = torchaudio.load(audio_path)

        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)

        if sample_rate != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
            waveform = resampler(waveform)

        min_length = 16000
        if waveform.shape[1] < min_length:
            padding = min_length - waveform.shape[1]
            waveform = torch.nn.functional.pad(waveform, (0, padding))

        embedding = model.encode_batch(waveform)

        return embedding.squeeze().numpy()

    except Exception as e:
        raise RuntimeError(f"Error processing audio: {str(e)}")

# ---------------- EMBEDDING TEST ----------------

def test_embedding(embedding, file_name=""):
    print(f"\n=== TEST: {file_name} ===")
    print(f"Type: {type(embedding)}")
    print(f"Shape: {embedding.shape}")
    print(f"Min: {embedding.min():.4f}, Max: {embedding.max():.4f}, Mean: {embedding.mean():.4f}, Std: {embedding.std():.4f}")
    print("First 5:", embedding[:5], "... Last 5:", embedding[-5:])

    if isinstance(embedding, np.ndarray) and embedding.ndim == 1 and embedding.shape[0] > 0:
        print("✓ Valid embedding")
    else:
        print("✗ Invalid embedding")


DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for _speechbrain_save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for _speechbrain_load
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for load
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for _save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for _recover
  from speechbrain.pretrained import EncoderClassifier


In [12]:
PINECONE_API_KEY = "pcsk_2yzKnb_DusX4M95CU1KTjQxkZFPdYWbtFghFc7kUD2cHzpUT4hWPLmMbPgEgT5NgoX3Fib"         # 🔁 Replace this
PINECONE_ENV = "us-east-1"         # 🔁 Replace this
INDEX_NAME = "voice-biometrics"
# Update the EMBEDDING_DIM to match the existing index dimension (1024)
EMBEDDING_DIM = 192
UPSERT_BATCH_SIZE = 100

In [10]:
# ---------------- PINECONE HANDLERS ----------------

def init_pinecone():
    # Create a Pinecone client instance
    # It looks like you have hardcoded an API key here, but also have variables
    # PINECONE_API_KEY and PINECONE_ENV defined earlier.
    # It is recommended to use the variables for consistency.

    # pc = pinecone.Pinecone(api_key="pcsk_2yzKnb_DusX4M95CU1KTjQxkZFPdYWbtFghFc7kUD2cHzpUT4hWPLmMbPgEgT5NgoX3Fib")
    pc = pinecone.Pinecone(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)

    # List indexes and check if the index exists
    # Call the .names() method to get the list of names
    existing_indexes = pc.list_indexes().names() # Added parentheses here
    if INDEX_NAME not in existing_indexes:
        print(f"Index '{INDEX_NAME}' not found. Creating index...")
        # Create the index with ServerlessSpec or PodSpec
        # Assuming ServerlessSpec is desired, adjust cloud and region as needed

        # Also need to import ServerlessSpec and PodSpec if you plan to use them
        # from pinecone import ServerlessSpec, PodSpec

        pc.create_index(
            name=INDEX_NAME,
            dimension=EMBEDDING_DIM,
            metric="cosine",
            # Ensure you import ServerlessSpec or PodSpec if using them
            # spec=ServerlessSpec(cloud='aws', region='us-west-2') # Adjust cloud and region as needed
            # Alternatively, use PodSpec if you are using pod-based indexes:
            # spec=PodSpec(environment=PINECONE_ENV, pod_type="p1.x1") # Adjust environment and pod_type
            # Using PodSpec and environment from variables
            spec=pinecone.PodSpec(environment=PINECONE_ENV, pod_type="p1.x1") # Using PodSpec as an example
        )
        print(f"Index '{INDEX_NAME}' created.")
    else:
        print(f"Index '{INDEX_NAME}' already exists.")

    # Connect to the index
    return pc.Index(INDEX_NAME)

# The rest of the code remains the same
def format_embeddings(vectors: List[np.ndarray], ids: List[str], metadata_list: List[dict] = None):
    formatted = []
    for i, vec in enumerate(vectors):
        vec_id = ids[i]
        vec_data = vec.tolist()
        metadata = metadata_list[i] if metadata_list else None
        formatted.append((vec_id, vec_data, metadata) if metadata else (vec_id, vec_data))
    return formatted

def batch_upsert(index, data: List[Union[tuple, list]], batch_size: int = 100):
    for i in range(0, len(data), batch_size):
        batch = data[i:i + batch_size]
        # Upsert using the index object
        index.upsert(vectors=batch)
        print(f"✅ Upserted batch {i // batch_size + 1}: {len(batch)} vectors")

# ---------------- MAIN FUNCTION ----------------

def process_audio_directory(directory_path):
    embeddings = []
    ids = []
    metadata_list = []

    index = init_pinecone()
    print("🔍 Scanning for audio files...")

    for file_name in os.listdir(directory_path):
        if file_name.lower().endswith((".wav", ".mp3")):
            file_path = os.path.join(directory_path, file_name)

            try:
                embedding = audio_to_embedding(file_path)
                test_embedding(embedding, file_name)

                embeddings.append(embedding)
                ids.append(str(uuid.uuid4()))
                metadata_list.append({"file_name": file_name, "source": "audio-directory"})

            except Exception as e:
                print(f"❌ Skipping {file_name}: {e}")

    if embeddings:
        formatted = format_embeddings(embeddings, ids, metadata_list)
        batch_upsert(index, formatted, UPSERT_BATCH_SIZE)
        print(f"\n🎉 Finished processing {len(embeddings)} audio files.")
    else:
        print("⚠️ No valid audio files found.")

# ---------------- EXECUTION ----------------

if __name__ == "__main__":
    folder_path = "/content/sample_data/audio_samples"   # 🔁 Change this to your folder path
    process_audio_directory(folder_path)

INFO:speechbrain.utils.fetching:Fetch hyperparams.yaml: Using symlink found at '/content/pretrained_models/spkrec-ecapa-voxceleb/hyperparams.yaml'
INFO:speechbrain.utils.fetching:Fetch custom.py: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached


Index 'voice-biometrics' already exists.
🔍 Scanning for audio files...


DEBUG:speechbrain.utils.parameter_transfer:Collecting files (or symlinks) for pretraining in pretrained_models/spkrec-ecapa-voxceleb.
INFO:speechbrain.utils.fetching:Fetch embedding_model.ckpt: Using symlink found at '/content/pretrained_models/spkrec-ecapa-voxceleb/embedding_model.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["embedding_model"] = /content/pretrained_models/spkrec-ecapa-voxceleb/embedding_model.ckpt
INFO:speechbrain.utils.fetching:Fetch mean_var_norm_emb.ckpt: Using symlink found at '/content/pretrained_models/spkrec-ecapa-voxceleb/mean_var_norm_emb.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["mean_var_norm_emb"] = /content/pretrained_models/spkrec-ecapa-voxceleb/mean_var_norm_emb.ckpt
INFO:speechbrain.utils.fetching:Fetch classifier.ckpt: Using symlink found at '/content/pretrained_models/spkrec-ecapa-voxceleb/classifier.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["clas


=== TEST: Uma.mp4.wav ===
Type: <class 'numpy.ndarray'>
Shape: (192,)
Min: -52.5055, Max: 38.1098, Mean: -1.6398, Std: 17.8356
First 5: [ 30.638468 -19.253576   9.849755  -6.862141  23.764004] ... Last 5: [ 4.388326 19.963375 18.87376  11.841298 -9.76167 ]
✓ Valid embedding


DEBUG:speechbrain.utils.parameter_transfer:Collecting files (or symlinks) for pretraining in pretrained_models/spkrec-ecapa-voxceleb.
INFO:speechbrain.utils.fetching:Fetch embedding_model.ckpt: Using symlink found at '/content/pretrained_models/spkrec-ecapa-voxceleb/embedding_model.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["embedding_model"] = /content/pretrained_models/spkrec-ecapa-voxceleb/embedding_model.ckpt
INFO:speechbrain.utils.fetching:Fetch mean_var_norm_emb.ckpt: Using symlink found at '/content/pretrained_models/spkrec-ecapa-voxceleb/mean_var_norm_emb.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["mean_var_norm_emb"] = /content/pretrained_models/spkrec-ecapa-voxceleb/mean_var_norm_emb.ckpt
INFO:speechbrain.utils.fetching:Fetch classifier.ckpt: Using symlink found at '/content/pretrained_models/spkrec-ecapa-voxceleb/classifier.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["clas


=== TEST: Unnimaya.mp4.wav ===
Type: <class 'numpy.ndarray'>
Shape: (192,)
Min: -54.0115, Max: 55.3779, Mean: 0.1322, Std: 20.2655
First 5: [21.935171 27.508474 12.10532  18.792778 34.357635] ... Last 5: [ 14.363921    1.7086402 -16.356928    5.912272    9.303451 ]
✓ Valid embedding


PineconeApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 02 Jun 2025 17:39:48 GMT', 'Content-Type': 'application/json', 'Content-Length': '103', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '65', 'x-pinecone-request-id': '1104578159837021453', 'x-envoy-upstream-service-time': '66', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 192 does not match the dimension of the index 1024","details":[]}


In [14]:
!pip install speechbrain torchaudio torch pinecone

import os
import numpy as np
import torch
import torchaudio
from speechbrain.pretrained import EncoderClassifier
import pinecone
import uuid
from typing import List, Union

# ---------------- PINECONE CONFIGURATION ----------------
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "pcsk_2yzKnb_DusX4M95CU1KTjQxkZFPdYWbtFghFc7kUD2cHzpUT4hWPLmMbPgEgT5NgoX3Fib")  # Hardcoded for testing; use env vars in production
PINECONE_ENV = os.getenv("PINECONE_ENV", "us-east-1")  # Hardcoded for testing; use env vars in production
INDEX_NAME = "voice-biometrics"
EMBEDDING_DIM = 192  # Matches ECAPA-TDNN output
UPSERT_BATCH_SIZE = 100

# ---------------- AUDIO TO EMBEDDING FUNCTION ----------------

def audio_to_embedding(audio_path):
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    try:
        model = EncoderClassifier.from_hparams(
            source="speechbrain/spkrec-ecapa-voxceleb",
            savedir="pretrained_models/spkrec-ecapa-voxceleb"
        )

        waveform, sample_rate = torchaudio.load(audio_path)

        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)

        if sample_rate != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
            waveform = resampler(waveform)

        min_length = 16000
        if waveform.shape[1] < min_length:
            padding = min_length - waveform.shape[1]
            waveform = torch.nn.functional.pad(waveform, (0, padding))

        embedding = model.encode_batch(waveform)

        return embedding.squeeze().numpy()

    except Exception as e:
        raise RuntimeError(f"Error processing audio: {str(e)}")

# ---------------- EMBEDDING TEST ----------------

def test_embedding(embedding, file_name=""):
    print(f"\n=== TEST: {file_name} ===")
    print(f"Type: {type(embedding)}")
    print(f"Shape: {embedding.shape}")
    print(f"Min: {embedding.min():.4f}, Max: {embedding.max():.4f}, Mean: {embedding.mean():.4f}, Std: {embedding.std():.4f}")
    print("First 5:", embedding[:5], "... Last 5:", embedding[-5:])

    if isinstance(embedding, np.ndarray) and embedding.ndim == 1 and embedding.shape[0] > 0:
        print("✓ Valid embedding")
    else:
        print("✗ Invalid embedding")

# ---------------- PINECONE HANDLERS ----------------

def init_pinecone():
    from pinecone import PodSpec  # Import required spec

    pc = pinecone.Pinecone(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)

    # Check if index exists and verify its dimension
    existing_indexes = pc.list_indexes().names()
    if INDEX_NAME in existing_indexes:
        index_description = pc.describe_index(INDEX_NAME)
        if index_description.dimension != EMBEDDING_DIM:
            print(f"Index '{INDEX_NAME}' exists with dimension {index_description.dimension}, but {EMBEDDING_DIM} is required. Deleting and recreating...")
            pc.delete_index(INDEX_NAME)
            print(f"Deleted index '{INDEX_NAME}'.")
            create_index(pc)
        else:
            print(f"Index '{INDEX_NAME}' already exists with correct dimension ({EMBEDDING_DIM}).")
    else:
        print(f"Index '{INDEX_NAME}' not found. Creating index...")
        create_index(pc)

    return pc.Index(INDEX_NAME)

def create_index(pc):
    try:
        pc.create_index(
            name=INDEX_NAME,
            dimension=EMBEDDING_DIM,
            metric="cosine",
            spec=pinecone.PodSpec(environment=PINECONE_ENV, pod_type="p1.x1")
        )
        print(f"Index '{INDEX_NAME}' created with dimension {EMBEDDING_DIM}.")
    except Exception as e:
        raise RuntimeError(f"Failed to create index: {str(e)}")

def format_embeddings(vectors: List[np.ndarray], ids: List[str], metadata_list: List[dict] = None):
    formatted = []
    for i, vec in enumerate(vectors):
        vec_id = ids[i]
        vec_data = vec.tolist()
        metadata = metadata_list[i] if metadata_list else None
        formatted.append((vec_id, vec_data, metadata) if metadata else (vec_id, vec_data))
    return formatted

def batch_upsert(index, data: List[Union[tuple, list]], batch_size: int = 100):
    try:
        for i in range(0, len(data), batch_size):
            batch = data[i:i + batch_size]
            index.upsert(vectors=batch)
            print(f"✅ Upserted batch {i // batch_size + 1}: {len(batch)} vectors")
    except Exception as e:
        raise RuntimeError(f"Error upserting vectors: {str(e)}")

# ---------------- AUTHENTICATION FUNCTION ----------------

def authenticate_user(audio_path, index, threshold=0.8):
    try:
        query_embedding = audio_to_embedding(audio_path)
        results = index.query(vector=query_embedding.tolist(), top_k=1, include_metadata=True)
        if results["matches"]:
            match = results["matches"][0]
            if match["score"] >= threshold:
                return True, match["metadata"]["file_name"], match["score"]
            else:
                return False, None, match["score"]
        return False, None, 0.0
    except Exception as e:
        print(f"Authentication failed: {str(e)}")
        return False, None, 0.0

# ---------------- MAIN FUNCTION ----------------

def process_audio_directory(directory_path):
    embeddings = []
    ids = []
    metadata_list = []

    index = init_pinecone()
    print("🔍 Scanning for audio files...")

    for file_name in os.listdir(directory_path):
        if file_name.lower().endswith((".wav", ".mp3")):
            file_path = os.path.join(directory_path, file_name)

            try:
                embedding = audio_to_embedding(file_path)
                test_embedding(embedding, file_name)

                embeddings.append(embedding)
                ids.append(str(uuid.uuid4()))
                metadata_list.append({"file_name": file_name, "source": "audio-directory"})

            except Exception as e:
                print(f"❌ Skipping {file_name}: {e}")

    if embeddings:
        formatted = format_embeddings(embeddings, ids, metadata_list)
        batch_upsert(index, formatted, UPSERT_BATCH_SIZE)
        print(f"\n🎉 Finished processing {len(embeddings)} audio files.")
    else:
        print("⚠️ No valid audio files found.")

# ---------------- EXECUTION ----------------

if __name__ == "__main__":
    folder_path = "/content/sample_data/audio_samples"  # 🔁 Change this to your folder path
    process_audio_directory(folder_path)

    # Example authentication test
    index = init_pinecone()
    test_audio_path = "/content/sample_data/audio_samples/test.wav"  # 🔁 Replace with a test audio file
    authenticated, matched_file, score = authenticate_user(test_audio_path, index, threshold=0.8)
    if authenticated:
        print(f"✅ User authenticated! Matched file: {matched_file}, Score: {score:.4f}")
    else:
        print(f"❌ Authentication failed. Score: {score:.4f}")

Index 'voice-biometrics' exists with dimension 1024, but 192 is required. Deleting and recreating...
Deleted index 'voice-biometrics'.


RuntimeError: Failed to create index: (404)
Reason: Not Found
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=utf-8', 'access-control-allow-origin': '*', 'vary': 'origin,access-control-request-method,access-control-request-headers', 'access-control-expose-headers': '*', 'x-pinecone-api-version': '2025-04', 'x-cloud-trace-context': '6c9822feac31e9f890182090a6451cdf', 'date': 'Mon, 02 Jun 2025 17:44:38 GMT', 'server': 'Google Frontend', 'Content-Length': '84', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"error":{"code":"NOT_FOUND","message":"Resource us-east-1 not found"},"status":404}
