<a href="https://colab.research.google.com/github/YashNigam65/gitfolder/blob/master/similar_audio_search_with_librosa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Write a python program to find similar audio files to the one you upload. Store many audio files and compare the one you upload with the stored ones. Using torchaudio framework or any other appropriate framework.

In [1]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-1.3.7-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.3.0-py3-none-any.whl.metadata (5.6 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.3-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.39.1-py3-none-any.whl.metadata (2.5 kB)
Collecting pypika>=0.48.9 (from chromadb)
  Downloading PyPika-0.48.9.tar.gz (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m

In [2]:
import librosa
import numpy as np
from chromadb.config import Settings
from chromadb import Client

In [5]:
# Step 1: Function to extract audio embeddings using Mel-frequency cepstral coefficients (MFCC)
def extract_audio_embedding(audio_path):
    y, sr = librosa.load(audio_path, sr=None)  # Load audio file #
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)  # Extract MFCC features
    embedding = np.mean(mfcc, axis=1)  # Take the mean across time frames
    return embedding



```
# y, sr = librosa.load(audio_path, sr=None)
```



y: A NumPy array representing the audio time series (the audio waveform).

sr: The sampling rate of the audio. By setting sr=None, librosa will use the native sampling rate of the audio file.


In [6]:
# Step 2: Initialize ChromaDB client
client = Client(Settings())
collection = client.get_or_create_collection(name="audio_collection")

In [7]:
# Step 3: Add audio files to the vector database
def add_audio_to_database(audio_paths):
    for idx, audio_path in enumerate(audio_paths):
        embedding = extract_audio_embedding(audio_path)
        metadata = {"path": audio_path}
        collection.add(
            ids=[f"audio_{idx}"],  # Changed 'embedding_ids' to 'ids'
            embeddings=[embedding],
            metadatas=[metadata]
        )


In [13]:
# Step 4: Perform similarity search for an audio query
def search_similar_audio(query_audio_path, top_k=3):
    query_embedding = extract_audio_embedding(query_audio_path)
    results = collection.query(query_embeddings=[query_embedding], n_results=top_k)
    print("results", results)
    # Accessing the list of metadatas correctly:
    for result_metadata in results["metadatas"]:  # Iterate through the list of metadatas
        print("Similar Audio Found:", result_metadata[0]["path"])  # Access the path from the first element

In [15]:
# Step 5: Example usage
if __name__ == "__main__":
    # Example audio files (replace with your actual audio file paths)
    audio_files = [
        "/content/sample_data/yash_audio.m4a",  # Replace with real audio file paths
        "/content/sample_data/youTube.m4a",
        "/content/sample_data/boly_song.m4a",
        "/content/sample_data/dhun_yt.m4a"
    ]

    # Add audio files to the database
    add_audio_to_database(audio_files)

    # Query with a new audio file
    # query_audio = "/content/sample_data/yt.m4a"  # Replace with the query audio file path
    # search_similar_audio(query_audio, top_k=3)

    query_audio = "/content/sample_data/dhun_yt.m4a"  # Replace with the query audio file path
    search_similar_audio(query_audio, top_k=3)


  y, sr = librosa.load(audio_path, sr=None)  # Load audio file #
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(audio_path, sr=None)  # Load audio file #
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(audio_path, sr=None)  # Load audio file #
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(audio_path, sr=None)  # Load audio file #
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(audio_path, sr=None)  # Load audio file #
	Deprecated as of librosa version 0.10.0.
	It will be removed in libros

results {'ids': [['audio_3', 'audio_2', 'audio_1']], 'embeddings': None, 'documents': [[None, None, None]], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[{'path': '/content/sample_data/dhun_yt.m4a'}, {'path': '/content/sample_data/boly_song.m4a'}, {'path': '/content/sample_data/youTube.m4a'}]], 'distances': [[0.0, 2764.05322265625, 7687.98291015625]]}
Similar Audio Found: /content/sample_data/dhun_yt.m4a
