<a href="https://colab.research.google.com/github/YashNigam65/gitfolder/blob/master/similar_audio_search_with_tourch_audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Write a python program to find similar audio files to the one you upload. Store many audio files and compare the one you upload with the stored ones. Using torchaudio framework or any other appropriate framework.

In [26]:
!pip install chromadb
from chromadb.config import Settings
from chromadb import Client



In [27]:
!pip install torch torchaudio
!pip install torchcodec



In [28]:
import torch
import torchaudio
import torchaudio.transforms as T
import numpy as np

print(f"PyTorch version: {torch.__version__}")
print(f"Torchaudio version: {torchaudio.__version__}")

PyTorch version: 2.9.0+cpu
Torchaudio version: 2.9.0+cpu


In [29]:
# Step 1: Function to extract audio embeddings using Mel-frequency cepstral coefficients (MFCC) with torchaudio
def extract_audio_embedding_torchaudio(audio_path):
    # Load audio file using torchaudio
    waveform, sample_rate = torchaudio.load(audio_path)

    # Ensure waveform is mono (if stereo, average the channels)
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    # Create MFCC transform. n_mfcc should be 13 to match librosa's default.
    # n_fft and hop_length are common parameters, adjust if needed.
    mfcc_transform = T.MFCC(sample_rate=sample_rate, n_mfcc=13, melkwargs={"n_fft": 2048, "hop_length": 512})
    mfcc = mfcc_transform(waveform)

    # Take the mean across time frames (the last dimension of MFCC output)
    # mfcc output shape: (num_channels, n_mfcc, num_frames)
    embedding = torch.mean(mfcc, dim=-1).squeeze().numpy() # Squeeze to remove channel dim if mono, then convert to numpy

    return embedding

In [30]:
# Step 2: Initialize a new ChromaDB client and collection for torchaudio embeddings
from chromadb.config import Settings
from chromadb import Client

client_torchaudio = Client(Settings())
collection_torchaudio = client_torchaudio.get_or_create_collection(name="audio_collection_torchaudio")

print(f"ChromaDB collection 'audio_collection_torchaudio' created/retrieved.")

ChromaDB collection 'audio_collection_torchaudio' created/retrieved.


In [31]:
# Step 3: Add audio files to the new vector database using torchaudio embeddings
def add_audio_to_database_torchaudio(audio_paths, collection_obj):
    for idx, audio_path in enumerate(audio_paths):
        try:
            embedding = extract_audio_embedding_torchaudio(audio_path)
            metadata = {"path": audio_path}
            collection_obj.add(
                ids=[f"audio_torchaudio_{idx}"],
                embeddings=[embedding.tolist()], # Convert numpy array to list for ChromaDB
                metadatas=[metadata]
            )
            print(f"Added {audio_path} to collection_torchaudio")
        except Exception as e:
            print(f"Error processing {audio_path}: {e}")

In [32]:
# Step 4: Perform similarity search for an audio query using torchaudio embeddings
def search_similar_audio_torchaudio(query_audio_path, collection_obj, top_k=3):
    query_embedding = extract_audio_embedding_torchaudio(query_audio_path)
    results = collection_obj.query(query_embeddings=[query_embedding.tolist()], n_results=top_k)
    print("Search Results (torchaudio):")
    for result_metadata in results["metadatas"]:
        print("Similar Audio Found:", result_metadata[0]["path"])

In [34]:
# Step 5: Example usage with torchaudio

# Using the dummy audio files generated previously (or your own files)
# audio_files should be a list of paths to your audio files
audio_files_to_add = [
    "/content/sample_data/boly_song.m4a",
    "/content/sample_data/dhun_yt.m4a",
    "/content/sample_data/youTube.m4a",
    "/content/sample_data/yt.m4a"
    # Add more paths if you have other dummy files or real files
]

# Add audio files to the new torchaudio database
add_audio_to_database_torchaudio(audio_files_to_add, collection_torchaudio)

# Query with one of the added audio files (or a new one)
query_audio_torchaudio = "/content/sample_data/boly_song.m4a"
search_similar_audio_torchaudio(query_audio_torchaudio, collection_torchaudio, top_k=3)

print(f"Total items in torchaudio collection: {collection_torchaudio.count()}")

Added /content/sample_data/boly_song.m4a to collection_torchaudio
Added /content/sample_data/dhun_yt.m4a to collection_torchaudio
Added /content/sample_data/youTube.m4a to collection_torchaudio
Added /content/sample_data/yt.m4a to collection_torchaudio
Search Results (torchaudio):
Similar Audio Found: /content/sample_data/boly_song.m4a
Total items in torchaudio collection: 4
