In [1]:
import torchaudio
from collections import defaultdict
import torchaudio.transforms as T
import numpy as np
import pandas as pd
from transformers import Wav2Vec2FeatureExtractor, WavLMForXVector
import torch
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained('microsoft/wavlm-base-plus-sv',sampling_rate=16000)
model = WavLMForXVector.from_pretrained('microsoft/wavlm-base-plus-sv')

In [None]:
from qdrant_client import QdrantClient

qdrant_client = QdrantClient(
    url="",
    api_key="",
)

print(qdrant_client.get_collections())

collections=[CollectionDescription(name='celebrity_voices')]


In [3]:
def preprocess_audio(path):
    signal, sr = torchaudio.load(path)

    if signal.shape[0] > 1:
        signal = signal.mean(dim=0, keepdim=True)

    if sr != 16000:
        resampler = T.Resample(sr, 16000)
        signal = resampler(signal)

    return signal.squeeze().to(torch.float32).tolist()


In [6]:
def search_sim(usr_path):
    audio = preprocess_audio(usr_path)
    inputs = feature_extractor(audio, padding=True, return_tensors="pt", sampling_rate=16000)
    embedding = model(**inputs).embeddings.cpu()
    user_emb = embedding.flatten().detach()

    search_results = qdrant_client.search(
        collection_name="celebrity_voices",
        query_vector=user_emb,
        limit=100
    )

    speaker_best = defaultdict(lambda: {"score": -float("inf"), "payload": None})

    for res in search_results:
        speaker_id = res.payload["id"]
        if res.score > speaker_best[speaker_id]["score"]:
            speaker_best[speaker_id] = {"score": res.score, "payload": res.payload}

    top_speakers = sorted(speaker_best.values(), key=lambda x: x["score"], reverse=True)

    return top_speakers[:5]

In [20]:
top_speakers = search_sim('nikita2.wav')
top_speakers

  search_results = qdrant_client.search(


[{'score': 0.91528195,
  'payload': {'id': 'id10305', 'path': '3QrLepYlH6o', 'num': '00013'}},
 {'score': 0.89936566,
  'payload': {'id': 'id10292', 'path': '6Bh6P9nGsM4', 'num': '00003'}},
 {'score': 0.8867233,
  'payload': {'id': 'id10277', 'path': '0rpfN7wThsg', 'num': '00001'}},
 {'score': 0.8784633,
  'payload': {'id': 'id10276', 'path': '5YncBThNd5E', 'num': '00017'}},
 {'score': 0.8616822,
  'payload': {'id': 'id10279', 'path': 'bmoAV7tZziI', 'num': '00001'}}]

In [None]:
df = pd.read_csv('dataset/vox1_meta.csv',delimiter='\t')
for res in top_speakers:
    name = df[df['VoxCeleb1 ID'] == res['payload']['id']]['VGGFace1 ID'].values[0]
    name = name.replace('_', ' ')
    print('Name:', name, '| Score:', res['score'] )

Name: Eugenio Derbez | Score: 0.91528195
Name: Emraan Hashmi | Score: 0.89936566
Name: Eduardo Noriega | Score: 0.8867233
Name: Edgar Wright | Score: 0.8784633
Name: Efren Ramirez | Score: 0.8616822
