In [4]:
from transformers import Wav2Vec2FeatureExtractor, WavLMForXVector
from datasets import load_dataset
import torch
import torchaudio
from scipy.io import wavfile
from scipy import signal
import pandas as pd
import numpy as np
import os
from pathlib import Path
import librosa

# The feature extractor and the x-vector model are both loaded from the same
# Hugging Face checkpoint id, so keep that id in one place.
MODEL_ID = 'microsoft/wavlm-base-plus-sv'
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID)
model = WavLMForXVector.from_pretrained(MODEL_ID)

def extract_wav(path, f_low, f_high):
    """Read a WAV file and band-pass filter it between ``f_low`` and ``f_high``.

    Args:
        path: Location of the WAV file, as accepted by ``scipy.io.wavfile.read``.
        f_low: Lower pass-band edge in Hz.
        f_high: Upper pass-band edge in Hz; must be below half of the file's
            sample rate.

    Returns:
        A 1-D ``float64`` array with the zero-phase filtered signal at the
        file's own sample rate. Amplitudes keep the scale of the stored
        samples; no normalisation is applied.

    Raises:
        ValueError: If the cut-offs do not satisfy
            ``0 < f_low < f_high < sample_rate / 2``.

    NOTE(review): nothing here resamples. Callers in this notebook pass the
    result to ``extract_embedding``, which declares 16 kHz to the feature
    extractor -- confirm the input files really are 16 kHz.
    """
    sr, audio_data = wavfile.read(path)
    samples = np.asarray(audio_data, dtype=np.float64)

    # Multi-channel files are returned as (n_samples, n_channels). Filtering
    # that array along its last axis would run across the channels instead of
    # along time, so average the channels into a single mono track first.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    nyquist_rate = 0.5 * sr
    if not 0 < f_low < f_high < nyquist_rate:
        raise ValueError(
            f"Band edges must satisfy 0 < f_low < f_high < {nyquist_rate} Hz "
            f"(got f_low={f_low}, f_high={f_high}) for {path}"
        )

    # Design the Butterworth band-pass as second-order sections: the (b, a)
    # polynomial form of a 6th-order band-pass is numerically fragile,
    # especially with a low edge that is tiny relative to Nyquist.
    filter_order = 6
    filter_cutoff = [f_low / nyquist_rate, f_high / nyquist_rate]
    sos = signal.butter(filter_order, filter_cutoff, btype='bandpass', output='sos')

    # Forward-backward filtering gives zero phase shift.
    return signal.sosfiltfilt(sos, samples)

def extract_embedding(audio_data):
    """Compute the model's embedding for a single waveform.

    Args:
        audio_data: One waveform (e.g. a 1-D NumPy array). It is passed to the
            feature extractor declared as 16 kHz audio; no resampling is done
            here, so the caller must supply 16 kHz samples.

    Returns:
        The embedding tensor for this waveform, i.e. the first (and only) row
        of the model's ``embeddings`` output. It is computed without autograd
        tracking, so it carries no gradient graph.
    """
    # The feature extractor works on batches, so wrap the waveform in a list.
    inputs = feature_extractor(
        [audio_data], padding=True, return_tensors="pt", sampling_rate=16000
    )

    # Inference only: skip autograd so the result (which callers save to disk)
    # does not hold on to the computation graph.
    with torch.no_grad():
        embeddings = model(**inputs).embeddings

    return embeddings[0]

def calculate_score(embeddings1, embeddings2):
    """Return the cosine similarity of two embeddings as a Python float.

    Args:
        embeddings1: First embedding tensor.
        embeddings2: Second embedding tensor, same shape as ``embeddings1``.

    Returns:
        Cosine similarity taken over the last dimension. The result must be a
        single-element tensor for the ``float`` conversion to succeed, which
        is the case for two 1-D embeddings.

    No accept/reject threshold is applied here. The notebook previously noted
    0.86 as a candidate threshold and that the optimal value is
    dataset-dependent; callers should apply their own.
    """
    similarity = torch.nn.functional.cosine_similarity(
        embeddings1, embeddings2, dim=-1
    )
    return float(similarity)

In [5]:
# Load the example clip at 16 kHz, embed it, and save the tensor next to the
# audio file.
wav_file, pt_file = "Example/p233_001.wav", "Example/p233_001.pt"
y, sr = librosa.load(wav_file, sr=16000)
embedding_data = extract_embedding(y)
torch.save(embedding_data, pt_file)



In [7]:
# Load the example clip at 16 kHz, embed it, and save the tensor next to the
# audio file.
wav_file, pt_file = "Example/p233_006.wav", "Example/p233_006.pt"
y, sr = librosa.load(wav_file, sr=16000)
embedding_data = extract_embedding(y)
torch.save(embedding_data, pt_file)



In [2]:
# Band-pass edges in Hz, passed to extract_wav as (f_low, f_high).
FLOW, FHIGH = 50, 4000

In [3]:
# Band-pass edges in Hz, passed to extract_wav as (f_low, f_high).
FLOW = 50
FHIGH = 4000

# Embed every recording listed in df_sample and write one .pt file per row.
# NOTE(review): df_sample is not created in any visible live cell -- it has to
# be loaded before this cell runs.
for index, row in df_sample.iterrows():
    sample_path = row['fullpath']
    # Output folder: the input path with "datasets_split" swapped for
    # "embeddings" and the "<filename>.wav" part removed.
    save_path = row['fullpath'].replace("datasets_split","embeddings").replace(f"{row['filename']}.wav","")
    Path(save_path).mkdir(parents=True, exist_ok=True)

    try:
        embedding_data = extract_embedding(extract_wav(sample_path, FLOW, FHIGH))
        torch.save(embedding_data, f"{save_path}{row['filename']}.pt")
    except Exception as exc:
        # Best effort: carry on with the remaining files, but report which
        # input failed and why. Catching Exception (not a bare except) lets
        # KeyboardInterrupt still stop the loop.
        print(f"Failed on {sample_path}: {exc!r}")





In [4]:
# Embed every recording listed in df_unknown using the shared FLOW/FHIGH band
# and write one .pt file per row.
for _, rec in df_unknown.iterrows():
    wav_path = rec['fullpath']
    stem = rec['filename']
    # Output folder: the input path with "datasets_split" swapped for
    # "embeddings" and the "<filename>.wav" part removed.
    out_dir = wav_path.replace("datasets_split","embeddings").replace(f"{stem}.wav","")
    Path(out_dir).mkdir(parents=True, exist_ok=True)

    filtered = extract_wav(wav_path, FLOW, FHIGH)
    embedding_data = extract_embedding(filtered)
    torch.save(embedding_data, f"{out_dir}{stem}.pt")

In [None]:
# Embeddings for one speaker: first the rows of df_unknown whose
# "sample_compare" column equals the name, then the rows of df_sample whose
# "speaker" column does. Both use the same band-pass edges.
# NOTE(review): where(...).dropna() also drops matching rows that contain a
# NaN in any other column; plain boolean indexing may be what is intended.
speaker_name = "Andreas Guntoro"
f_low, f_high = 50, 4000

for frame, key in ((df_unknown, "sample_compare"), (df_sample, "speaker")):
    matches = frame.where(frame[key] == speaker_name).dropna()
    for _, rec in matches.iterrows():
        wav_path = rec['fullpath']
        stem = rec['filename']
        # Output folder: input path with "datasets" swapped for "embeddings"
        # and the "<filename>.wav" part removed.
        out_dir = wav_path.replace("datasets","embeddings").replace(f"{stem}.wav","")
        Path(out_dir).mkdir(parents=True, exist_ok=True)

        embedding_data = extract_embedding(extract_wav(wav_path, f_low, f_high))
        torch.save(embedding_data, f"{out_dir}{stem}.pt")

In [3]:
# Embeddings for one speaker: first the rows of df_unknown whose
# "sample_compare" column equals the name, then the rows of df_sample whose
# "speaker" column does. Both use the same band-pass edges.
# NOTE(review): where(...).dropna() also drops matching rows that contain a
# NaN in any other column; plain boolean indexing may be what is intended.
speaker_name = "Asep Gunawan"
f_low, f_high = 300, 3500

for frame, key in ((df_unknown, "sample_compare"), (df_sample, "speaker")):
    matches = frame.where(frame[key] == speaker_name).dropna()
    for _, rec in matches.iterrows():
        wav_path = rec['fullpath']
        stem = rec['filename']
        # Output folder: input path with "datasets" swapped for "embeddings"
        # and the "<filename>.wav" part removed.
        out_dir = wav_path.replace("datasets","embeddings").replace(f"{stem}.wav","")
        Path(out_dir).mkdir(parents=True, exist_ok=True)

        embedding_data = extract_embedding(extract_wav(wav_path, f_low, f_high))
        torch.save(embedding_data, f"{out_dir}{stem}.pt")



In [None]:
# Embeddings for one speaker: first the rows of df_unknown whose
# "sample_compare" column equals the name, then the rows of df_sample whose
# "speaker" column does.
# NOTE(review): the two frames are filtered with different lower edges here
# (150 Hz for df_unknown, 50 Hz for df_sample), whereas the other per-speaker
# cells in this notebook use one band for both -- confirm this is deliberate.
# NOTE(review): where(...).dropna() also drops matching rows that contain a
# NaN in any other column; plain boolean indexing may be what is intended.
speaker_name = "Dadang"
f_high = 4000

for frame, key, f_low in ((df_unknown, "sample_compare", 150), (df_sample, "speaker", 50)):
    matches = frame.where(frame[key] == speaker_name).dropna()
    for _, rec in matches.iterrows():
        wav_path = rec['fullpath']
        stem = rec['filename']
        # Output folder: input path with "datasets" swapped for "embeddings"
        # and the "<filename>.wav" part removed.
        out_dir = wav_path.replace("datasets","embeddings").replace(f"{stem}.wav","")
        Path(out_dir).mkdir(parents=True, exist_ok=True)

        embedding_data = extract_embedding(extract_wav(wav_path, f_low, f_high))
        torch.save(embedding_data, f"{out_dir}{stem}.pt")

In [10]:
# Embeddings for one speaker: first the rows of df_unknown whose
# "sample_compare" column equals the name, then the rows of df_sample whose
# "speaker" column does. Both use the same band-pass edges.
# NOTE(review): where(...).dropna() also drops matching rows that contain a
# NaN in any other column; plain boolean indexing may be what is intended.
speaker_name = "Nadya Nurul Anisa"
f_low, f_high = 150, 3500

for frame, key in ((df_unknown, "sample_compare"), (df_sample, "speaker")):
    matches = frame.where(frame[key] == speaker_name).dropna()
    for _, rec in matches.iterrows():
        wav_path = rec['fullpath']
        stem = rec['filename']
        # Output folder: input path with "datasets" swapped for "embeddings"
        # and the "<filename>.wav" part removed.
        out_dir = wav_path.replace("datasets","embeddings").replace(f"{stem}.wav","")
        Path(out_dir).mkdir(parents=True, exist_ok=True)

        embedding_data = extract_embedding(extract_wav(wav_path, f_low, f_high))
        torch.save(embedding_data, f"{out_dir}{stem}.pt")



In [2]:
# Embeddings for one speaker: first the rows of df_unknown whose
# "sample_compare" column equals the name, then the rows of df_sample whose
# "speaker" column does. Both use the same band-pass edges.
# NOTE(review): where(...).dropna() also drops matching rows that contain a
# NaN in any other column; plain boolean indexing may be what is intended.
speaker_name = "Khairur Rizal"
f_low, f_high = 350, 3500

for frame, key in ((df_unknown, "sample_compare"), (df_sample, "speaker")):
    matches = frame.where(frame[key] == speaker_name).dropna()
    for _, rec in matches.iterrows():
        wav_path = rec['fullpath']
        stem = rec['filename']
        # Output folder: input path with "datasets" swapped for "embeddings"
        # and the "<filename>.wav" part removed.
        out_dir = wav_path.replace("datasets","embeddings").replace(f"{stem}.wav","")
        Path(out_dir).mkdir(parents=True, exist_ok=True)

        embedding_data = extract_embedding(extract_wav(wav_path, f_low, f_high))
        torch.save(embedding_data, f"{out_dir}{stem}.pt")



In [3]:
# Embeddings for one speaker: first the rows of df_unknown whose
# "sample_compare" column equals the name, then the rows of df_sample whose
# "speaker" column does. Both use the same band-pass edges.
# NOTE(review): where(...).dropna() also drops matching rows that contain a
# NaN in any other column; plain boolean indexing may be what is intended.
speaker_name = "Ferlian Hady"
f_low, f_high = 150, 3500

for frame, key in ((df_unknown, "sample_compare"), (df_sample, "speaker")):
    matches = frame.where(frame[key] == speaker_name).dropna()
    for _, rec in matches.iterrows():
        wav_path = rec['fullpath']
        stem = rec['filename']
        # Output folder: input path with "datasets" swapped for "embeddings"
        # and the "<filename>.wav" part removed.
        out_dir = wav_path.replace("datasets","embeddings").replace(f"{stem}.wav","")
        Path(out_dir).mkdir(parents=True, exist_ok=True)

        embedding_data = extract_embedding(extract_wav(wav_path, f_low, f_high))
        torch.save(embedding_data, f"{out_dir}{stem}.pt")



In [3]:
# Embeddings for one speaker: first the rows of df_unknown whose
# "sample_compare" column equals the name, then the rows of df_sample whose
# "speaker" column does. Both use the same band-pass edges.
# NOTE(review): where(...).dropna() also drops matching rows that contain a
# NaN in any other column; plain boolean indexing may be what is intended.
speaker_name = "Sony Setiadi"
f_low, f_high = 350, 3500

for frame, key in ((df_unknown, "sample_compare"), (df_sample, "speaker")):
    matches = frame.where(frame[key] == speaker_name).dropna()
    for _, rec in matches.iterrows():
        wav_path = rec['fullpath']
        stem = rec['filename']
        # Output folder: input path with "datasets" swapped for "embeddings"
        # and the "<filename>.wav" part removed.
        out_dir = wav_path.replace("datasets","embeddings").replace(f"{stem}.wav","")
        Path(out_dir).mkdir(parents=True, exist_ok=True)

        embedding_data = extract_embedding(extract_wav(wav_path, f_low, f_high))
        torch.save(embedding_data, f"{out_dir}{stem}.pt")



In [13]:
# Embeddings for one speaker: first the rows of df_unknown whose
# "sample_compare" column equals the name (each input path is printed), then
# the rows of df_sample whose "speaker" column does (no printing). Both use
# the same band-pass edges.
# NOTE(review): where(...).dropna() also drops matching rows that contain a
# NaN in any other column; plain boolean indexing may be what is intended.
speaker_name = "Yana Mulyana"
f_low, f_high = 150, 3500

for frame, key, echo in ((df_unknown, "sample_compare", True), (df_sample, "speaker", False)):
    matches = frame.where(frame[key] == speaker_name).dropna()
    for _, rec in matches.iterrows():
        wav_path = rec['fullpath']
        stem = rec['filename']
        # Output folder: input path with "datasets" swapped for "embeddings"
        # and the "<filename>.wav" part removed.
        out_dir = wav_path.replace("datasets","embeddings").replace(f"{stem}.wav","")
        Path(out_dir).mkdir(parents=True, exist_ok=True)
        if echo:
            print(wav_path)

        embedding_data = extract_embedding(extract_wav(wav_path, f_low, f_high))
        torch.save(embedding_data, f"{out_dir}{stem}.pt")

datasets/unknown/Yana Mulyana/phone1_unknown2_15.wav




datasets/unknown/Yana Mulyana/phone2_unknown1_2.wav
datasets/unknown/Yana Mulyana/phone2_unknown1_3.wav
datasets/unknown/Yana Mulyana/phone2_unknown1_4.wav
datasets/unknown/Yana Mulyana/phone2_unknown1_5.wav
datasets/unknown/Yana Mulyana/phone2_unknown1_6.wav
datasets/unknown/Yana Mulyana/phone2_unknown1_7.wav
datasets/unknown/Yana Mulyana/phone2_unknown1_8.wav
datasets/unknown/Yana Mulyana/phone2_unknown1_9.wav
datasets/unknown/Yana Mulyana/phone2_unknown2_1.wav
datasets/unknown/Yana Mulyana/phone2_unknown2_10.wav
datasets/unknown/Yana Mulyana/phone2_unknown2_2.wav
datasets/unknown/Yana Mulyana/phone2_unknown2_3.wav
datasets/unknown/Yana Mulyana/phone2_unknown2_4.wav
datasets/unknown/Yana Mulyana/phone2_unknown2_5.wav
datasets/unknown/Yana Mulyana/phone2_unknown2_6.wav
datasets/unknown/Yana Mulyana/phone2_unknown2_7.wav
datasets/unknown/Yana Mulyana/phone2_unknown2_8.wav
datasets/unknown/Yana Mulyana/phone2_unknown2_9.wav
datasets/unknown/Yana Mulyana/phone1_unknown1_1.wav
datasets/un



In [None]:
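# Disabled cell, kept for reference. If re-enabled, note that extract_wav
# takes f_low and f_high in addition to the path.
# for index, row in df_unknown.iterrows():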
#     sample_path = row['fullpath']
#     save_path = row['fullpath'].replace("datasets","embeddings").replace(f"{row['filename']}.wav","")
#     Path(save_path).mkdir(parents=True, exist_ok=True)
#     print(sample_path)

#     embedding_data = extract_embedding(extract_wav(sample_path))
#     torch.save(embedding_data, f"{save_path}{row['filename']}.pt")

In [None]:
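# Disabled cell, kept for reference. If re-enabled, note that extract_wav
# takes f_low and f_high in addition to the path.
# df_sample = pd.read_csv("data_sample_telephone.csv")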
# df_unknown = pd.read_csv("data_uknown_telephone.csv")

# for index, row in df_sample.iterrows():
#     sample_path = row['fullpath']
#     save_path = row['fullpath'].replace("datasets_telephone","embeddings_telephone").replace(f"{row['filename']}.wav","")
#     Path(save_path).mkdir(parents=True, exist_ok=True)
#     print(sample_path)

#     embedding_data = extract_embedding(extract_wav(sample_path))
#     torch.save(embedding_data, f"{save_path}{row['filename']}.pt")

# for index, row in df_unknown.iterrows():
#     sample_path = row['fullpath']
#     save_path = row['fullpath'].replace("datasets_telephone","embeddings_telephone").replace(f"{row['filename']}.wav","")
#     Path(save_path).mkdir(parents=True, exist_ok=True)
#     print(sample_path)

#     embedding_data = extract_embedding(extract_wav(sample_path))
#     torch.save(embedding_data, f"{save_path}{row['filename']}.pt")