In [1]:
import numpy as np
import librosa
import tensorflow as tf
from itertools import combinations
from tensorflow.keras import layers
import os
import random

In [2]:
def build_pairs(num_pairs=5, num_speakers=60, data_path='AudioMNIST/data'):
    """Sample labelled same-speaker and different-speaker file pairs.

    Every sub-directory of ``data_path`` is treated as one speaker. For each
    speaker, ``num_pairs`` negative pairs are drawn against every other
    speaker, and the same total number of positive pairs is drawn from the
    speaker's own files, so both classes end up the same size.

    Args:
        num_pairs: Negative pairs drawn per ordered (speaker, other speaker)
            combination.
        num_speakers: Upper bound on the number of speaker folders used.
        data_path: Directory containing one sub-directory per speaker.

    Returns:
        A list of ``((file_a, file_b), label)`` tuples; positives (label 1)
        come first, followed by negatives (label 0).

    Raises:
        ValueError: Propagated from ``random.sample`` when a folder holds too
            few files to supply the requested number of distinct pairs.
    """
    # Sort so that the selected speakers and the iteration order do not depend
    # on the arbitrary ordering of os.listdir.
    speaker_folders = sorted(
        os.path.join(data_path, entry)
        for entry in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, entry))
    )[:num_speakers]

    # List every folder once instead of once per (speaker, other speaker).
    files_by_speaker = {
        folder: sorted(os.path.join(folder, name) for name in os.listdir(folder))
        for folder in speaker_folders
    }

    # Balance against the speakers actually found, not the requested maximum:
    # each speaker contributes num_pairs negatives per other speaker.
    positives_per_speaker = num_pairs * (len(speaker_folders) - 1)

    positive_pairs = []
    negative_pairs = []

    for this_speaker_folder in speaker_folders:
        own_files = files_by_speaker[this_speaker_folder]

        positive_pairs += random.sample(
            list(combinations(own_files, 2)), positives_per_speaker
        )

        for other_speaker_folder in speaker_folders:
            if other_speaker_folder == this_speaker_folder:
                continue
            other_files = files_by_speaker[other_speaker_folder]
            # Draw distinct cells of the (own x other) grid by flat index
            # rather than materialising the whole cross product.
            flat_indices = random.sample(
                range(len(own_files) * len(other_files)), num_pairs
            )
            for flat_index in flat_indices:
                own_idx, other_idx = divmod(flat_index, len(other_files))
                negative_pairs.append((own_files[own_idx], other_files[other_idx]))

    return [(pair, 1) for pair in positive_pairs] + [(pair, 0) for pair in negative_pairs]


# Seed Python's RNG before sampling so the random draws made while building the
# pairs (and by later random.* calls in this notebook) start from a fixed state.
random.seed(42)
pairs = build_pairs()

In [3]:
def extract_mfcc(file_path, n_mfcc=39):
    """Summarise one audio file as a single vector of time-averaged MFCCs.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        A 1-D array with one entry per coefficient, each being that
        coefficient's mean over all frames.
    """
    # sr=None is passed through to librosa.load unchanged.
    signal, sample_rate = librosa.load(file_path, sr=None)
    coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    # Transpose, then average over the leading axis (time, per the original note).
    return coeffs.T.mean(axis=0)

In [None]:
# Turn every labelled file pair into two feature vectors plus a label.
# The same file may appear in several pairs, so each file's MFCC vector is
# computed once and reused instead of re-reading the audio for every pair.
mfcc_cache = {}

x1 = []
x2 = []
y = []

for (file1, file2), label in pairs:
    for path in (file1, file2):
        if path not in mfcc_cache:
            mfcc_cache[path] = extract_mfcc(path)
    x1.append(mfcc_cache[file1])
    x2.append(mfcc_cache[file2])
    y.append(label)

x1 = np.array(x1)
x2 = np.array(x2)
y = np.array(y)

# Shuffle whole (x1, x2, y) rows together so features stay aligned with labels,
# then hold out the last 20% of the shuffled rows for testing.
data = list(zip(x1, x2, y))
random.shuffle(data)
split_idx = int(0.8 * len(data))
train, test = data[:split_idx], data[split_idx:]

# Unzip the row tuples back into three column arrays per split.
x1_train, x2_train, y_train = map(np.array, zip(*train))
x1_test, x2_test, y_test = map(np.array, zip(*test))


In [None]:
# Binary classifier over a 39-dimensional input vector:
# two ReLU hidden layers (64 and 32 units) followed by one sigmoid output unit.
nn = tf.keras.Sequential()
nn.add(layers.Dense(64, activation="relu", input_shape=(39,)))
nn.add(layers.Dense(32, activation="relu"))
nn.add(layers.Dense(1, activation="sigmoid"))

# Adam optimiser with binary cross-entropy loss; accuracy is tracked as a metric.
nn.compile(
    loss="binary_crossentropy",
    optimizer='adam',
    metrics=["accuracy"],
)

In [None]:
# The network is trained on the element-wise absolute difference between the
# two feature vectors of each pair, then written to disk.
train_inputs = tf.abs(x1_train - x2_train)
nn.fit(train_inputs, y_train, epochs=100)
nn.save('model.keras')

In [None]:
# Score the held-out pairs using the same absolute-difference input as training.
test_inputs = tf.abs(x1_test - x2_test)
results = nn.evaluate(test_inputs, y_test)
print("test loss, test acc:", results)