In [1]:
import tensorflow as tf
import librosa
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler

In [2]:
# Saved artifacts needed for inference: the Keras model file and the pickled
# label encoder used to turn predicted class indices back into labels.
MODEL_PATH = 'model_speaker_recognition_BILSTM.keras'
ENCODER_PATH = 'label_encoder.pkl'

loaded_model = tf.keras.models.load_model(MODEL_PATH)

# NOTE: pickle.load can execute arbitrary code while unpickling, so only load
# an encoder file that comes from a trusted source.
with open(ENCODER_PATH, 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)

In [3]:
# Function to extract features from a new audio file
def extract_features_from_file(file_path, fixed_length=40, n_mfcc=13, duration=1):
    """Load an audio file and return a fixed-length MFCC sequence.

    Parameters
    ----------
    file_path : str or path-like
        Audio file passed to ``librosa.load``.
    fixed_length : int, default 40
        Number of frames in the returned sequence. Shorter sequences are
        zero-padded at the end; longer ones are truncated.
    n_mfcc : int, default 13
        Number of MFCC coefficients computed per frame.
    duration : float or None, default 1
        Seconds of audio to read from the start of the file; ``None`` reads
        the whole file.

    Returns
    -------
    numpy.ndarray or None
        Array with ``fixed_length`` rows (frames) and ``n_mfcc`` columns, or
        ``None`` when loading or feature extraction fails (the error is
        printed).

    Raises
    ------
    ValueError
        If ``fixed_length`` is not positive. Without this check a value of 0
        returns an empty array and a negative value slices from the end,
        silently producing a sequence of the wrong length.
    """
    if fixed_length <= 0:
        raise ValueError(
            f"fixed_length must be a positive integer, got {fixed_length!r}"
        )

    try:
        # sr=None keeps the file's native sampling rate instead of resampling.
        audio, sr = librosa.load(file_path, sr=None, duration=duration)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

        # Normalize MFCC features.
        # NOTE(review): StandardScaler standardizes column by column, and the
        # MFCC matrix is laid out as (coefficients, frames), so each frame is
        # scaled across its coefficients. Presumably this mirrors the
        # preprocessing used at training time -- confirm before changing it.
        mfccs = StandardScaler().fit_transform(mfccs)

        # Force exactly `fixed_length` frames: zero-pad on the right when the
        # clip is short, otherwise keep only the leading frames.
        n_frames = mfccs.shape[1]
        if n_frames < fixed_length:
            mfccs = np.pad(
                mfccs, ((0, 0), (0, fixed_length - n_frames)), mode='constant'
            )
        else:
            mfccs = mfccs[:, :fixed_length]

        # Transpose to (frames, coefficients), the layout fed to the model.
        return mfccs.T
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller
        # decide what to do with a missing result.
        print(f"Error processing {file_path}: {str(e)}")
        return None

# Predict the speaker for a new audio file
def predict_speaker(file_path):
    """Print the speaker predicted for the audio file at ``file_path``.

    Features come from ``extract_features_from_file``; the module-level
    ``loaded_model`` scores them and ``label_encoder`` maps the winning class
    index back to its label. Nothing is returned: the prediction (or a
    failure notice when no features could be extracted) is only printed.
    """
    mfcc_sequence = extract_features_from_file(file_path)

    # Guard clause: feature extraction signals failure by returning None.
    if mfcc_sequence is None:
        print("Could not extract features from the file.")
        return

    # Wrap the single sequence in a leading batch axis before predicting.
    batch = mfcc_sequence[np.newaxis, ...]

    # Score the batch and keep the index of the highest-scoring class per row.
    class_scores = loaded_model.predict(batch)
    best_class = np.argmax(class_scores, axis=1)

    # Map the class index back to the original speaker label.
    speaker = label_encoder.inverse_transform(best_class)[0]

    print(f"Predicted speaker for {file_path}: {speaker}")

In [4]:
# Run the prediction on a sample recording; predict_speaker prints the
# decoded speaker label (or a failure message) for this path.
test_audio_path = "Speaker9_test.wav"
predict_speaker(test_audio_path)

Predicted speaker for Speaker9_test.wav: S9
