In [48]:
import os

import tensorflow as tf
import librosa
import numpy as np
import math

In [49]:
# Sample clip to scan: <parent dir>/samples/positive/0_sample.wav
audio_file_path = os.path.join(
    '..',
    'samples',
    'positive',
    '0_sample.wav',
)
# Saved Keras model file (relative path).
model_path = 'wake_word.keras'

In [50]:
# --- Audio / feature-extraction settings ---
# NOTE(review): these presumably have to match the values used when the model
# was trained -- confirm against the training notebook.
SAMPLE_RATE = 16000  # Hz; rate requested from librosa.load
DURATION = 1.5  # seconds of audio in one analysis window
SAMPLES_PER_TRACK = int(SAMPLE_RATE * DURATION)  # samples in one window
N_MFCC = 13  # n_mfcc passed to librosa.feature.mfcc
N_FFT = 2048  # n_fft passed to librosa.feature.mfcc
HOP_LENGTH = 512  # hop_length passed to librosa.feature.mfcc

# A window counts as a detection only when the model's predicted probability
# is strictly greater than this value.
PREDICTION_THRESHOLD = 0.95

In [51]:
def predict_wake_word(model, audio_file_path, step_seconds=0.25):
    """Scan an audio file with a sliding window and print wake-word detections.

    The file is loaded at ``SAMPLE_RATE``, cut into windows of ``DURATION``
    seconds, converted to MFCC features and scored by ``model``. Every window
    whose first output value is strictly greater than ``PREDICTION_THRESHOLD``
    is printed with its start time.

    Args:
        model: Object with a Keras-style ``predict(x, verbose=0)`` method.
            Assumes it accepts input shaped (batch, frames, N_MFCC, 1) and
            returns one row per window -- TODO confirm against training code.
        audio_file_path: Path of the audio file to scan.
        step_seconds: Stride between consecutive window starts, in seconds.

    Returns:
        None. Results are reported with ``print``; a load failure is printed
        and the function returns early.
    """
    print(f"Scanning audio file: {audio_file_path}...")

    try:
        signal, sr = librosa.load(audio_file_path, sr=SAMPLE_RATE)
    except Exception as e:
        print(f"Error loading audio file: {e}")
        return

    num_samples_in_window = int(DURATION * sr)
    # Guard against a zero stride, which would make range() raise.
    step_size = max(1, int(step_seconds * sr))

    # A clip shorter than one window previously produced no windows at all and
    # was reported as "no detection" without ever being scored. Zero-pad it to
    # one full window instead.
    # NOTE(review): assumes trailing silence is an acceptable fill -- confirm
    # it matches how short training clips were handled.
    if len(signal) < num_samples_in_window:
        signal = np.pad(signal, (0, num_samples_in_window - len(signal)))

    # "+ 1" keeps the last full window; without it a clip exactly one window
    # long yields an empty range and is never evaluated.
    window_starts = range(0, len(signal) - num_samples_in_window + 1, step_size)

    # One (frames, N_MFCC) feature matrix per window, stacked into a batch.
    features = np.stack([
        librosa.feature.mfcc(
            y=signal[start : start + num_samples_in_window],
            sr=sr,
            n_mfcc=N_MFCC,
            n_fft=N_FFT,
            hop_length=HOP_LENGTH,
        ).T
        for start in window_starts
    ])

    # Score all windows in a single predict() call instead of one call per
    # window; the trailing axis is the channel dimension.
    predictions = model.predict(features[..., np.newaxis], verbose=0)

    detections = 0
    for start, prediction in zip(window_starts, predictions):
        predicted_probability = prediction[0]

        if predicted_probability > PREDICTION_THRESHOLD:
            detections += 1
            timestamp = start / sr
            print(f"Wake word DETECTED at: {timestamp:.2f} seconds with {predicted_probability:.2f} confidence.")

    if detections == 0:
        print("No wake word was detected in the file.")
    else:
        print(f"\nScan complete. Found {detections} potential detections.")


In [52]:
# Load the saved model from disk, then scan the sample clip with it.
model = tf.keras.models.load_model(filepath=model_path)
predict_wake_word(model=model, audio_file_path=audio_file_path)

Scanning audio file: ../samples/positive/0_sample.wav...
No wake word was detected in the file.
