# Intro
This project demonstrates keyword spotting for 'yes'/'no' using a pre-trained TFLite model. It accepts user-uploaded WAV files and classifies spoken commands using efficient on-device inference.

# Installing Packages

In [None]:
!pip install -q tensorflow scipy numpy


In [None]:

import numpy as np
import tensorflow as tf
from google.colab import files
from scipy.io import wavfile
from scipy.signal import resample_poly



# Upload your .tflite model and a .wav file

In [None]:

# Prompt for the files the later cells read: the model ("yes_no.tflite") and
# the WAV clip to classify.
# NOTE(review): presumably `uploaded` maps each uploaded file name to its
# contents and the files are also written to the working directory — confirm
# against the google.colab.files documentation.
uploaded = files.upload()


KeyboardInterrupt: 


# Load TFLite model

In [None]:

# Model file, resolved relative to the notebook's working directory.
MODEL_PATH = "yes_no.tflite"

# Build the interpreter and reserve memory for its tensors before any
# tensor is read or written.
interpreter = tf.lite.Interpreter(model_path=MODEL_PATH)
interpreter.allocate_tensors()

# Metadata describing the model's input and output tensors; the inference
# cell uses these to find tensor indices, shapes and dtypes.
input_details, output_details = (
    interpreter.get_input_details(),
    interpreter.get_output_details(),
)



# Load and preprocess .wav file

In [None]:
# Clip to classify.
filename = '/content/no-252379.wav'

# The model consumes a 16000-sample window that represents one second of
# audio, i.e. a 16 kHz signal.
TARGET_SR = 16000
NUM_SAMPLES = 16000

sr, audio = wavfile.read(filename)

# If the audio is stereo, select one channel
if audio.ndim > 1:
    audio = audio[:, 0]

# Normalize to float32 in [-1, 1). wavfile.read returns a dtype that depends
# on the file's sample format, so the scale factor must follow the dtype
# instead of assuming 16-bit PCM.
if audio.dtype == np.uint8:
    # 8-bit PCM is unsigned and centred on 128.
    audio = (audio.astype(np.float32) - 128.0) / 128.0
elif np.issubdtype(audio.dtype, np.integer):
    # Signed PCM: divide by the magnitude of the most negative value
    # (32768 for int16, which matches the previous behaviour).
    audio = audio.astype(np.float32) / float(-np.iinfo(audio.dtype).min)
else:
    # Floating-point WAV data is used as-is.
    audio = audio.astype(np.float32)

# Resample when the file was not recorded at the rate the window assumes;
# otherwise 16000 samples would not correspond to one second of speech.
if sr != TARGET_SR and audio.size > 0:
    common = int(np.gcd(TARGET_SR, sr))
    audio = resample_poly(audio, TARGET_SR // common, sr // common).astype(np.float32)

# Trim to one second, then zero-pad shorter clips up to one second.
audio = audio[:NUM_SAMPLES]
audio = np.pad(audio, (0, NUM_SAMPLES - len(audio)))

# Add the batch dimension so the shape becomes [1, 16000].
audio = np.expand_dims(audio, axis=0)
assert audio.shape == (1, NUM_SAMPLES), audio.shape

# Play back the audio

In [None]:
from IPython.display import Audio

# Build a player for the selected clip and leave it as the cell's last
# expression so the notebook renders it.
clip_player = Audio(filename)
clip_player


# Run inference

In [None]:
# Get the expected input details
input_details = interpreter.get_input_details()
input_shape = input_details[0]['shape']
input_dtype = input_details[0]['dtype']

# Create a NumPy array with the expected shape and dtype
input_tensor = np.zeros(input_shape, dtype=input_dtype)

# Copy the audio data into the input tensor
# Ensure the audio data is also of the correct dtype
audio = audio.astype(input_dtype)

# The current audio shape is (1, 16000). The expected shape is also (1, 16000).
# Directly assign the reshaped audio to the input_tensor
input_tensor[:] = audio


interpreter.set_tensor(input_details[0]['index'], input_tensor)
interpreter.invoke()
output = interpreter.get_tensor(output_details[0]['index'])

prob_no = output[0][0]
prob_yes = output[0][1]

print(f"Probabilities: \n Probability of 'no' is {prob_no * 100:.2f}% ", f"\n probability of 'yes' is {prob_yes * 100:.2f}%")
print("Predicted word: ", "yes" if np.argmax(output) == 1 else "no")

Probabilities: 
 Probability of 'no' is 72.99%  
 probability of 'yes' is 27.01%
Predicted word:  no


In [None]:
# Print the interpreter's input-tensor metadata (name, index, shape, dtype,
# quantization parameters) to confirm what the model expects.
print(input_details)

[{'name': 'serving_default_keras_tensor_7:0', 'index': 0, 'shape': array([    1, 16000], dtype=int32), 'shape_signature': array([   -1, 16000], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
