In [1]:
!pip install tflite-support



In [2]:
# Imports
from tflite_support.task import audio
from tflite_support.task import core
from tflite_support.task import processor
import pyaudio
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt
import librosa
import librosa.display


# Initialization
base_options = core.BaseOptions(file_name="soundclassifier.tflite")
classification_options = processor.ClassificationOptions(max_results=2)
options = audio.AudioClassifierOptions(base_options=base_options, classification_options=classification_options)
classifier = audio.AudioClassifier.create_from_options(options)

def record_audio(seconds=2, fs=44100):
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=fs, input=True, frames_per_buffer=fs)
    print("Recording...")
    frames = []
    for _ in range(int(fs * seconds / fs)):
        data = stream.read(fs)
        frames.append(np.frombuffer(data, dtype=np.int16))
    print("Finished recording.")
    stream.stop_stream()
    stream.close()
    p.terminate()
    audio_data = np.concatenate(frames)
    return audio_data

def save_audio_data(audio_data, file_path):
    sf.write(file_path, audio_data, samplerate=44100, format='wav')

def create_spectrogram(audio_data, image_file):
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)

    # Convert audio data to floating-point format
    audio_data_float = audio_data.astype(np.float32)
    
    # Compute mel spectrogram
    ms = librosa.feature.melspectrogram(y=audio_data_float, sr=44100)
    log_ms = librosa.power_to_db(ms, ref=np.max)
    
    # Display mel spectrogram
    librosa.display.specshow(log_ms, sr=44100)

    # Save figure as image
    fig.savefig(image_file)
    plt.close(fig)


2024-02-28 16:19:09.013021: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-28 16:19:09.058421: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-28 16:19:09.059258: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [3]:
while True:
    # Record audio for two seconds
    recorded_audio = record_audio()

    # Save the recorded audio data
    save_audio_data(recorded_audio, 'recorded_audio.wav')

    # Run inference
    audio_file = audio.TensorAudio.create_from_wav_file("recorded_audio.wav", classifier.required_input_buffer_size)
    audio_result = classifier.classify(audio_file)

    # Print the classification result
    print("Classification Results:")
    for classification in audio_result.classifications:
        for category in classification.categories:
            print(f"Category: {category.category_name}, Score: {category.score}")

    # Uncomment the below line if you want to create and save the spectrogram
    # create_spectrogram(recorded_audio, 'recorded_spectrogram.png')


Recording...
Finished recording.
Classification Results:
Category: 0 Background Noise, Score: 0.9940126538276672
Category: 3 Gunshots, Score: 0.003293450688943267
Recording...
Finished recording.
Classification Results:
Category: 0 Background Noise, Score: 0.8954781293869019
Category: 3 Gunshots, Score: 0.07009774446487427
Recording...
Finished recording.
Classification Results:
Category: 3 Gunshots, Score: 0.3668978810310364
Category: 0 Background Noise, Score: 0.3142758309841156
Recording...
Finished recording.
Classification Results:
Category: 0 Background Noise, Score: 0.9646139740943909
Category: 1 Clapping, Score: 0.025540195405483246
Recording...
Finished recording.
Classification Results:
Category: 0 Background Noise, Score: 0.9896882176399231
Category: 2 Glass Breaking, Score: 0.00433580856770277
Recording...
Finished recording.
Classification Results:
Category: 0 Background Noise, Score: 0.9975818395614624
Category: 3 Gunshots, Score: 0.0014339103363454342
Recording...
Finish

KeyboardInterrupt: 