# new release of tensorflow, version 2.10 cannot be retrieved
!pip install tensorflow
!pip install tensorflow-io
!pip install cherrypy==18.8.0
!pip install paho-mqtt==1.6.1
!pip install redis==4.3.4
!pip install librosa
!pip install tensorflow_model_optimization
!pip install pandas
!pip install keras
!pip install pydub
!pip install noisereduce

In [35]:
import os
import sounddevice as sd
import numpy as np
import time
from time import time
from time import sleep
from scipy.io.wavfile import write
import argparse as ap
import tensorflow as tf
import tensorflow_io as tfio
import uuid
import redis
import psutil
# import myConnection as mc
from datetime import datetime
import argparse as ap
import pandas as pd
import random
import paho.mqtt.client as mqtt

In [36]:
import noisereduce as nr
from scipy.io.wavfile import write
from scipy.io import wavfile

In [37]:
try:
    os.chdir('./datasets/dsl_data/')
except:
    print("")

seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['TF_DETERMINISTIC_OPS'] = '1'
random.seed(seed)
tf.random.set_seed(seed)
np.random.seed(seed)




In [38]:
parser = ap.ArgumentParser()

parser.add_argument('--resolution', default=8000, type=int, help="Resolution for capturing audio")
# blocksize
#parser.add_argument('--blocksize', default=32000, type=int, help="Blocksize for captured audio, change only if you previously changed")
parser.add_argument('--downsampling_rate', default=8000, type=int, help="Resolution for capturing audio")
parser.add_argument('--device', default=0, type=int, help="Default device is 0, change for others")


parser.add_argument('--output_directory', default='.',type=str, help='Used to specify output folder')


args = parser.parse_args(['--device','31','--resolution','8000' ])
#args = parser.parse_args()

In [39]:
blocksize = 4 * args.resolution
LABELS = ['change languagenone', 'activatemusic', 'deactivatelights', 'increasevolume', 'decreasevolume', 'increaseheat', 'decreaseheat', 'nannan']

In [40]:
print(LABELS)

['change languagenone', 'activatemusic', 'deactivatelights', 'increasevolume', 'decreasevolume', 'increaseheat', 'decreaseheat', 'nannan']


# Necessary preprocessing args

In [41]:
frame_length_in_s = 0.04#0.032*2 # /2 for resnet18
frame_step_in_s  = frame_length_in_s#frame_length_in_s

PREPROCESSING_ARGS = {
    'downsampling_rate': args.resolution,
    'frame_length_in_s': frame_length_in_s,
    'frame_step_in_s': frame_step_in_s,
}

num_mel_bins = (int) ((args.resolution - args.resolution * PREPROCESSING_ARGS['frame_length_in_s'])/(args.resolution*PREPROCESSING_ARGS['frame_step_in_s']))+1
# print(num_mel_bins)

PREPROCESSING_ARGS = {
    **PREPROCESSING_ARGS,
    'num_mel_bins': num_mel_bins,
    'lower_frequency': 20,   #40
    'upper_frequency': args.resolution/2, #4000
}

downsampling_rate = PREPROCESSING_ARGS['downsampling_rate']
sampling_rate_int64 = tf.cast(downsampling_rate, tf.int64)
frame_length = int(downsampling_rate * PREPROCESSING_ARGS['frame_length_in_s'])
#print("Frame_length: {}".format(frame_length))
frame_step = int(downsampling_rate * PREPROCESSING_ARGS['frame_step_in_s'])
#print("Frame_length: {}".format(frame_step))
num_spectrogram_bins = frame_length // 2 + 1
num_mel_bins = PREPROCESSING_ARGS['num_mel_bins']
lower_frequency = PREPROCESSING_ARGS['lower_frequency']
upper_frequency = PREPROCESSING_ARGS['upper_frequency']

linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
    num_mel_bins=num_mel_bins,
    num_spectrogram_bins=num_spectrogram_bins,
    sample_rate=downsampling_rate,
    lower_edge_hertz=lower_frequency,
    upper_edge_hertz=upper_frequency
)

In [42]:
modelName = "model_24"

interpreter = tf.lite.Interpreter(model_path=f'./tflite_models/{modelName}.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

In [43]:
mqtt_topic = "topic/ML4IOT_Project_Polito"

In [44]:
def get_audio_from_numpy(indata):
    indata = tf.convert_to_tensor(indata, dtype=tf.float32)
    #print("Shape of indata: ",tf.reduce_max(indata))
    indata = 2 * ((indata + 32768) / (32767 + 32768)) -1
    indata = tf.squeeze(indata)
    #print("After of indata: ",tf.reduce_max(indata))
    return indata

def get_spectrogram(indata, frame_length_in_s, frame_step_in_s):
    data = get_audio_from_numpy(indata)
    
    sampling_rate_float32 = tf.cast(args.downsampling_rate, tf.float32)
    frame_length = int(frame_length_in_s * sampling_rate_float32)
    frame_step = int(frame_step_in_s * sampling_rate_float32)

    stft = tf.signal.stft(
        data,
        frame_length=frame_length,
        frame_step=frame_step,
        fft_length=frame_length
    )
    spectrogram = tf.abs(stft)

    return spectrogram

In [45]:
def send_prediction_as_mqtt(file):
    # f'./{args.output_directory}/{timestamp}.wav'
    
    #print(type(predicted_label))
    #print(predicted_label.shape)
    #print(predicted_label)
    print("predicted label:",predicted_label)
    print("max:",predicted_label.max())
    index = ( np.where(predicted_label == predicted_label.max() )  )
    index = index[0][0]
    print("index",index)
    print("label",LABELS[index])
    print()
    

print(LABELS[index])

In [46]:
def prediction_on_indata(indata):
    frame_length_in_s = 0.04
    frame_step_in_s   = frame_length_in_s
    #audio = get_audio_from_numpy(indata)
    audio_binary = tf.io.read_file(indata)
    audio, sampling_rate = tf.audio.decode_wav(audio_binary)
    audio = tf.squeeze(audio, axis=-1) #all our audio are mono, drop extra axis
    
    
    frame_length = int(frame_length_in_s * args.resolution)
    frame_step = int(frame_step_in_s * args.resolution)
    stft = tf.signal.stft(
        audio,
        frame_length=frame_length,
        frame_step=frame_step,
        fft_length=frame_length
    )
    
    spectrogram = tf.abs(stft)
    
    mel_spectrogram = tf.matmul(spectrogram, linear_to_mel_weight_matrix)
    log_mel_spectrogram = tf.math.log(mel_spectrogram + 1.e-6)
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, 0)  # batch axis
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, -1)  # channel axis
    mfcss = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
    #print("Shape ",input_details[0])
    interpreter.set_tensor(input_details[0]['index'], mfcss)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])

    #print("change languagenone",output[0][0]*100,"%")
    #print("activatemusic",output[0][1]*100,"%")
    #print("deactivatelights",output[0][2]*100,"%")
    #print("increasevolume",output[0][3]*100,"%")
    #print("decreasevolume",output[0][4]*100,"%")
    #print("increaseheat",output[0][5]*100,"%")
    #print("decreaseheat",output[0][6]*100,"%")
    #print("nannan",output[0][7]*100,"%")
    
    send_prediction_as_mqtt(output[0])
    return

In [47]:
values = sd.query_devices()
device = 0

for value in values:
    if value['name'] == 'default':
        device = value['index']

In [48]:
def callback(indata, frames, callback_time, status):
    timestamp = time()

    write(f'./{args.output_directory}/{timestamp}.wav', args.resolution, indata)
    
    rate,data = wavfile.read(f'./{args.output_directory}/{timestamp}.wav')
    indata = nr.reduce_noise(y=data, sr=rate) #sr=indata.shape[0])
    #print(indata.shape[0])

    os.remove(f'./{args.output_directory}/{timestamp}.wav')
    wavfile.write(f'./{args.output_directory}/{timestamp}.wav',args.downsampling_rate,indata)
   
    
    prediction_on_indata(f'./{args.output_directory}/{timestamp}.wav')
    
    print("Elapsed time: ",time()-timestamp)

In [49]:
def main():

    #print(LABELS)
    while True:
        with sd.InputStream(device=device, channels=1, dtype='int16', samplerate=args.resolution, blocksize=blocksize, callback=callback):
            print("") # to print a new line, improving readability in the terminal

if __name__ == '__main__':
    main()


before


Exception ignored from cffi callback <function _StreamBase.__init__.<locals>.callback_ptr at 0x7f651843f9c0>:
Traceback (most recent call last):
  File "/home/gaetano/.local/lib/python3.11/site-packages/sounddevice.py", line 846, in callback_ptr
    return _wrap_callback(callback, data, frames, time, status)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gaetano/.local/lib/python3.11/site-packages/sounddevice.py", line 2687, in _wrap_callback
    callback(*args)
  File "/tmp/ipykernel_38858/2402715777.py", line 16, in callback
  File "/tmp/ipykernel_38858/3767703333.py", line 40, in prediction_on_indata
  File "/tmp/ipykernel_38858/3306615471.py", line 7, in send_prediction_as_mqtt
NameError: name 'predicted_label' is not defined


after




Exception ignored from cffi callback <function _StreamBase.__init__.<locals>.callback_ptr at 0x7f64f37e7ec0>:
Traceback (most recent call last):
  File "/home/gaetano/.local/lib/python3.11/site-packages/sounddevice.py", line 846, in callback_ptr
    return _wrap_callback(callback, data, frames, time, status)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gaetano/.local/lib/python3.11/site-packages/sounddevice.py", line 2687, in _wrap_callback
    callback(*args)
  File "/tmp/ipykernel_38858/2402715777.py", line 16, in callback
  File "/tmp/ipykernel_38858/3767703333.py", line 40, in prediction_on_indata
  File "/tmp/ipykernel_38858/3306615471.py", line 7, in send_prediction_as_mqtt
NameError: name 'predicted_label' is not defined


before
after



Exception ignored from cffi callback <function _StreamBase.__init__.<locals>.callback_ptr at 0x7f64f37e74c0>:
Traceback (most recent call last):
  File "/home/gaetano/.local/lib/python3.11/site-packages/sounddevice.py", line 846, in callback_ptr
    return _wrap_callback(callback, data, frames, time, status)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/gaetano/.local/lib/python3.11/site-packages/sounddevice.py", line 2687, in _wrap_callback
    callback(*args)
  File "/tmp/ipykernel_38858/2402715777.py", line 16, in callback
  File "/tmp/ipykernel_38858/3767703333.py", line 40, in prediction_on_indata
  File "/tmp/ipykernel_38858/3306615471.py", line 7, in send_prediction_as_mqtt
NameError: name 'predicted_label' is not defined


before
after


KeyboardInterrupt: 

# Test

In [None]:
import functools

print("Test area")

blocksize = 4 * args.resolution

identifier = 0
label = ""

def callback2(indata, frames, callback_time, status):
    """This is called (from a separate thread) for each audio block."""
    # print(is_silence(indata))
    # print(type(indata))  # Type is numpy.ndarray 
    global identifier
    global label
    
    write(f'./{args.output_directory}/{identifier}_{label}.wav', args.resolution, indata)
    filesize_in_bytes = os.path.getsize(f'./{args.output_directory}/{identifier}_{label}.wav')
    filesize_in_kb = filesize_in_bytes / 1024
    print(f'Size: {filesize_in_kb:.2f}KB')
    


# set up your global variables as before

def callback(indata, frames, callback_time, status):
    global identifier
    global label
    
    # reduce noise in the audio data
    
    # write the audio data to file
    write(f'./{args.output_directory}/{identifier}_{label}.wav', args.resolution, indata)
    filesize_in_bytes = os.path.getsize(f'./{args.output_directory}/{identifier}_{label}.wav')
    filesize_in_kb = filesize_in_bytes / 1024
    print(f'Size: {filesize_in_kb:.2f}KB')
    
    
    rate,data = wavfile.read(f'./{args.output_directory}/{identifier}_{label}.wav')
    reduced_noise = nr.reduce_noise(y=data, sr=rate)
    os.remove(f'./{args.output_directory}/{identifier}_{label}.wav')
    wavfile.write(f'./{args.output_directory}/{identifier}_{label}.wav',rate,reduced_noise)
    
    
#callback_with_args = functools.partial(my_callback, identifier=identifier, label=label)

# 10 fron on screen microphone
# 14 from microphone nada?

def test(ident,lab):
    print("New Recording!")
    global identifier
    global label
    identifier = ident
    label = lab
    with sd.InputStream(device=args.device, channels=1, dtype='int16', samplerate=args.resolution, blocksize=blocksize, callback=callback):
        print("")

In [None]:
filename1 = "./Train_Dataset_Truncated/0_change languagenone.wav"
filename2 = "./Train_Dataset_Truncated/15_deactivatelights.wav"
    
    
filename21 = "./Train_Dataset_Truncated/113_increasevolume.wav"
filename22 = "./Train_Dataset_Truncated/151_increasevolume.wav"
filename23 = "./Train_Dataset_Truncated/212_increaseheat.wav"
    
filename3 = "./AudioFiles/1678717382.0476763.wav"   
filename4 = "./AudioFiles/1678717386.5524414.wav"
filename5 = "./AudioFiles/1678717391.0566754.wav"
filename6 = "./AudioFiles/1678717395.5633824.wav"  
direct="./AudioFiles/"
filename7 = direct+"4_increasevolume.wav"

                
def test2(filename):
    print("Prediction for",filename)
    frame_length_in_s = 0.04
    frame_step_in_s   = frame_length_in_s
    global state
    audio_binary = tf.io.read_file(filename)
    audio, sampling_rate = tf.audio.decode_wav(audio_binary)
    audio = tf.squeeze(audio, axis=-1) #all our audio are mono, drop extra axis
    
    frame_length = int(frame_length_in_s * args.resolution)
    frame_step = int(frame_step_in_s * args.resolution)
    stft = tf.signal.stft(
        audio,
        frame_length=frame_length,
        frame_step=frame_step,
        fft_length=frame_length
    )
    
    spectrogram = tf.abs(stft)
    
    mel_spectrogram = tf.matmul(spectrogram, linear_to_mel_weight_matrix)
    log_mel_spectrogram = tf.math.log(mel_spectrogram + 1.e-6)
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, 0)  # batch axis
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, -1)  # channel axis
    mfcss = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
    #print("Shape ",input_details[0])
    interpreter.set_tensor(input_details[0]['index'], mfcss)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])

    print("change languagenone",output[0][0]*100,"%")
    print("activatemusic",output[0][1]*100,"%")
    print("deactivatelights",output[0][2]*100,"%")
    print("increasevolume",output[0][3]*100,"%")
    print("decreasevolume",output[0][4]*100,"%")
    print("increaseheat",output[0][5]*100,"%")
    print("decreaseheat",output[0][6]*100,"%")
    #print("nannan",output[0][7]*100,"%")
    
    send_prediction_as_mqtt(output[0])

In [None]:
test(16,"decreaseheat")

In [None]:
direct="./AudioFiles/"
filename7 = direct+"16_decreaseheat.wav"

In [None]:
test2(filename7)