In [3]:
import os
import sounddevice as sd
import numpy as np
import time
from time import time
from time import sleep
from scipy.io.wavfile import write
import argparse as ap
import tensorflow as tf
import tensorflow_io as tfio
import uuid
import redis
import psutil
# import myConnection as mc
from datetime import datetime
import argparse as ap
import pandas as pd
import random

In [4]:
try:
    os.chdir('./datasets/dsl_data/')
except:
    print("")

seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['TF_DETERMINISTIC_OPS'] = '1'
random.seed(seed)
tf.random.set_seed(seed)
np.random.seed(seed)




In [5]:
parser = ap.ArgumentParser()

parser.add_argument('--resolution', default=8000, type=int, help="Resolution for capturing audio")
# blocksize
#parser.add_argument('--blocksize', default=32000, type=int, help="Blocksize for captured audio, change only if you previously changed")
parser.add_argument('--downsampling_rate', default=8000, type=int, help="Resolution for capturing audio")
parser.add_argument('--device', default=0, type=int, help="Default device is 0, change for others")


parser.add_argument('--output_directory', default='./AudioFiles',type=str, help='Used to specify output folder')


args = parser.parse_args(['--device','14','--resolution','16000' ])
#args = parser.parse_args()

In [6]:
blocksize = 4 * args.resolution
LABELS = ['change languagenone', 'activatemusic', 'deactivatelights', 'increasevolume', 'decreasevolume', 'increaseheat', 'decreaseheat', 'nannan']

In [7]:
print(LABELS)

['change languagenone', 'activatemusic', 'deactivatelights', 'increasevolume', 'decreasevolume', 'increaseheat', 'decreaseheat', 'nannan']


# Necessary preprocessing args

In [8]:
frame_length_in_s = 0.04#0.032*2 # /2 for resnet18
frame_step_in_s  = frame_length_in_s#frame_length_in_s

PREPROCESSING_ARGS = {
    'downsampling_rate': args.resolution,
    'frame_length_in_s': frame_length_in_s,
    'frame_step_in_s': frame_step_in_s,
}

num_mel_bins = (int) ((args.resolution - args.resolution * PREPROCESSING_ARGS['frame_length_in_s'])/(args.resolution*PREPROCESSING_ARGS['frame_step_in_s']))+1
# print(num_mel_bins)

PREPROCESSING_ARGS = {
    **PREPROCESSING_ARGS,
    'num_mel_bins': num_mel_bins,
    'lower_frequency': 20,   #40
    'upper_frequency': args.resolution/2, #4000
}

downsampling_rate = PREPROCESSING_ARGS['downsampling_rate']
sampling_rate_int64 = tf.cast(downsampling_rate, tf.int64)
frame_length = int(downsampling_rate * PREPROCESSING_ARGS['frame_length_in_s'])
#print("Frame_length: {}".format(frame_length))
frame_step = int(downsampling_rate * PREPROCESSING_ARGS['frame_step_in_s'])
#print("Frame_length: {}".format(frame_step))
num_spectrogram_bins = frame_length // 2 + 1
num_mel_bins = PREPROCESSING_ARGS['num_mel_bins']
lower_frequency = PREPROCESSING_ARGS['lower_frequency']
upper_frequency = PREPROCESSING_ARGS['upper_frequency']

linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
    num_mel_bins=num_mel_bins,
    num_spectrogram_bins=num_spectrogram_bins,
    sample_rate=downsampling_rate,
    lower_edge_hertz=lower_frequency,
    upper_edge_hertz=upper_frequency
)

2023-03-13 14:24:09.248637: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-13 14:24:09.248843: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-03-13 14:24:09.248894: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2023-03-13 14:24:09.248943: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2023-03-13 14:24:09.248990: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Co

In [9]:
modelName = "model_24"

interpreter = tf.lite.Interpreter(model_path=f'./tflite_models/{modelName}.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [10]:
def get_audio_from_numpy(indata):
    indata = tf.convert_to_tensor(indata, dtype=tf.float32)
    #print("Shape of indata: ",tf.reduce_max(indata))
    indata = 2 * ((indata + 32768) / (32767 + 32768)) -1
    indata = tf.squeeze(indata)
    #print("After of indata: ",tf.reduce_max(indata))
    return indata

def get_spectrogram(indata, frame_length_in_s, frame_step_in_s):
    data = get_audio_from_numpy(indata)
    
    sampling_rate_float32 = tf.cast(args.downsampling_rate, tf.float32)
    frame_length = int(frame_length_in_s * sampling_rate_float32)
    frame_step = int(frame_step_in_s * sampling_rate_float32)

    stft = tf.signal.stft(
        data,
        frame_length=frame_length,
        frame_step=frame_step,
        fft_length=frame_length
    )
    spectrogram = tf.abs(stft)

    return spectrogram

In [11]:
state = False

In [12]:
def send_prediction_as_mqtt(predicted_label):
    #print(type(predicted_label))
    #print(predicted_label.shape)
    #print(predicted_label)
    print("MAX: ")
    print(predicted_label.max())
    index = ( np.where(predicted_label == predicted_label.max() )  )
    index = index[0][0]
    print(index)
    print(LABELS[index])
    print()

print(LABELS[index])

In [13]:
def prediction_on_indata(indata):
    frame_length_in_s = 0.04
    frame_step_in_s   = frame_length_in_s
    global state
    audio = get_audio_from_numpy(indata)
    
    frame_length = int(frame_length_in_s * args.resolution)
    frame_step = int(frame_step_in_s * args.resolution)
    stft = tf.signal.stft(
        audio,
        frame_length=frame_length,
        frame_step=frame_step,
        fft_length=frame_length
    )
    
    spectrogram = tf.abs(stft)
    
    mel_spectrogram = tf.matmul(spectrogram, linear_to_mel_weight_matrix)
    log_mel_spectrogram = tf.math.log(mel_spectrogram + 1.e-6)
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, 0)  # batch axis
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, -1)  # channel axis
    mfcss = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
    #print("Shape ",input_details[0])
    interpreter.set_tensor(input_details[0]['index'], mfcss)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])

    threshold = 0.8
    print("change languagenone",output[0][0]*100,"%")
    print("activatemusic",output[0][1]*100,"%")
    print("deactivatelights",output[0][2]*100,"%")
    print("increasevolume",output[0][3]*100,"%")
    print("decreasevolume",output[0][4]*100,"%")
    print("increaseheat",output[0][5]*100,"%")
    print("decreaseheat",output[0][6]*100,"%")
    #print("nannan",output[0][7]*100,"%")
    
    send_prediction_as_mqtt(output[0])
    
    if (output[0][1] > threshold):
        print("Start monitoring")
        state = True
    if (output[0][5] > threshold):
        print("Stop monitoring")
        state = False
    return state

In [14]:
values = sd.query_devices()
device = 0

for value in values:
    if value['name'] == 'default':
        device = value['index']

In [15]:
def callback(indata, frames, callback_time, status):
    timestamp = time()
    global state
    global mac_address
    #if is_silence(indata) == 0 :
        #calculate next step of FSM!
    prediction_on_indata(indata)
    print("Elapsed time: ",time()-timestamp)

In [16]:
print(LABELS)

['change languagenone', 'activatemusic', 'deactivatelights', 'increasevolume', 'decreasevolume', 'increaseheat', 'decreaseheat', 'nannan']


In [17]:
def main():

    #print(LABELS)
    while True:
        with sd.InputStream(device=args.device, channels=1, dtype='int16', samplerate=args.resolution, blocksize=blocksize, callback=callback):
            print("") # to print a new line, improving readability in the terminal

if __name__ == '__main__':
    main()


change languagenone 10.611309856176376 %
activatemusic 29.77244257926941 %
deactivatelights 19.482383131980896 %
increasevolume 10.55290400981903 %
decreasevolume 15.701436996459961 %
increaseheat 8.356352895498276 %
decreaseheat 5.523171275854111 %
MAX: 
0.29772443
1
activatemusic

Elapsed time:  0.1684856414794922

change languagenone 12.623538076877594 %
activatemusic 27.134987711906433 %
deactivatelights 19.95556354522705 %
increasevolume 10.034121572971344 %
decreasevolume 16.14125370979309 %
increaseheat 8.09742957353592 %
decreaseheat 6.013109907507896 %
MAX: 
0.27134988
1
activatemusic

Elapsed time:  0.11675405502319336

change languagenone 7.996156066656113 %
activatemusic 35.82159876823425 %
deactivatelights 20.026396214962006 %
increasevolume 12.150074541568756 %
decreasevolume 9.088749438524246 %
increaseheat 10.973664373159409 %
decreaseheat 3.9433643221855164 %
MAX: 
0.358216
1
activatemusic

Elapsed time:  0.11530256271362305

change languagenone 7.909988611936569 %
ac

KeyboardInterrupt: 

# Test

In [None]:
print("Test")
def callback(indata, frames, callback_time, status):
    """This is called (from a separate thread) for each audio block."""
    timestamp = time()
    # print(is_silence(indata))
    # print(type(indata))  # Type is numpy.ndarray
    
    print("Noise!")
    write(f'./{args.output_directory}/{timestamp}.wav', args.resolution, indata)
    filesize_in_bytes = os.path.getsize(f'./{args.output_directory}/{timestamp}.wav')
    filesize_in_kb = filesize_in_bytes / 1024
    print(f'Size: {filesize_in_kb:.2f}KB')

# 10 fron on screen microphone
# 14 from microphone nada?

def test():
    with sd.InputStream(device=args.device, channels=1, dtype='int16', samplerate=args.resolution, blocksize=blocksize, callback=callback):
        while True:
            key = input()
            if key in ('q', 'Q'):
                print('Stop recording.')
                break
                
test()