# Because virtualenvs and python interpreters are being incredibly annoying

In [3]:
# Author: Adam Fong
# Date: December 27, 2021
# Purpose: Create a function that takes a wave file and preprocesses it to be valid input for angle grinder tflite model

## TODO: Impove processing time by a lot. Currently takes 9s to process a 2 second clip (well I guess 2, 2 second clips)

# NOTE: to run tflite_runtime.interpreter you need numpy ~1.19.2

import math
import numpy as np
import pandas as pd
from pickle import load
import scipy.io.wavfile as wav
import tensorflow as tf

print(np.__version__)
samplerate = 44100
N_BINS = 1000

# making chunk size
# currently only neat if each chunk is 1 second 
def createChunks(raw_data_array):
    seconds = 2
    chunk_size = int(seconds * samplerate)
    chunks_final = pd.DataFrame([np.zeros(chunk_size)])

    # if seconds != 1, there will be some lost data. Hard to avoid this if we are going to have a lot of time recorded 
    # removes time from the beginning of the recording because there is more often noise there than at the end
    for rec in raw_data_array:
        n_chunks = math.floor(len(rec) / chunk_size)
        for i in range(len(rec) - (n_chunks * chunk_size), len(rec), chunk_size):
            chunk = rec[i:(i+chunk_size)]
            #print(f"Length: {len(chunk)}, First Value: {chunk[0]}, Last Value: {chunk[len(chunk) - 1]}")
            chunks_final = chunks_final.append(pd.Series(chunk), ignore_index = True)

    # get rid of filler zero's line
    chunks_final = chunks_final.iloc[1:, :]
    return chunks_final

def rawAudioToFreq(arr: np.array, bins: int):
    n = len(arr)                       # length of the signal
    k = np.arange(n)
    T = n/samplerate
    
    frq = k/T # two sides frequency range
    
    
    zz=int(n/2)
    freq = frq[range(zz)]           # one side frequency range
    Y0 = np.fft.fft(arr)/n              # fft computing and normalization
    Y = Y0[range(zz)]

    # obtaining maximum amplitude and its corresponding frequency 
    Y_max = abs(Y).max()
    freq_max = freq[np.where(abs(Y) == Y_max)[0][0]]
    
    arr = np.array([freq.astype(int), Y.astype(int)])
    bin_size = math.floor(arr[0, arr.shape[1]-1] / bins)
    bin_minimums = np.arange(0, arr[0, arr.shape[1] - 1], bin_size)
    bin_arr = np.array([bin_minimums, np.zeros(len(bin_minimums))])

    # collecting magintudes in bins 
    for i in range(0, arr[0, arr.shape[1] - 1], math.floor(bin_size)):
        bin_arr[1, int(i / bin_size)] = np.sum(abs(Y)[i:(i+bin_size)])

    return freq, abs(Y), bin_arr, Y_max, freq_max

def getFreqs(chunk_df: pd.DataFrame, bins: int):
    freqs, Ys, bin_array, Y_max, freq_max = rawAudioToFreq(chunk_df.to_numpy()[0, :], bins)
#     for row in range(chunk_df.shape[0]):
        
#         freqs, Ys, bin_array, Y_max, freq_max = rawAudioToFreq(chunk_df.to_numpy()[row, :], bins)

#         # don't want the bin array if there's only one sample to check
#         if row == 0:
#             freqs_df = pd.DataFrame([bin_array[0, :]])
#         else:
#         freqs_df = freqs_df.append(pd.Series(bin_array[1,:]), ignore_index = True)
    return pd.DataFrame([pd.Series(bin_array[1,:])])

def audioProcessor(file):
    # loading the same scaler that scaled the data for model training 
    scaler = load(open('C:/Users/adamf/OneDrive/Documents/university/UBC/homework_Winter_2021/IGEN_330/BikeSentry/device/audio_recognition/fft_examples/audio_scaler.pkl', 'rb'))

    # take file and convert to pandas dataframe
    sr, y = wav.read(file)
    
    # time of clips
    s = 2 #seconds

    if y.shape[1] == 2: # for 2 channel audio recording (iPhone)
        y0 = y[:(sr*s), 0]
        y1 = y[:(sr*s), 1]
        
        chunk0 = pd.DataFrame([y0])
        chunk1 = pd.DataFrame([y1])
        
        freqs0 = getFreqs(chunk0, N_BINS)
        freqs1 = getFreqs(chunk1, N_BINS)
        
        freqs_scaled0 = scaler.fit_transform(freqs0.to_numpy()[0,:].reshape(-1,1))
        freqs_scaled1 = scaler.fit_transform(freqs1.to_numpy()[0,:].reshape(-1,1))

        return(2, [freqs_scaled0, freqs_scaled1])
    else: # 1 channel audio recording (our mic)
        # force file to exactly 2 seconds or 44.1k * 2sec samples
        y = y[:sr*s]

        # create chunk
        chunk = createChunks([y])

        # get frequencies
        freqs = getFreqs(chunk, N_BINS)

        # scale frequencies
        freqs_scaled = scaler.fit_transform(freqs.to_numpy()[0, :].reshape(-1,1)) #first row is bin values

        return (1, freqs_scaled)

if __name__ == "__main__":
    # load ML model
    interpreter = tf.lite.Interpreter(model_path = "angle-grinder-detector-2s.tflite")
    interpreter.allocate_tensors()
    
    # get data from file
    filename = "C:/Users/adamf/OneDrive/Documents/university/UBC/homework_Winter_2021/IGEN_330/BikeSentry_data/angle-grinders/red-4m-trimmed.wav"
    _, test = wav.read(filename)
    
    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    
    # process data
    channels, inp = audioProcessor(filename)
    
    # change to float32
    inp = np.float32(inp)
        
    # Test the model on random input data.
    input_shape = input_details[0]['shape']
    
    #set value of input tensor
    if channels == 1: # one channel audio
        input_data = inp
        
        interpreter.set_tensor(input_details[0]['index'], input_data)

        interpreter.invoke()
        
        output_data = interpreter.get_tensor(output_details[0]['index'])

        print(output_data)
    else: # more than 1 channel audio
        for channel in range(channels):
            input_data = inp[channel].T
            interpreter.set_tensor(input_details[0]['index'], input_data)

            interpreter.invoke()

            output_data = interpreter.get_tensor(output_details[0]['index'])
            
            print(output_data)
    


1.19.2


  arr = np.array([freq.astype(int), Y.astype(int)])


[[0.99998564]]
[[0.99964786]]
