In [30]:
import os
import numpy as np
import librosa

import pandas as pd

import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit

from tqdm import tqdm
import random
from sklearn.metrics import f1_score
import wandb
from audiomentations import AddBackgroundNoise
from tensorflow.keras.layers import Lambda



In [31]:
import warnings
warnings.filterwarnings("ignore")

In [32]:
class CONFIG:
    SEED = 42
    CLASSIFIER_LR = 3e-4
    EPOCH = 200
    BATCH_SIZE = 8

    SR = 16000
    DURATION = 20 # ms
    FRAME_SIZE = 320

    N_FFT = 64 # (SR*4ms/1000)
    HOP_LENGTH = 32 # (SR*2ms/1000)
    VOICE_DIR = './cv-corpus-19.0-2024-09-13/ko/clips/'
    NOISE_DIR = './ESC-50-master/audio/'

In [33]:
def periodic_hann_window(window_length, dtype):
    return 0.5 - 0.5 * tf.math.cos(2.0 *
                                   np.pi *
                                   tf.range(tf.cast(window_length, tf.float32)) /
                                   tf.cast(window_length, tf.float32))

In [34]:
def wave2log_mel_spectrogram(wave):
    signal_stft = tf.signal.stft(tf.cast(wave, tf.float32),
                                 frame_length=400,
                                 frame_step=160,
                                 fft_length=1024,
                                 window_fn=periodic_hann_window)
    # print(signal_stft.shape) # (98, 513)
    spectogram = tf.abs(signal_stft)

    linear_to_mel = tf.signal.linear_to_mel_weight_matrix(40,
                                signal_stft.shape[-1],
                                16000,
                                300.0,
                                4000.0)
    mel_spectrogram = tf.tensordot(spectogram, linear_to_mel, 1)
    log_mel_spectrogram = tf.math.log(mel_spectrogram + 1e-12)
    return log_mel_spectrogram

In [None]:
def representative_data_gen():
    valid_df = pd.read_csv('valid_dataset.csv')
    for _, row in valid_df.iterrows():
        path = row['path']
        label = row['label']

        audio_raw = tf.io.read_file(path)
        wave, sr = tf.audio.decode_wav(audio_raw, desired_channels=1)
        wave = tf.squeeze(wave, axis=-1)
        # print(wave.shape) (16000, )
        log_mel_spectrogram = wave2log_mel_spectrogram(wave)
        log_mel_spectrogram = np.expand_dims(log_mel_spectrogram, axis=-1)
        log_mel_spectrogram = np.expand_dims(log_mel_spectrogram, axis=0)

        # print(log_mel_spectrogram.shape) 

        yield [log_mel_spectrogram.astype(np.float32)]


In [None]:
converter = tf.lite.TFLiteConverter.from_saved_model('./vad_slimnet')
converter.inference_input_type = tf.float32
converter.inference_output_type = tf.float32


converter.representative_dataset = representative_data_gen
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.TFLITE_BUILTINS_INT8, tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
converter._experimental_lower_tensor_list_ops = False

converter.experimental_new_converter = True
#converter.allow_custom_ops = False
converter.experimental_enable_resource_variables = True

# 모델 변환
tflite_model = converter.convert()

# TF Select ops: TensorListReserve, TensorListSetItem, TensorListStack 지원하지 않는다
# https://www.tensorflow.org/lite/guide/op_select_allowlist?hl=ko&_gl=1*2zbz7k*_up*MQ..*_ga*MTU4NzE4MDgzMS4xNzMyMjY2NTQx*_ga_W0YLR4190T*MTczMjI2NjU0MS4xLjAuMTczMjI2NjU0MS4wLjAuMA..
# raw ops로는 지원하나, tflite로는 지원하고 있지 않음

# https://www.tensorflow.org/lite/guide/ops_compatibility?hl=ko&_gl=1*1747d60*_up*MQ..*_ga*MTU4NzE4MDgzMS4xNzMyMjY2NTQx*_ga_W0YLR4190T*MTczMjI2NjU0MS4xLjAuMTczMjI2NjY2Mi4wLjAuMA..
# CUSTOM: 다음 TensorFlow Lite 연산도 제공되지만 사용자 정의 모델에 사용할 준비가 되지 않았습니다.
with open('vad_slimnet.tflite', 'wb') as f:
    f.write(tflite_model)

W0000 00:00:1732266498.191898 2836236 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1732266498.191912 2836236 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2024-11-22 18:08:18.192042: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: ./vad_slimnet
2024-11-22 18:08:18.192651: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2024-11-22 18:08:18.192661: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: ./vad_slimnet
2024-11-22 18:08:18.197256: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2024-11-22 18:08:18.216252: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: ./vad_slimnet
2024-11-22 18:08:18.224654: I tensorflow/cc/saved_model/loader.cc:466] SavedModel load for tags { serve }; Status: success: OK. Took 32615 microseconds.
loc(callsite(callsite(fused["TensorListReserve:", "fun

ConverterError: Could not translate MLIR to FlatBuffer.<unknown>:0: error: loc(callsite(callsite(fused["TensorListReserve:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/TensorArrayV2_1@__inference___call___1940569"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper___call___1940604"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): 'tf.TensorListReserve' op is neither a custom op nor a flex op
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"]): called from
<unknown>:0: note: loc(callsite(callsite(fused["TensorListReserve:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/TensorArrayV2_1@__inference___call___1940569"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper___call___1940604"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): see current operation: %13 = "tf.TensorListReserve"(%6, %3) {device = ""} : (tensor<2xi32>, tensor<i32>) -> tensor<!tf_type.variant<tensor<?x40xf32>>>
<unknown>:0: note: loc(callsite(callsite(fused["TensorListReserve:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/TensorArrayV2_1@__inference___call___1940569"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper___call___1940604"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): Error code: ERROR_NEEDS_FLEX_OPS
<unknown>:0: error: loc(callsite(callsite(fused["TensorListStack:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/TensorArrayV2Stack/TensorListStack@__inference___call___1940569"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper___call___1940604"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): 'tf.TensorListStack' op is neither a custom op nor a flex op
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"]): called from
<unknown>:0: note: loc(callsite(callsite(fused["TensorListStack:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/TensorArrayV2Stack/TensorListStack@__inference___call___1940569"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper___call___1940604"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): see current operation: %24 = "tf.TensorListStack"(%23#2, %6) <{num_elements = 98 : i64}> {device = ""} : (tensor<!tf_type.variant<tensor<?x40xf32>>>, tensor<2xi32>) -> tensor<98x?x40xf32>
<unknown>:0: note: loc(callsite(callsite(fused["TensorListStack:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/TensorArrayV2Stack/TensorListStack@__inference___call___1940569"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper___call___1940604"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): Error code: ERROR_NEEDS_FLEX_OPS
<unknown>:0: error: loc(callsite(fused["TensorListSetItem:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/while/TensorArrayV2Write/TensorListSetItem@functional_1_1_custom_attention_model_10_1_rnn_12_1_while_body_1940439"] at callsite(callsite(fused["While:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/while@__inference___call___1940569"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper___call___1940604"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"]))): 'tf.TensorListSetItem' op is neither a custom op nor a flex op
<unknown>:0: note: loc(callsite(callsite(fused["While:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/while@__inference___call___1940569"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper___call___1940604"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): called from
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"]): called from
<unknown>:0: note: loc(callsite(fused["TensorListSetItem:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/while/TensorArrayV2Write/TensorListSetItem@functional_1_1_custom_attention_model_10_1_rnn_12_1_while_body_1940439"] at callsite(callsite(fused["While:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/while@__inference___call___1940569"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper___call___1940604"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"]))): see current operation: %22 = "tf.TensorListSetItem"(%arg2, %arg1, %21) <{resize_if_index_out_of_bounds = false}> {device = ""} : (tensor<!tf_type.variant<tensor<?x40xf32>>>, tensor<i32>, tensor<?x40xf32>) -> tensor<!tf_type.variant<tensor<?x40xf32>>>
<unknown>:0: note: loc(callsite(fused["TensorListSetItem:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/while/TensorArrayV2Write/TensorListSetItem@functional_1_1_custom_attention_model_10_1_rnn_12_1_while_body_1940439"] at callsite(callsite(fused["While:", "functional_1_1/custom_attention_model_10_1/rnn_12_1/while@__inference___call___1940569"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper___call___1940604"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"]))): Error code: ERROR_NEEDS_FLEX_OPS
<unknown>:0: error: failed while converting: 'main': 
Some ops are not supported by the native TFLite runtime, you can enable TF kernels fallback using TF Select. See instructions: https://www.tensorflow.org/lite/guide/ops_select 
TF Select ops: TensorListReserve, TensorListSetItem, TensorListStack
Details:
	tf.TensorListReserve(tensor<2xi32>, tensor<i32>) -> (tensor<!tf_type.variant<tensor<?x40xf32>>>) : {device = ""}
	tf.TensorListSetItem(tensor<!tf_type.variant<tensor<?x40xf32>>>, tensor<i32>, tensor<?x40xf32>) -> (tensor<!tf_type.variant<tensor<?x40xf32>>>) : {device = "", resize_if_index_out_of_bounds = false}
	tf.TensorListStack(tensor<!tf_type.variant<tensor<?x40xf32>>>, tensor<2xi32>) -> (tensor<98x?x40xf32>) : {device = "", num_elements = 98 : i64}

<unknown>:0: note: see current operation: 
"func.func"() <{arg_attrs = [{tf_saved_model.index_path = ["keras_tensor_12"]}], function_type = (tensor<?x98x40x1xf32>) -> tensor<?x2xf32>, res_attrs = [{tf_saved_model.index_path = ["output_0"]}], sym_name = "main"}> ({
^bb0(%arg0: tensor<?x98x40x1xf32>):
  %0 = "arith.constant"() <{value = dense_resource<__elided__> : tensor<2x40xf32>}> : () -> tensor<2x40xf32>
  %1 = "arith.constant"() <{value = dense_resource<__elided__> : tensor<1x1x40xf32>}> : () -> tensor<1x1x40xf32>
  %2 = "arith.constant"() <{value = dense<[-0.00183030369, 0.00183030439]> : tensor<2xf32>}> : () -> tensor<2xf32>
  %3 = "arith.constant"() <{value = dense<98> : tensor<i32>}> : () -> tensor<i32>
  %4 = "arith.constant"() <{value = dense<0.000000e+00> : tensor<f32>}> : () -> tensor<f32>
  %5 = "arith.constant"() <{value = dense<[1, 0, 2]> : tensor<3xi32>}> : () -> tensor<3xi32>
  %6 = "arith.constant"() <{value = dense<[-1, 40]> : tensor<2xi32>}> : () -> tensor<2xi32>
  %7 = "arith.constant"() <{value = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32>
  %8 = "arith.constant"() <{value = dense<40> : tensor<i32>}> : () -> tensor<i32>
  %9 = "arith.constant"() <{value = dense<1> : tensor<1xi32>}> : () -> tensor<1xi32>
  %10 = "arith.constant"() <{value = dense<0> : tensor<i32>}> : () -> tensor<i32>
  %11 = "arith.constant"() <{value = dense<1> : tensor<i32>}> : () -> tensor<i32>
  %12 = "arith.constant"() <{value = dense<[0, 2, 1]> : tensor<3xi32>}> : () -> tensor<3xi32>
  %13 = "tf.TensorListReserve"(%6, %3) {device = ""} : (tensor<2xi32>, tensor<i32>) -> tensor<!tf_type.variant<tensor<?x40xf32>>>
  %14 = "tfl.shape"(%arg0) : (tensor<?x98x40x1xf32>) -> tensor<4xi32>
  %15 = "tfl.strided_slice"(%14, %7, %9, %9) <{begin_mask = 0 : i32, ellipsis_mask = 0 : i32, end_mask = 0 : i32, new_axis_mask = 0 : i32, offset = false, shrink_axis_mask = 1 : i32}> : (tensor<4xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<i32>
  %16 = "tfl.pack"(%15, %3, %8) <{axis = 0 : i32, values_count = 3 : i32}> : (tensor<i32>, tensor<i32>, tensor<i32>) -> tensor<3xi32>
  %17 = "tfl.reshape"(%arg0, %16) : (tensor<?x98x40x1xf32>, tensor<3xi32>) -> tensor<?x98x40xf32>
  %18 = "tfl.shape"(%17) : (tensor<?x98x40xf32>) -> tensor<3xi32>
  %19 = "tfl.strided_slice"(%18, %7, %9, %9) <{begin_mask = 0 : i32, ellipsis_mask = 0 : i32, end_mask = 0 : i32, new_axis_mask = 0 : i32, offset = false, shrink_axis_mask = 1 : i32}> : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<i32>
  %20 = "tfl.pack"(%19, %8) <{axis = 0 : i32, values_count = 2 : i32}> : (tensor<i32>, tensor<i32>) -> tensor<2xi32>
  %21 = "tfl.fill"(%20, %4) : (tensor<2xi32>, tensor<f32>) -> tensor<?x40xf32>
  %22 = "tfl.transpose"(%17, %5) : (tensor<?x98x40xf32>, tensor<3xi32>) -> tensor<98x?x40xf32>
  %23:6 = "tfl.while"(%10, %10, %13, %21, %21, %22) <{is_stateless = false}> ({
  ^bb0(%arg6: tensor<i32>, %arg7: tensor<i32>, %arg8: tensor<!tf_type.variant<tensor<?x40xf32>>>, %arg9: tensor<?x40xf32>, %arg10: tensor<?x40xf32>):
    %34 = "func.call"(%arg6, %arg7, %arg8, %arg9, %arg10, %22) <{callee = @"functional_1_1/custom_attention_model_10_1/rnn_12_1/while_cond"}> : (tensor<i32>, tensor<i32>, tensor<!tf_type.variant<tensor<?x40xf32>>>, tensor<?x40xf32>, tensor<?x40xf32>, tensor<98x?x40xf32>) -> tensor<i1>
    "tfl.yield"(%34) : (tensor<i1>) -> ()
  }, {
  ^bb0(%arg1: tensor<i32>, %arg2: tensor<i32>, %arg3: tensor<!tf_type.variant<tensor<?x40xf32>>>, %arg4: tensor<?x40xf32>, %arg5: tensor<?x40xf32>):
    %33:6 = "func.call"(%arg1, %arg2, %arg3, %arg4, %arg5, %22) <{callee = @"functional_1_1/custom_attention_model_10_1/rnn_12_1/while_body"}> : (tensor<i32>, tensor<i32>, tensor<!tf_type.variant<tensor<?x40xf32>>>, tensor<?x40xf32>, tensor<?x40xf32>, tensor<98x?x40xf32>) -> (tensor<i32>, tensor<i32>, tensor<!tf_type.variant<tensor<?x40xf32>>>, tensor<?x40xf32>, tensor<?x40xf32>, tensor<98x?x40xf32>)
    "tfl.yield"(%33#0, %33#1, %33#2, %33#3, %33#4, %33#5) : (tensor<i32>, tensor<i32>, tensor<!tf_type.variant<tensor<?x40xf32>>>, tensor<?x40xf32>, tensor<?x40xf32>, tensor<98x?x40xf32>) -> ()
  }) : (tensor<i32>, tensor<i32>, tensor<!tf_type.variant<tensor<?x40xf32>>>, tensor<?x40xf32>, tensor<?x40xf32>, tensor<98x?x40xf32>) -> (tensor<i32>, tensor<i32>, tensor<!tf_type.variant<tensor<?x40xf32>>>, tensor<?x40xf32>, tensor<?x40xf32>, tensor<98x?x40xf32>)
  %24 = "tf.TensorListStack"(%23#2, %6) <{num_elements = 98 : i64}> {device = ""} : (tensor<!tf_type.variant<tensor<?x40xf32>>>, tensor<2xi32>) -> tensor<98x?x40xf32>
  %25 = "tfl.transpose"(%24, %5) : (tensor<98x?x40xf32>, tensor<3xi32>) -> tensor<?x98x40xf32>
  %26 = "tfl.batch_matmul"(%1, %25) <{adj_x = false, adj_y = true}> : (tensor<1x1x40xf32>, tensor<?x98x40xf32>) -> tensor<?x1x98xf32>
  %27 = "tfl.softmax"(%26) <{beta = 1.000000e+00 : f32}> : (tensor<?x1x98xf32>) -> tensor<?x1x98xf32>
  %28 = "tfl.transpose"(%27, %12) : (tensor<?x1x98xf32>, tensor<3xi32>) -> tensor<?x98x1xf32>
  %29 = "tfl.mul"(%25, %28) <{fused_activation_function = "NONE"}> : (tensor<?x98x40xf32>, tensor<?x98x1xf32>) -> tensor<?x98x40xf32>
  %30 = "tfl.sum"(%29, %11) <{keep_dims = false}> : (tensor<?x98x40xf32>, tensor<i32>) -> tensor<?x40xf32>
  %31 = "tfl.fully_connected"(%30, %0, %2) <{fused_activation_function = "NONE", keep_num_dims = false, weights_format = "DEFAULT"}> : (tensor<?x40xf32>, tensor<2x40xf32>, tensor<2xf32>) -> tensor<?x2xf32>
  %32 = "tfl.softmax"(%31) <{beta = 1.000000e+00 : f32}> : (tensor<?x2xf32>) -> tensor<?x2xf32>
  "func.return"(%32) : (tensor<?x2xf32>) -> ()
}) {tf.entry_function = {control_outputs = "", inputs = "serving_default_keras_tensor_12:0", outputs = "StatefulPartitionedCall_1:0"}, tf_saved_model.exported_names = ["serving_default"]} : () -> ()
