# ESC-10 Dataset

In [63]:
!gdown https://drive.google.com/u/0/uc?id=1ioAloEiizmkS1Up6NpwPvv3LzWY2BKGZ&export=download

Downloading...
From: https://drive.google.com/u/0/uc?id=1ioAloEiizmkS1Up6NpwPvv3LzWY2BKGZ
To: /content/esc_classification_nuk.zip
100% 150M/150M [00:01<00:00, 106MB/s]


In [64]:
!unzip /content/esc_classification_nuk.zip

Archive:  /content/esc_classification_nuk.zip
replace esc_classification_nuk/code/Predictor.py? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

# Data Preprocess

In [118]:
def PrepareDataset(dataDir:str, batchSize:int=10) -> dict:
  """
  Prepare the training and validation dataset with tf.data.Dataset.

  Args:
      dataDir: The directory of data.
      batchSize: Number of samples in a batch.
  Returns:
      A dictionary of training and validation dataset.
  """

  ## Get class index dictionary
  subsetDirInfo = {"train":pathlib.Path(dataDir)/"train", "valid":pathlib.Path(dataDir)/"valid"}
  classes = sorted([each.name for each in subsetDirInfo["train"].glob("*")])
  classInfo = dict(zip(classes,range(len(classes))))

  ## Get file paths and labels
  paths = {"train":list(), "valid":list()}
  labels = {"train":list(), "valid":list()}
  for eachSet, eachDir in subsetDirInfo.items():
      for eachPath in eachDir.rglob("*"):
          if eachPath.is_file():
              paths[eachSet].append(str(eachPath))
              labels[eachSet].append(classInfo[eachPath.parts[-2]])

  ## Build tf.data.Dataset
  dataset = {"class_indices": classInfo}
  reader = lambda x, y: [tf.audio.decode_wav(tf.io.read_file(x))[0], tf.one_hot(y, len(classInfo))]
  for eachSet in subsetDirInfo.keys():
      eachTFData = tf.data.Dataset.from_tensor_slices((paths[eachSet],labels[eachSet]))
      dataset.update({eachSet:eachTFData})
      dataset[eachSet] = dataset[eachSet].shuffle(len(paths[eachSet]), reshuffle_each_iteration=True)
      dataset[eachSet] = dataset[eachSet].map(reader, num_parallel_calls=tf.data.AUTOTUNE)
      dataset[eachSet] = dataset[eachSet].batch(batchSize, drop_remainder=True).prefetch(tf.data.AUTOTUNE)

  return dataset

# Define Model

In [137]:
import tensorflow as tf
import pathlib, json

class MyModel(tf.keras.Model):
    """
    Generate a new model with loss function and optimizer, and provide training and validation steps.
    """
    def __init__(self, xSize:tuple, ySize:tuple, modelInfo:dict):
        """
        Args:
            xSize: Input data size. Ex. (44100, 2) for the stereo audio.
            ySize: Output data size. Ex. (10,) for 10 classes.
            modelInfo: Model parameters information.
        """
        super().__init__()
        self._xSize = xSize
        self._ySize = ySize
        self._modelInfo = modelInfo
        self._model = self._BuildModel()
        self._learner = self._BuildLearner()

    @tf.function
    def call(self, x:tf.Tensor, training:bool=False) -> tf.Tensor:
        """
        Defines activities when the model is called (or called by `__call__`).

        Args:
            x: A batch of input data.
            training: Training (True) or inferencing (False) mode.
        Returns:
            Output of the model.
        """
        output = self._model(x, training=training)

        return output

    @tf.function
    def Train(self, x:tf.Tensor, y:tf.Tensor):
        """
        Train the model once with a batch data of input `x` and target `y`.

        Args:
            x: A batch of input data.
            y: A batch of target data.
        """
        with tf.GradientTape() as tape:
            output = self.__call__(x, training=True)
            classLoss = self._learner["get_loss"](output, y)
        cGradients = tape.gradient(classLoss, self._model.trainable_variables)
        self._learner["optimize"].apply_gradients(zip(cGradients, self._model.trainable_variables))

    @tf.function
    def Validate(self, x:tf.Tensor, y:tf.Tensor) -> tf.Tensor:
        """
        Validate the model once with a batch data of input `x` and target `y`.

        Args:
            x: A batch of input data.
            y: A batch of target data.
        Returns:
            The performance value.
        """
        output = self.__call__(x, training=False)
        review = tf.math.in_top_k(tf.math.argmax(y,axis=1), output, 1)
        perf = tf.math.reduce_mean(tf.cast(review, dtype="float32"))

        return perf

    def _BuildModel(self) -> tf.keras.Model:
        """
        Build the NN model.

        Returns:
            A Keras model instance.
        """
        inputTensor = tf.keras.Input(shape=self._xSize)
        featureMap = inputTensor
        featureMap = tf.keras.layers.Conv1D(32, [9], strides=[8], padding="same", use_bias=False)(featureMap)
        featureMap = tf.keras.layers.BatchNormalization()(featureMap)
        featureMap = tf.keras.layers.ReLU()(featureMap)
        featureMap = tf.keras.layers.Conv1D(64, [9], strides=[8], padding="same", use_bias=False)(featureMap)
        featureMap = tf.keras.layers.BatchNormalization()(featureMap)
        featureMap = tf.keras.layers.ReLU()(featureMap)
        featureMap = tf.keras.layers.Conv1D(128, [3], strides=[2], padding="same", use_bias=False)(featureMap)
        featureMap = tf.keras.layers.BatchNormalization()(featureMap)
        featureMap = tf.keras.layers.ReLU()(featureMap)
        featureMap = tf.keras.layers.Conv1D(256, [3], strides=[2], padding="same", use_bias=False)(featureMap)
        featureMap = tf.keras.layers.BatchNormalization()(featureMap)
        featureMap = tf.keras.layers.ReLU()(featureMap)
        featureMap = tf.keras.layers.Conv1D(512, [3], strides=[2], padding="same", use_bias=False)(featureMap)
        featureMap = tf.keras.layers.BatchNormalization()(featureMap)
        featureMap = tf.keras.layers.ReLU()(featureMap)
        featureMap = tf.keras.layers.Conv1D(1024, [3], strides=[2], padding="same", use_bias=False)(featureMap)
        featureMap = tf.keras.layers.BatchNormalization()(featureMap)
        featureMap = tf.keras.layers.ReLU()(featureMap)
        embedding = tf.keras.layers.GlobalAveragePooling1D()(featureMap)
        embedding = tf.keras.layers.Dropout(rate=self._modelInfo["dropout"])(embedding)
        outputTensor = tf.keras.layers.Dense(units=self._ySize[-1], activation="softmax")(embedding)
        model = tf.keras.Model(inputTensor, outputTensor)

        return model

    def _BuildLearner(self) -> dict:
        """
        Build loss functions and optimizers.

        Returns:
            A dictionary of loss function and optimizer.
        """
        classLoss = lambda p, y: tf.reduce_mean(-tf.reduce_sum(y*tf.math.log(p+1e-13), axis=1))
        classOptimizer = tf.keras.optimizers.Adam(learning_rate=self._modelInfo["learning_rate"])
        learner = {"get_loss": classLoss, "optimize": classOptimizer}

        return learner






# Train Model

In [139]:
"""
  Basic CNN model training for audio classification.
"""
## Configuration
dataDir = r"/content/esc_classification_nuk/data/esc10"
batchSize = 10
xSize = (44100*5, 1)
ySize = (10,)
epochs = 26
newModel = r"./model/test"
dropout = 0.2
learningRate = 1e-4

print("Preparing dataset...")
dataset = PrepareDataset(dataDir, batchSize=batchSize)

print("Build the NN model...")
modelInfo = {"dropout": dropout, "learning_rate": learningRate}
myModel = MyModel(xSize, ySize, modelInfo)


print("Start training...")
for epoch in range(epochs):
    perfDict = {"train":[], "valid":[]}
    for inData, outData in dataset["train"]:
        myModel.Train(inData, outData)
        # break
    for inData, outData in dataset["train"]:
        perfDict["train"].append(myModel.Validate(inData, outData))
        # break
    for inData, outData in dataset["valid"]:
        perfDict["valid"].append(myModel.Validate(inData, outData))
        # break
    trainPerf = tf.math.reduce_mean(perfDict["train"]) * 100
    validPerf = tf.math.reduce_mean(perfDict["valid"]) * 100
    print(f"Epoch: {epoch},    Train perf: {trainPerf:.2f},    Valid perf: {validPerf:.2f}")

print("Export the model and information...")
newModel = pathlib.Path(newModel)
myModel.save(newModel/"model", include_optimizer=False)
with open(newModel/"class_info.json", "w") as wFile:
    json.dump(dataset["class_indices"], wFile, indent=4)

print("Completed!")

Preparing dataset...
Build the NN model...
Start training...
Epoch: 0,    Train perf: 10.94,    Valid perf: 13.75
Epoch: 1,    Train perf: 10.62,    Valid perf: 12.50
Epoch: 2,    Train perf: 11.88,    Valid perf: 13.75
Epoch: 3,    Train perf: 10.00,    Valid perf: 10.00
Epoch: 4,    Train perf: 10.00,    Valid perf: 10.00
Epoch: 5,    Train perf: 10.62,    Valid perf: 10.00
Epoch: 6,    Train perf: 12.81,    Valid perf: 11.25
Epoch: 7,    Train perf: 15.31,    Valid perf: 16.25
Epoch: 8,    Train perf: 22.81,    Valid perf: 22.50
Epoch: 9,    Train perf: 20.62,    Valid perf: 22.50
Epoch: 10,    Train perf: 21.25,    Valid perf: 25.00
Epoch: 11,    Train perf: 27.19,    Valid perf: 31.25
Epoch: 12,    Train perf: 29.06,    Valid perf: 33.75
Epoch: 13,    Train perf: 46.25,    Valid perf: 42.50
Epoch: 14,    Train perf: 47.19,    Valid perf: 51.25
Epoch: 15,    Train perf: 57.50,    Valid perf: 52.50
Epoch: 16,    Train perf: 63.75,    Valid perf: 52.50
Epoch: 17,    Train perf: 67.50

In [159]:
model = MyModel(xSize, ySize, modelInfo)
model.build((1,220500,1))
print(model._model.summary())

Model: "model_39"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_59 (InputLayer)       [(None, 220500, 1)]       0         
                                                                 
 conv1d_118 (Conv1D)         (None, 27563, 32)         288       
                                                                 
 batch_normalization_108 (B  (None, 27563, 32)         128       
 atchNormalization)                                              
                                                                 
 re_lu_108 (ReLU)            (None, 27563, 32)         0         
                                                                 
 conv1d_119 (Conv1D)         (None, 3446, 64)          18432     
                                                                 
 batch_normalization_109 (B  (None, 3446, 64)          256       
 atchNormalization)                                       

# Test Model

In [160]:
import tensorflow as tf
import json

class Predictor:
    """
    Inferencing interface of model.
    """
    def __init__(self, modelPath:str):
        """
        Args:
            modelPath: Path of the model.
        """
        self.__model = tf.keras.models.load_model(modelPath+"/model", compile=False)
        with open(modelPath+"/class_info.json", "r") as rFile:
            self.__classInfo = json.loads(rFile.read())
            self.__classInfo = {value:key for key, value in self.__classInfo.items()}

    def __call__(self, path:str) -> str:
        """
        Read the file and make an inference.

        Args:
            path: A file path.
        Returns:
            A class name result.
        """
        inputData = tf.expand_dims(tf.audio.decode_wav(tf.io.read_file(path))[0], 0)
        pred = self.__model(inputData,training=False)
        output = tf.squeeze(tf.argmax(self.__model(inputData,training=False), axis=1)).numpy()

        result = self.__classInfo[output]

        return pred[0][output].numpy() , result


audioPath = r"/content/esc_classification_nuk/data/esc10/valid/helicopter/5-177957-A-40.wav" #.replace("\\","/")
predictor = Predictor(modelPath="/content/model/test/") #.replace("\\","/"))
print(predictor(audioPath))



(0.94573957, 'rooster')


# Export Model

In [70]:
!pip install -U tf2onnx



In [146]:
!python -m tf2onnx.convert --saved-model "/content/model/test/model/" --output "./bs10_l4_d2.onnx" --opset 12 --inputs input_1:0[1,220500,1]

2023-10-21 11:50:48.944885: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
2023-10-21 11:50:50,306 - INFO - Signatures found in model: [serving_default].
2023-10-21 11:50:50,306 - INFO - Output names: ['output_1']
2023-10-21 11:50:50,738 - INFO - Using tensorflow=2.13.0, onnx=1.14.1, tf2onnx=1.15.1/37820d
2023-10-21 11:50:50,738 - INFO - Using opset <onnx, 12>
2023-10-21 11:50:50,771 - INFO - Apply shape override:
2023-10-21 11:50:50,772 - INFO - 	Set input_1:0 shape to [1, 220500, 1]
2023-10-21 11:50:50,804 - INFO - Computed 0 values for constant folding
2023-10-21 11:50:50,882 - INFO - Optimizing ONNX model
2023-10-21 11:50:51,023 - INFO - After optimization: Const -6 (27->21), GlobalAveragePool +1 (0->1), Identity -2 (2->0), ReduceMean -1 (1->0), Reshape +1 (0->1), Squeeze +1 (6->7), Transpose -12 (12->0)
2023-10-21 11:50:51,034 - INFO - 
2023-