In [None]:
# !pip install datasets
# !pip install librosa

In [None]:
import IPython.display as pds
import numpy as np
import pandas as pd
import warnings
import random
import zipfile
import librosa
import os
import re
import json
import tensorflow as tf
warnings.filterwarnings("ignore")

from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from scipy import signal

In [None]:
# Fetch the project repository. The dataset archive path used in the next cell
# points into this checkout (assumes the working directory is /content - TODO confirm).
!git clone https://github.com/Bangkit-Capstone-Team/IOH-Chat-App.git

In [None]:
# Path of the zipped speech recordings inside the cloned repository
# (despite the name, this is a .zip file, not a directory).
SPEECH_DATA_DIR = "/content/IOH-Chat-App/Machine Learning/datasets/speech/audio.zip"

# Unpack everything under /tmp. The context manager guarantees the archive
# handle is closed even if extraction fails part-way through.
with zipfile.ZipFile(SPEECH_DATA_DIR) as zf:
  zf.extractall("/tmp")

In [None]:
# Directory the audio files are read from (passed to prepare_dataset below).
# Presumably the folder created by extracting audio.zip into /tmp - verify
# against the archive's layout.
DATASET_PATH = "/tmp/audio/"

In [None]:
def decode_audio(audio_path):
  """Read an audio file with librosa and return only its sample array.

  `librosa.load` is called with its default arguments; the second element of
  its result (the sampling rate) is discarded.

  Args:
    audio_path: Path of the audio file to load.

  Returns:
    The first element of `librosa.load(audio_path)`.
  """
  loaded = librosa.load(audio_path)
  return loaded[0]

def get_label(file_path):
  """Derive a label from a file path.

  The label is the leading run of word characters (letters, digits,
  underscore) of the path's final component, so "/a/b/yes.wav" -> "yes".

  Args:
    file_path: Path to a file; only its last component is inspected.

  Returns:
    The leading word-character prefix of the file name, or "" when the name
    does not start with a word character (or the path ends in a separator).
  """
  file_name = os.path.basename(file_path)
  # `\w*` can match the empty string, so a match object is always returned.
  return re.match(r"^\w*", file_name).group(0)

def audio_signal_and_label(file_path):
  """Return the decoded audio and the label for one file.

  Args:
    file_path: Path of the audio file.

  Returns:
    A `(feature, label)` tuple: the result of `decode_audio(file_path)` and
    the result of `get_label(file_path)`.
  """
  # The label is resolved first, then the audio is decoded.
  name = get_label(file_path)
  return decode_audio(file_path), name

def prepare_dataset(dataset_path):
  """Load every non-empty audio file of a directory into parallel lists.

  Args:
    dataset_path: Directory whose entries are audio files that
      `audio_signal_and_label` can decode.

  Returns:
    A dict of three equally long lists with one entry per loaded file:
      "labels": label string of the file.
      "features": decoded audio of the file.
      "one_hot_labels": integer class id of the label. Despite the key name
        this is an index, not a one-hot vector. Ids are assigned 0, 1, 2, ...
        in order of first appearance, so files with equal labels share an id
        and the ids are contiguous.
  """
  json_data = {
      "labels": list(),
      "features": list(),
      "one_hot_labels": list(),
  }
  class_ids = {}

  # os.listdir returns entries in arbitrary order; sort so that repeated runs
  # produce the same dataset order (and therefore the same train/test split).
  for filename in sorted(os.listdir(dataset_path)):
    path = os.path.join(dataset_path, filename)

    # Sub-directories and zero-byte files cannot be decoded; skip them.
    if not os.path.isfile(path) or os.path.getsize(path) == 0:
      continue

    feature, label = audio_signal_and_label(path)

    json_data["labels"].append(label)
    json_data["features"].append(feature)
    # len(class_ids) is evaluated before insertion, so a new label receives
    # the next unused id while a known label keeps its existing one.
    json_data["one_hot_labels"].append(class_ids.setdefault(label, len(class_ids)))

  return json_data

In [None]:
# Decode every audio file under DATASET_PATH into a dict of parallel lists.
dataset = prepare_dataset(DATASET_PATH)

In [None]:
# One row per loaded audio file; the columns are the keys of the dict returned
# by prepare_dataset. Show the first rows as a sanity check.
df = pd.DataFrame(dataset)
df.head()

In [None]:
# Encode the string labels as integer class ids in [0, n_classes): the sparse
# categorical cross-entropy loss used for training needs integer targets.
# class_names[label_ids[i]] recovers the original string of sample i.
labels = np.array(df.labels)
class_names, label_ids = np.unique(labels, return_inverse=True)

# Recordings can differ in length, which would make np.array(df.features) a
# 1-D object array that Keras cannot turn into a tensor. Zero-pad every
# recording at the end to the longest one to get a dense float matrix.
# Assumes each recording is a 1-D array (librosa's default mono output) - TODO confirm.
max_len = max(len(audio) for audio in df.features)
features = np.stack(
    [np.pad(audio, (0, max_len - len(audio))) for audio in df.features]
).astype(np.float32)

# train_test_split returns (x_train, x_test, y_train, y_test), in that order.
x_train, x_test, y_train, y_test = train_test_split(
    features, label_ids, test_size=0.2, random_state=1)

# Give every sample an image-like (height=1, width=max_len, channels=1) layout
# so it can be fed to the Resizing / Conv2D layers of the model.
x_train = np.expand_dims(np.expand_dims(x_train, axis=1), axis=-1)
x_test = np.expand_dims(np.expand_dims(x_test, axis=1), axis=-1)

# Per-sample shape only: the batch dimension must not be part of input_shape.
input_shape = x_train.shape[1:]

In [None]:
def getModel(n_class, input_shape):
  """Build the (uncompiled) convolutional classifier.

  Layer order: input -> resize to 32x32 -> three Conv2D(3x3, relu, same) +
  MaxPooling2D(2x2, same) stages with 32, 64 and 128 filters -> flatten ->
  Dense(128) -> Dropout(0.5) -> Dense(64) -> Dropout(0.5) -> softmax output.

  Args:
    n_class: Number of units of the final softmax layer.
    input_shape: Shape passed to the model's Input layer.

  Returns:
    A `tf.keras.Sequential` model.
  """
  stack = [
      layers.Input(shape=input_shape),
      layers.Resizing(32, 32),
  ]

  # Convolutional feature extractor: the filter count doubles at every stage.
  for n_filters in (32, 64, 128):
    stack.append(layers.Conv2D(n_filters, (3, 3), activation='relu', padding="same"))
    stack.append(layers.MaxPooling2D(pool_size=(2, 2), padding="same"))

  stack.append(layers.Flatten())

  # Fully connected head, each hidden layer followed by dropout.
  for n_units in (128, 64):
    stack.append(layers.Dense(n_units, activation='relu'))
    stack.append(layers.Dropout(0.5))

  stack.append(layers.Dense(n_class, activation='softmax'))

  return tf.keras.Sequential(stack)

In [None]:
# Training hyper-parameters.
LR = 1e-4
OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LR)
# Sparse variant: targets are integer class ids, not one-hot vectors.
LOSS = tf.keras.losses.sparse_categorical_crossentropy

# The softmax layer needs one unit per distinct class in the dataset, not one
# per training sample, so count the unique labels.
N_CLASSES = len(np.unique(labels))

model = getModel(N_CLASSES, input_shape)

model.compile(optimizer=OPTIMIZER,
              loss=LOSS,
              metrics=["accuracy"])

model.summary()

In [None]:
# Number of full passes over the training split.
EPOCHS = 15

# Train the model; the held-out split is used as validation data after
# every epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=EPOCHS,
    validation_data=(x_test, y_test),
)