# Audio Classifier

Train a CNN-based classifier with __TensorFlow__ and __Teal__ on the GTZAN Music Speech dataset.

Install Teal:

In [None]:
!pip install git+https://github.com/am1tyadav/teal

Restart the kernel for the installation to take effect.

In [None]:
import IPython

# Ask the running kernel to shut down with restart=True so that packages
# installed above can be imported by the cells that follow.
kernel = IPython.Application.instance().kernel
kernel.do_shutdown(True)

Import TensorFlow and Teal after the kernel restarts

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import teal

## Download GTZAN Dataset

In [None]:
# Load the GTZAN Music Speech dataset through TFDS, using ./tmp as the data directory.
# The result is indexed by split name further down (dataset["train"]).
dataset = tfds.load(
    name="gtzan_music_speech",
    data_dir="./tmp",
)

In [None]:
def process_example(example, num_splits=10):
    """Cut one dataset record into `num_splits` equal-length, labelled audio clips.

    Args:
        example: Mapping holding an "audio" tensor and a "label" tensor, as
            yielded by the dataset this function is mapped over.
        num_splits: Number of equal segments to cut the audio into. Defaults
            to 10, the value that used to be hard-coded.

    Returns:
        A pair `(audio_splits, label)`. `audio_splits` is the list of
        `num_splits` float32 segments returned by `tf.split`; `label` is the
        float32 label repeated `num_splits` times, one entry per segment.
    """
    # Scale by 1/32768 (2**15) — presumably the raw samples are 16-bit PCM
    # integers; TODO confirm against the dataset's feature spec.
    audio = tf.cast(example["audio"], dtype=tf.float32) / 32768.
    label = tf.cast(example["label"], dtype=tf.float32)

    # With an integer count, tf.split requires the audio length to be evenly
    # divisible by `num_splits`.
    audio_splits = tf.split(audio, num_or_size_splits=num_splits, axis=0)
    label = tf.repeat(label, repeats=num_splits)
    return audio_splits, label

There are 64 files per class, and we divide each file into 10 examples, making 640 examples per class.

In [None]:
BATCH_SIZE = 4

# Map every file to its stack of clips and labels, then unbatch so that each
# dataset element is a single (clip, label) pair.
train_dataset = (
    dataset["train"]
    .map(process_example)
    .unbatch()
)

In [None]:
# The shuffle buffer is sized to the whole dataset so that any example can end
# up in any batch.
try:
    TOTAL_EXAMPLES = len(train_dataset)
except TypeError:
    # len() raises TypeError when tf.data cannot determine the cardinality,
    # which can happen downstream of unbatch(); count with one pass instead.
    TOTAL_EXAMPLES = sum(1 for _ in train_dataset)

# NOTE: this rebinds `train_dataset` to the shuffled, batched pipeline, so
# re-run the cell that builds `train_dataset` before re-running this one.
train_dataset = train_dataset.shuffle(buffer_size=TOTAL_EXAMPLES).batch(BATCH_SIZE)

## Models

### Feature Model - Log Mel Spectrogram

In [None]:
SAMPLE_RATE = 22050
SAMPLE_LEN = 3 * SAMPLE_RATE  # 66150 samples per clip
N_FFT = 1024
HOP_LEN = 512
N_MELS = 128


# Waveform-in, spectrogram-out feature extractor assembled from Teal layers.
feature_layers = [
    tf.keras.layers.Input(shape=(SAMPLE_LEN, )),
    teal.NormalizeAudio(),
    teal.AudioToMelSpectrogram(SAMPLE_RATE, N_FFT, HOP_LEN, N_MELS),
    teal.PowerToDb(),
    teal.NormalizeSpectrum(),
]
feature_model = tf.keras.models.Sequential(feature_layers, name="feature_model")

feature_model.summary()

### Augmentation Model

In [None]:
# Waveform-to-waveform augmentation chain; it takes the same input length as
# the feature model so the two can be composed directly.
augmentation_layers = [
    tf.keras.layers.Input(shape=(SAMPLE_LEN, )),
    teal.InversePolarity(0.5),
    teal.RandomGain(0.2),
    teal.RandomNoise(0.4),
    teal.PitchShift(0.5, 200),
    teal.RandomGain(0.2),
]
aug_model = tf.keras.models.Sequential(augmentation_layers, name="augmentation_model")

aug_model.summary()

### CNN Model

In [None]:
# Binary classifier over 2-D spectrograms: four stride-2 convolutions followed
# by a small dense head ending in a single sigmoid unit.
# NOTE(review): the (128, 128) input shape is assumed to match the output of
# feature_model — confirm against feature_model.summary().
cnn = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(128, 128)),
    # Add the trailing channel axis Conv2D expects. Reshape replaces the
    # previous Lambda(tf.expand_dims): it produces the same tensor, but a
    # built-in layer serializes portably whereas a Python lambda does not.
    tf.keras.layers.Reshape((128, 128, 1)),
    tf.keras.layers.Conv2D(16, 3, padding="same", strides=2, activation="relu"),
    tf.keras.layers.Conv2D(32, 3, padding="same", strides=2, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(64, 3, padding="same", strides=2, activation="relu"),
    tf.keras.layers.Conv2D(128, 3, padding="same", strides=2, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(1, activation="sigmoid")
], name="cnn")

cnn.summary()

### Composite Model

In [None]:
# Wire the three sub-models end to end: waveform -> augmentation -> features -> CNN.
_input = tf.keras.layers.Input(shape=(SAMPLE_LEN, ))
_data = aug_model(_input)
_feature = feature_model(_data)
_output = cnn(_feature)

model = tf.keras.models.Model(
    inputs=_input,
    outputs=_output,
    name="composite_model",
)
# Binary cross-entropy pairs with the single sigmoid output of `cnn`.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

model.summary()

## Training

In [None]:
# Bind the return value of fit() to `_` so the cell does not echo it.
_ = model.fit(x=train_dataset, epochs=2)

## Saving Model for Production

You probably don't want the augmentation model/layers in your deployed model!

In [None]:
# Rebuild the graph from the trained sub-models, skipping the augmentation stage.
# The sub-models are looked up by name rather than by position in
# `model.layers`: indices silently break if the composite changes, and they
# raise IndexError when this cell is re-run, because `model` is rebound below
# to a model with one layer fewer. Name lookup works in both cases.
_input = model.input

_feature = model.get_layer("feature_model")(_input)
_output = model.get_layer("cnn")(_feature)

model = tf.keras.models.Model(_input, _output, name="prod_model")
model.summary()

## Get Some Predictions

In [None]:
# Pull one batch from the training pipeline and score it with the current `model`.
batch_iterator = iter(train_dataset)
examples, labels = next(batch_iterator)

preds = model.predict(examples)

# Threshold the sigmoid outputs at 0.5 to get boolean class decisions.
preds > 0.5

In [None]:
# Ground-truth labels for the same batch, thresholded the same way for comparison.
tf.greater(labels, 0.5).numpy()