VGG16 model - ALL DATA
Creation of VGG16 model for sound recognition
Dataset includes Freesound, UrbanSound8k and some custom sounds generated from an iPhone

In [None]:
from google.colab import drive #Only if you are using Google Drive
drive.mount('/content/gdrive')
drive.mount("/content/gdrive", force_remount=True)
%cd /content/gdrive/My\ Drive/

Mounted at /content/gdrive
Mounted at /content/gdrive
/content/gdrive/My Drive


In [None]:
import matplotlib.pyplot as plt
import matplotlib.pylab as plabt
import numpy as np
import os
import PIL
import time
import datetime
import PIL.Image as Image

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras import layers, datasets, models, losses
from tensorflow.keras.models import Sequential

#from tensorboardcolab import TensorBoardColab, TensorBoardColabCallback


%load_ext tensorboard

In [None]:
# Input-pipeline settings used by the dataset and model cells.
img_height, img_width = 224, 224
IMG_SIZE = (img_height, img_width)
batch_size = 32

# Directory that the dataset loader cells read the labeled images from.
data_dir = '/content/gdrive/MyDrive/content/img_dir/combined_data/all_labeled/'


Get VGG16 model without the top layers

In [None]:
# Load VGG16 with ImageNet weights and without its fully connected top, so it
# can serve as a fixed feature extractor.
# Keras expects input_shape as (height, width, channels); the original passed
# (width, height, 3), which only worked because both values are equal.
feature_extractor_model = tf.keras.applications.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(img_height, img_width, 3),
)

# Freeze the whole backbone; setting `trainable` on the model applies to every
# layer it contains.
feature_extractor_model.trainable = False
feature_extractor_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)    

Dataset

In [None]:
# Arguments common to both loaders. The training and validation subsets are
# carved out of the same directory using the same split fraction and seed.
split_options = dict(
    validation_split=0.2,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="training", **split_options)

val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="validation", **split_options)

# Augmentation pipeline (horizontal flip + random rotation). It is only
# defined here; none of the visible cells apply it to the datasets.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.RandomFlip('horizontal'))
data_augmentation.add(tf.keras.layers.RandomRotation(0.2))

# Class names as reported by the training loader.
class_names = np.array(train_ds.class_names)
print(class_names)


Found 49449 files belonging to 217 classes.
Using 39560 files for training.
Found 49449 files belonging to 217 classes.
Using 9889 files for validation.
['Accelerating_and_revving_and_vroom' 'Accordion' 'Acoustic_guitar'
 'Aircraft' 'Alarm' 'Animal' 'Applause' 'Bark' 'Bass_drum' 'Bass_guitar'
 'Bathtub_filling_or_washing' 'Bell' 'Bicycle' 'Bicycle_bell' 'Bird'
 'Bird_vocalization_and_bird_call_and_bird_song' 'Boat_and_Water_vehicle'
 'Boiling' 'Boom' 'Bowed_string_instrument' 'Brass_instrument' 'Breathing'
 'Burping_and_eructation' 'Bus' 'Buzz' 'Camera' 'Car' 'Car_passing_by'
 'Cat' 'Chatter' 'Cheering' 'Chewing_and_mastication'
 'Chicken_and_rooster' 'Child_speech_and_kid_speaking' 'Chime'
 'Chink_and_clink' 'Chirp_and_tweet' 'Chuckle_and_chortle' 'Church_bell'
 'Clapping' 'Clock' 'Coin_dropping' 'Computer_keyboard' 'Conversation'
 'Cough' 'Cowbell' 'Crack' 'Crackle' 'Crash_cymbal' 'Cricket' 'Crow'
 'Crowd' 'Crumpling_and_crinkling' 'Crushing' 'Crying_and_sobbing'
 'Cupboard_open_or_c

In [None]:
# VGG16's ImageNet weights were trained on inputs processed by
# `vgg16.preprocess_input` (RGB -> BGR plus per-channel mean subtraction on
# 0-255 pixel values), not on images rescaled to [0, 1]. Feeding [0, 1] inputs
# to the frozen backbone degrades the extracted features, so the matching
# preprocessing is applied here. The `normalization_layer` name is kept so
# that any later use of it stays valid.
normalization_layer = tf.keras.layers.Lambda(
    tf.keras.applications.vgg16.preprocess_input)

# NOTE: this cell rebinds train_ds / val_ds. Re-running it without re-running
# the dataset-loading cell would apply the preprocessing twice.
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))  # x: images, y: labels
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))  # x: images, y: labels

# NOTE(review): cache() with no filename keeps the preprocessed tensors in
# memory. With ~39.5k float32 images of 224x224x3 that is on the order of
# 24 GB for the training set alone -- confirm the runtime has enough RAM, or
# pass a filename to cache() / drop the cache.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Pull one batch to verify the shapes; image_batch is reused by later cells.
image_batch, labels_batch = next(iter(train_ds))
print(image_batch.shape)
print(labels_batch.shape)
  

(32, 224, 224, 3)
(32,)


Wrap the pre-trained VGG16 model as a Keras layer with hub. Use the trainable=False argument to freeze the variables, so that the training only modifies the new classifier layer:

In [None]:
# Wrap the VGG16 backbone in a hub.KerasLayer with trainable=False so it can
# be used as the first layer of the classifier.
feature_extractor_layer = hub.KerasLayer(
    feature_extractor_model, input_shape=(224, 224, 3), trainable=False
)

# Run the sample batch through the wrapper and show the feature-map shape.
feature_batch = feature_extractor_layer(image_batch)
print(feature_batch.shape)


(32, 7, 7, 512)


Create a new sequential model that uses the hub-wrapped pre-trained model as its first layer and adds a new classification layer on top

In [None]:
num_classes = len(class_names)

# The VGG16 backbone without its top emits a (7, 7, 512) feature map per image.
# A Dense layer applied directly to it only acts on the last axis and produces
# (batch, 7, 7, num_classes) logits, which cannot be matched against the
# (batch,) integer labels by SparseCategoricalCrossentropy. Pooling over the
# spatial axes first yields one 512-vector per image, so the Dense layer
# outputs (batch, num_classes) logits.
model = tf.keras.Sequential([
  feature_extractor_layer,
  tf.keras.layers.GlobalAveragePooling2D(),
  tf.keras.layers.Dense(num_classes)  # raw logits; no softmax here
])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 7, 7, 512)         14714688  
                                                                 
 dense (Dense)               (None, 7, 7, 217)         111321    
                                                                 
Total params: 14,826,009
Trainable params: 111,321
Non-trainable params: 14,714,688
_________________________________________________________________


Attach a classification head: run one batch through the model to check the output shape

In [None]:
# Run the sample image batch through the model and display the shape of the
# resulting tensor as a quick check of the model's output.
predictions = model(image_batch)
predictions.shape

TensorShape([32, 7, 7, 217])

**Train the model**

Use Model.compile to configure the training process and add a tf.keras.callbacks.TensorBoard callback to create and store logs:

In [None]:
# Configure training: Adam at a learning rate of 1e-3, sparse categorical
# cross-entropy computed on logits (from_logits=True), and accuracy reported
# under the name 'acc'.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['acc'],
)

# Each run logs into its own timestamped sub-folder.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log = "/content/gdrive/MyDrive/content/img_dir/logs/vgg16/all_combined/fit/" + run_stamp

# histogram_freq=1 enables histogram computation for every epoch.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log, histogram_freq=1)

Now use the Model.fit method to train the model.

To visualize the training progress in TensorBoard later, create and store logs with a TensorBoard callback.

In [None]:
 # Reload the TensorBoard notebook extension.
 %reload_ext tensorboard

In [None]:
!pip install tensorboardcolab

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorboardcolab
  Downloading tensorboardcolab-0.0.22.tar.gz (2.5 kB)
Building wheels for collected packages: tensorboardcolab
  Building wheel for tensorboardcolab (setup.py) ... [?25l[?25hdone
  Created wheel for tensorboardcolab: filename=tensorboardcolab-0.0.22-py3-none-any.whl size=3859 sha256=16a6bbcf102ae3abab73ac6e78093d45dbbd92677262d27ada3d34453dadb9b4
  Stored in directory: /root/.cache/pip/wheels/69/4e/4a/1c6c267395cb10edded1050df12af165d3254cfce324e80941
Successfully built tensorboardcolab
Installing collected packages: tensorboardcolab
Successfully installed tensorboardcolab-0.0.22


In [None]:
from tensorboardcolab import TensorBoardColab, TensorBoardColabCallback

In [None]:
NUM_EPOCHS = 40

# Train on the training split and evaluate on the validation split after each
# epoch. Keras documents `callbacks` as a list of Callback instances, so the
# TensorBoard callback is passed inside a list rather than bare.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=NUM_EPOCHS,
    callbacks=[tensorboard_callback],
)


Got `type(handle)`: <class 'keras.engine.functional.Functional'>
Epoch 1/40


InvalidArgumentError: ignored