# Training VGG11 on gammatone spectrograms

In [None]:
# cd ..
# Shell escape: print the current working directory so the relative paths used later can be checked.
!pwd

In [None]:
# Move one directory up and then into the openmic-2018 folder; the relative paths used
# later in this notebook ("openmic-2018.npz", "partitions/...") are resolved from there.
%cd ..
%cd openmic-2018

In [None]:
import json
import os
import numpy as np
import pandas as pd
import tensorflow as tf

from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.metrics import multilabel_confusion_matrix, classification_report

## data loading and functions

In [None]:
# Open the OpenMIC-2018 archive from the current working directory.
# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can execute
# arbitrary code -- only load archives that come from a trusted source.
data = np.load("openmic-2018.npz", allow_pickle=True)

# Pull out the four arrays this notebook reads from the archive.
X = data['X']
Y_true = data['Y_true']
Y_mask = data['Y_mask']
sample_key = data['sample_key']

print(X.shape, Y_true.shape, Y_mask.shape, sample_key.shape)

In [None]:
# Show the raw label row of the first sample ...
print(Y_true[0])
# ... then binarise every entry: strictly above 0.5 becomes 1, everything else becomes 0.
Y_true1 = (Y_true > 0.5).astype(int)
# ... and show the same row after thresholding.
print(Y_true1[0])

In [None]:
IMG_SIZE = 224 # Height and width (in pixels) every image is resized to by parse_function
CHANNELS = 3 # Number of colour channels requested from the image decoder (3 = RGB)

In [None]:
def parse_function(filename, label):
    """Load one image from disk and pair it with its label.

    The file is read, decoded with CHANNELS colour channels, resized to
    IMG_SIZE x IMG_SIZE and divided by 255.0.

    Args:
        filename: string tensor holding the path of the image file
        label: label array for that image; passed through untouched
    Returns:
        Tuple (scaled image tensor, label).
    """
    raw_bytes = tf.io.read_file(filename)
    # NOTE(review): the paths built in this notebook end in ".png" while the decoder is
    # decode_jpeg; the TensorFlow docs state decode_jpeg also accepts PNG -- confirm for
    # the installed TensorFlow version.
    pixels = tf.image.decode_jpeg(raw_bytes, channels=CHANNELS)
    target_size = [IMG_SIZE, IMG_SIZE]
    # Resize to the fixed square shape, then scale by 1/255.
    scaled = tf.image.resize(pixels, target_size) / 255.0
    return scaled, label

In [None]:
BATCH_SIZE = 32 # Number of samples per batch produced by create_dataset
AUTOTUNE = tf.data.experimental.AUTOTUNE # Let tf.data choose parallelism and prefetch depth dynamically to reduce GPU and CPU idle time
SHUFFLE_BUFFER_SIZE = 128 # Buffer size for shuffling the training data in chunks of 128 observations (the shuffle step in create_dataset is currently commented out)

In [None]:
def create_dataset(filenames, labels, is_training=True):
    """Build a batched, prefetched tf.data pipeline of (image, label) pairs.

    Args:
        filenames: list of image paths
        labels: one label array per entry of ``filenames``, in the same order
        is_training: boolean to indicate training mode; currently not used,
            because caching and shuffling are disabled
    Returns:
        A tf.data.Dataset yielding batches of BATCH_SIZE parsed images and
        their labels. No shuffling is applied, so elements come out in the
        order of ``filenames``.
    """
    pipeline = (
        # Pair every path with its label ...
        tf.data.Dataset.from_tensor_slices((filenames, labels))
        # ... decode and preprocess the images in parallel ...
        .map(parse_function, num_parallel_calls=AUTOTUNE)
        # ... group into fixed-size batches ...
        .batch(BATCH_SIZE)
        # ... and prepare upcoming batches in the background while the model trains.
        .prefetch(buffer_size=AUTOTUNE)
    )
    return pipeline

In [None]:
def _read_ids(path):
    """Return the whitespace-stripped lines of the text file ``path`` as a NumPy array."""
    with open(path) as f:
        return np.array([line.strip() for line in f])


def _indices_of(keys, wanted_ids):
    """Return the positions in ``keys`` whose value is one of ``wanted_ids``.

    The IDs are put in a set once so each membership test is O(1); scanning the
    NumPy array of IDs for every key would be quadratic overall.
    """
    wanted = set(wanted_ids.tolist())
    return np.array([i for i, key in enumerate(keys) if key in wanted])


# Load training split csv file
train_IDs = _read_ids("partitions/split01_train.csv")
print(train_IDs.shape)

# Load test split csv file
test_IDs = _read_ids("partitions/split01_test.csv")
print(test_IDs.shape)

# Get the training and testing split data into np arrays.
# Each array holds the row indices (into sample_key, and therefore into the label
# arrays loaded from the archive) of the samples that belong to the split.
# Iterating over sample_key itself avoids hard-coding the number of samples.
train_index = _indices_of(sample_key, train_IDs)
test_index = _indices_of(sample_key, test_IDs)

print(train_index.shape)
print(test_index[1:10])

In [None]:
# Root folder of the gammatone spectrogram images; files live in a sub-folder named
# after the first three characters of the sample key.
GAMMATONE_DIR = "/home/studio-lab-user/sagemaker-studiolab-notebooks/ESP3201-Instrument-indentification/openmic-2018/audio-gammatone"


def _spectrogram_paths(indices):
    """Return the image path of every ``sample_key`` row listed in ``indices``."""
    keys = [sample_key[i] for i in indices]
    return [f"{GAMMATONE_DIR}/{key[:3]}/{key}.png" for key in keys]


# Paths and labels are both derived from the same index arrays, so the i-th path always
# belongs to the i-th label. Building the paths from the CSV order instead would only be
# correct if the CSV happened to list the IDs in exactly the order of sample_key.
train_paths = _spectrogram_paths(train_index)
print(train_paths[1])

test_paths = _spectrogram_paths(test_index)
print(test_paths[1])

train_labels = [Y_true1[i] for i in train_index]
print(train_labels[1])

test_labels = [Y_true1[i] for i in test_index]
print(test_labels[1])

In [None]:
# Training pipeline: image paths paired with their binarised labels.
x_train = create_dataset(
    train_paths,
    train_labels,
)
print(x_train)

# Test pipeline, built the same way from the test split.
x_test = create_dataset(
    test_paths,
    test_labels,
)
print(x_test)

In [None]:
# Smoke-test the data loader: pull a single batch and print the shape of its label array.
for _images, label_batch in x_train.take(1).as_numpy_iterator():
    print(label_batch.shape)

## model

In [None]:
# import necessary layers
from tensorflow.keras.layers import Input, Conv2D
from tensorflow.keras.layers import MaxPool2D, Flatten, Dense
from tensorflow.keras import Model

# Number of sigmoid outputs, one per label.
NUM_OUTPUTS = 20

# (filters, number of 3x3 conv layers) for each conv block; every block ends with a
# 2x2 max-pool.
# NOTE(review): the notebook title says VGG11, but this stack has four conv blocks
# (6 conv layers); the standard VGG11 has a fifth block of two 512-filter convs --
# confirm whether the omission is intentional.
CONV_BLOCKS = [(64, 1), (128, 1), (256, 2), (512, 2)]

# Input layer, sized from the same constants the data pipeline uses when resizing.
# Named `inputs` so the Python builtin `input` is not shadowed.
inputs = Input(shape=(IMG_SIZE, IMG_SIZE, CHANNELS))

# Convolutional feature extractor
x = inputs
for filters, n_convs in CONV_BLOCKS:
    for _ in range(n_convs):
        x = Conv2D(filters=filters, kernel_size=3, padding='same', activation='relu')(x)
    x = MaxPool2D(pool_size=2, strides=2, padding='same')(x)

# Fully connected layers
x = Flatten()(x)
x = Dense(units=4096, activation='relu')(x)
x = Dense(units=4096, activation='relu')(x)
# Sigmoid (not softmax): each output is an independent per-label score in [0, 1].
output = Dense(units=NUM_OUTPUTS, activation='sigmoid')(x)

# creating the model
model = Model(inputs=inputs, outputs=output)
model.summary()


In [None]:
LR = 1e-5 # Learning rate handed to the Adam optimizer. NOTE(review): the model in this notebook is built from scratch, not from pretrained weights -- confirm such a small value is intended
EPOCHS = 40 # Upper bound on the number of training epochs
loss_fn = tf.keras.losses.BinaryCrossentropy() # Loss applied to the model's sigmoid outputs

In [None]:
# Configure training: Adam with the learning rate set above, the binary cross-entropy
# loss defined above, and recall / precision reported as metrics.
model.compile(
    loss=loss_fn,
    optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
    metrics=['Recall', 'Precision'],
)

In [None]:
# Stop training once the validation loss has not improved for 10 consecutive epochs.
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
# Write the model to disk whenever the validation loss reaches a new minimum.
mcp_save = ModelCheckpoint('mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
# Multiply the learning rate by 0.1 after 7 epochs without a validation-loss improvement
# of at least 1e-4. `min_delta` is the current name of the deprecated `epsilon` argument.
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, min_delta=1e-4, mode='min')

# train
# The checkpoint callback has to be passed to fit(); merely constructing it saves nothing.
# NOTE(review): the test split doubles as validation data here, so early stopping and
# learning-rate scheduling are tuned on the data later used for evaluation.
history = model.fit(
    x_train,
    epochs=EPOCHS,
    callbacks=[earlyStopping, mcp_save, reduce_lr_loss],
    validation_data=x_test,
)

## visualise results

In [None]:
# List the names of all series recorded by model.fit (the keys of history.history).
print(history.history.keys())

In [None]:
import matplotlib.pyplot as plt

# Plot training loss first, then validation loss, so the curves match the legend order.
for series_name in ('loss', 'val_loss'):
    plt.plot(history.history[series_name])

# Axis labels, title and legend.
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('model loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Predict per-label scores for the whole test pipeline. x_test is already batched by
# create_dataset, and Keras documents that batch_size must not be given for datasets,
# so no batch_size is passed here.
y_pred = model.predict(x_test)
print(y_pred[0])
# Binarise the scores with the same rule used for the ground truth: > 0.5 -> 1, else 0.
y_pred1 = (y_pred > 0.5).astype(int)
print(y_pred1[0])

In [None]:
# Confusion matrices of the thresholded predictions against the test labels, computed by
# sklearn's multilabel_confusion_matrix. Relies on y_pred1 being in the order of test_labels.
print(multilabel_confusion_matrix(test_labels, y_pred1))

In [None]:
# Text report from sklearn's classification_report for the thresholded predictions
# against the test labels.
print(classification_report(test_labels, y_pred1))