# Transfer learning with YAMNet

### Colab preparations

In [None]:
# Fetch the project repository; the notebook imports helper modules
# (`functions`, `params`, `yamnet`) that are expected to live inside it.
! git clone https://github.com/fbnspl/rfcx-rainforest.git
# Work from the yamnet folder so the relative paths used below
# ('data/...', 'yamnet.h5') resolve against it.
%cd rfcx-rainforest/yamnet/
# Optional dependency: only needed if the commented-out `tensorflow_io`
# import is re-enabled.
# ! pip install tensorflow-io==0.16

### Imports

In [None]:
# Imports.
import numpy as np
import pandas as pd
import librosa
import glob
import pickle
import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path
# import tensorflow_io as tfio
import tensorflow as tf
import tensorflow.keras.layers.experimental.preprocessing as kp

from sklearn.model_selection import train_test_split

from functions.augment import time_mask, freq_mask, mixup_one_hot
from functions.metrics import LWLRAP

import params as yamnet_params
import yamnet as yamnet_model


---

## 0. Load data dict from pickle

In [None]:
# Read the pickled training dictionary in one go and deserialize it.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
data_train = pickle.loads(Path('data/data_train.pickle').read_bytes())

# Show which entries the dictionary provides.
print(data_train.keys())

In [None]:
# Read the pickled validation dictionary in one go and deserialize it.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
data_val = pickle.loads(Path('data/data_val.pickle').read_bytes())

# Show which entries the dictionary provides.
print(data_val.keys())

## 6. Transfer learning with YAMNet

In [None]:
def build_model(num_classes=24, n_frozen_layers=0):
    """Build a transfer-learning classifier on top of pretrained YAMNet.

    Loads the YAMNet frames model with the weights stored in 'yamnet.h5',
    re-uses a slice of its layers behind a fresh spectrogram-patch input,
    and replaces the original output with a new sigmoid prediction layer.

    Args:
        num_classes: Number of units in the new prediction layer.
        n_frozen_layers: Number of leading layers of the resulting model
            to mark as non-trainable. The default of 0 leaves every layer
            trainable.

    Returns:
        An uncompiled `tf.keras.Model` that maps inputs of shape
        (96, 64, 1) to `num_classes` sigmoid scores.
    """
    # The graph is designed for a sampling rate of 16 kHz, but higher rates
    # should work too. We also generate scores at a 10 Hz frame rate.
    params = yamnet_params.Params(sample_rate=16000, patch_hop_seconds=0.1)
    yamnet = yamnet_model.yamnet_frames_model(params)
    yamnet.load_weights('yamnet.h5')

    # Keep the layers from the convolutions onwards and drop the last two.
    # NOTE(review): index 79 presumably marks the first convolution and the
    # last two layers presumably form the original classifier head --
    # confirm against the layer listing of the yamnet version in use.
    core_layers = yamnet.layers[79:-2]

    # New input layer; random contrast acts as an input augmentation.
    input_layer = tf.keras.Input(shape=(96, 64, 1), name='Input')
    x = kp.RandomContrast(factor=0.2)(input_layer)

    # Re-attach the pretrained layers to the new input.
    for layer in core_layers:
        x = layer(x)

    # New prediction layer.
    x = tf.keras.layers.Dense(num_classes, activation='sigmoid')(x)

    yamnet_tl = tf.keras.Model(inputs=input_layer, outputs=x)

    # Optionally freeze the first layers (no-op for n_frozen_layers=0).
    for layer in yamnet_tl.layers[:n_frozen_layers]:
        layer.trainable = False

    return yamnet_tl

#### Make dataset

In [None]:
# Let tf.data decide how many elements each map() processes in parallel.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# One dataset element per (spectrogram, one-hot label) pair.
train_dataset = tf.data.Dataset.from_tensor_slices(
    (data_train['X_train'], data_train['y_one_hot_train'])
)
# Per-example spectrogram dimensions; only the (currently disabled)
# masking augmentations below use them.
# NOTE(review): the names assume the axis order (mel, frame, channel) --
# confirm against how X_train was generated.
n_mels, n_frames, n_channels = train_dataset.element_spec[0].shape

# Cache the raw examples, reshuffle, cast everything to float32 and drop
# the channel axis so the augmentations operate on 2-D spectrograms.
train_dataset = (
    train_dataset
    .cache()
    .shuffle(buffer_size=4096)
    .map(
        lambda mel_spec, y: (tf.cast(mel_spec, tf.float32), tf.cast(y, tf.float32)),
        num_parallel_calls=AUTOTUNE,
    )
    .map(
        lambda mel_spec, y: (tf.squeeze(mel_spec, axis=2), y),
        num_parallel_calls=AUTOTUNE,
    )
)

# MIXUP
# mixup_one_hot is applied to batches of 32, so batch temporarily and
# split back into single examples afterwards.
train_dataset = (
    train_dataset
    .batch(32)
    .map(
        lambda mel_spec, y: mixup_one_hot(mel_spec, y, 0.5),
        num_parallel_calls=AUTOTUNE,
    )
    .unbatch()
)

# SPEC AUGMENTATIONS (disabled; uncomment to enable)
# train_dataset = train_dataset.map(lambda mel_spec, y: (tf.roll(mel_spec, tf.random.uniform((), minval=-15, maxval=15, dtype=tf.dtypes.int32), axis=1), y), num_parallel_calls=AUTOTUNE)
# train_dataset = train_dataset.map(lambda mel_spec, y: (time_mask(mel_spec, param=int(n_frames * 0.1)), y), num_parallel_calls=AUTOTUNE)
# train_dataset = train_dataset.map(lambda mel_spec, y: (time_mask(mel_spec, param=int(n_frames * 0.1)), y), num_parallel_calls=AUTOTUNE)
# train_dataset = train_dataset.map(lambda mel_spec, y: (freq_mask(mel_spec, param=int(n_mels * 0.1)), y), num_parallel_calls=AUTOTUNE)
# train_dataset = train_dataset.map(lambda mel_spec, y: (freq_mask(mel_spec, param=int(n_mels * 0.1)), y), num_parallel_calls=AUTOTUNE)
# train_dataset = train_dataset.map(lambda mel_spec, y: (freq_mask(mel_spec, param=int(n_mels * 0.1)), y), num_parallel_calls=AUTOTUNE)

# Restore the channel axis and form the final training batches.
train_dataset = (
    train_dataset
    .map(
        lambda mel_spec, y: (tf.expand_dims(mel_spec, axis=2), y),
        num_parallel_calls=AUTOTUNE,
    )
    .batch(32)
)

# Sanity check: resulting dataset spec and the raw array shapes.
print(train_dataset)
print(data_train['X_train'].shape)
print(data_train['y_one_hot_train'].shape)


In [None]:
# Build the transfer-learning model.
yamnet_tl = build_model()
# yamnet_tl.summary()

# Metrics reported during training and validation.
metrics = [
    LWLRAP(num_classes=24),
    tf.metrics.Precision(),
    tf.metrics.Recall(),
    tf.metrics.CategoricalAccuracy(),
]

# Callbacks.
# LWLRAP is a higher-is-better score, so the mode must be 'max': with
# mode='auto' Keras falls back to 'min' for metric names it does not
# recognise and would treat a *falling* score as an improvement.
# NOTE(review): 'val_lwlrap' assumes the LWLRAP metric reports itself under
# the name 'lwlrap' -- confirm in functions/metrics.py.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_lwlrap',
    min_delta=0,
    patience=25,
    verbose=1,
    mode='max',
    baseline=None,
    restore_best_weights=True,
)

reduce_lro_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_lwlrap',
    factor=0.1,
    patience=10,
    verbose=1,
    mode='max',
    min_delta=0.0001,
    cooldown=0,
    min_lr=0,
)

# Optimizer. `learning_rate` replaces the deprecated `lr` argument.
opt = tf.keras.optimizers.Nadam(learning_rate=0.001, clipnorm=1.0)

# Compile the model.
# NOTE(review): the model ends in a sigmoid layer while the loss is
# 'categorical_crossentropy'; if the labels are meant to be multi-label,
# 'binary_crossentropy' may be the better match -- left unchanged here.
yamnet_tl.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=metrics,
)

# Train the model. The callbacks must be passed explicitly, otherwise
# early stopping and learning-rate reduction never take effect.
yamnet_tl.fit(
    train_dataset,
    epochs=50,
    verbose=1,
    validation_data=(data_val['X_val'], data_val['y_one_hot_val']),
    callbacks=[early_stopping_cb, reduce_lro_cb],
)