Adapted from the TensorFlow transfer-learning tutorial: https://www.tensorflow.org/tutorials/images/transfer_learning

In [1]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import random
import tensorflow as tf
from tensorflow import keras
import keras.layers as tfl
import seaborn as sn
import dataset

Get Japanese data

In [2]:
# Load the Japanese MFCC dataset through the project's `dataset` helper.
# The call passes train/valid/test sizes of 0.6/0.2/0.2 together with
# max_wid=32 and max_len=1841 (per the original note, 1841 is the longest
# Japanese clip, so presumably every sample is brought to a 32 x 1841 matrix
# -- verify against dataset.getMFCCDataset).
dataset_collection = dataset.getMFCCDataset(
    train_size=0.6,
    valid_size=0.2,
    test_size=0.2,
    cut=False,
    max_wid=32,
    max_len=1841,
)

# Raw features and labels, one pair per split.
jap_X_train, jap_Y_train = dataset_collection['X_train'], dataset_collection['Y_train']
jap_X_valid, jap_Y_valid = dataset_collection['X_valid'], dataset_collection['Y_valid']
jap_X_test, jap_Y_test = dataset_collection['X_test'], dataset_collection['Y_test']

# Normalised features for the same three splits.
jap_X_train_norm, jap_X_valid_norm, jap_X_test_norm = (
    dataset_collection[key]
    for key in ('X_train_norm', 'X_valid_norm', 'X_test_norm')
)

# convert array to tensor
# jap_X_train_norm = tf.convert_to_tensor(jap_X_train_norm)
# jap_X_valid_norm = tf.convert_to_tensor(jap_X_valid_norm)
# jap_X_test_norm = tf.convert_to_tensor(jap_X_test_norm)
# jap_Y_train = tf.convert_to_tensor(jap_Y_train)
# jap_Y_valid = tf.convert_to_tensor(jap_Y_valid)
# jap_Y_test = tf.convert_to_tensor(jap_Y_test)

# jap_X_train = tf.convert_to_tensor(jap_X_train)
# jap_X_valid = tf.convert_to_tensor(jap_X_valid)
# jap_X_test = tf.convert_to_tensor(jap_X_test)

  X_train_norm = (X_train - X_train_miu)/X_train_std
  X_valid_norm = (X_valid - X_valid_miu)/X_valid_std
  X_test_norm = (X_test - X_test_miu)/X_test_std


In [3]:
# The network input keeps the per-sample feature matrix size (axes 1 and 2 of
# the normalised training array) and appends a channel axis of length 3,
# because MobileNetV2 with ImageNet weights expects 3-channel images.
input_shape = (*jap_X_train_norm.shape[1:3], 3)
print(input_shape)

(32, 1841, 3)


In [7]:
# Legacy, disabled code: per-sample conversion of each split from an array to a
# list of `input_shape`-sized arrays, plus list copies of the labels.
# NOTE(review): np.resize fills the target shape by cycling through the
# flattened source values; it does not copy each value into the 3 channels.
# Do not re-enable this as-is.
# X_train_norm_list = []
# for i in range(len(jap_X_train_norm)):
#     temp_feature_batch = np.resize(jap_X_train_norm[i], input_shape)
#     X_train_norm_list.append(temp_feature_batch)

# X_valid_norm_list = []
# for i in range(len(jap_X_valid_norm)):
#     temp_feature_batch = np.resize(jap_X_valid_norm[i], input_shape)
#     X_valid_norm_list.append(temp_feature_batch)

# X_test_norm_list = []
# for i in range(len(jap_X_test_norm)):
#     temp_feature_batch = np.resize(jap_X_test_norm[i], input_shape)
#     X_test_norm_list.append(temp_feature_batch)

# Y_train_list = list(jap_Y_train)
# Y_valid_list = list(jap_Y_valid)
# Y_test_list = list(jap_Y_test)


In [4]:
def _replicate_to_three_channels(features):
    """Return `features` with a new trailing axis of length 3.

    Every value is copied into all three channels, so sample i of the result
    still holds the data of sample i of the input. np.resize must not be used
    for this: when the target is larger than the source it fills the target by
    cycling through the flattened source, which interleaves neighbouring
    values into the channel axis and shifts data across samples.
    """
    return np.repeat(np.asarray(features)[..., np.newaxis], 3, axis=-1)


# (N, H, W) -> (N, H, W, 3); the names keep their `_resize` suffix because the
# following cells refer to them.
jap_X_train_norm_resize = _replicate_to_three_channels(jap_X_train_norm)
jap_X_valid_norm_resize = _replicate_to_three_channels(jap_X_valid_norm)
jap_X_test_norm_resize = _replicate_to_three_channels(jap_X_test_norm)

# jap_Y_train_resize = np.resize(jap_Y_train, (540, 1, 3))
# jap_Y_valid_resize = np.resize(jap_Y_valid, (180, 1, 3))
# jap_Y_test_resize = np.resize(jap_Y_test, (180, 1, 3))

In [5]:
# X_train_norm = tf.convert_to_tensor(np.array(X_train_norm_list))
# X_valid_norm = tf.convert_to_tensor(np.array(X_valid_norm_list))
# X_test_norm = tf.convert_to_tensor(np.array(X_test_norm_list))

# Turn the 3-channel feature arrays into tensors (train, valid, test order).
X_train_norm, X_valid_norm, X_test_norm = (
    tf.convert_to_tensor(np.array(split_features))
    for split_features in (
        jap_X_train_norm_resize,
        jap_X_valid_norm_resize,
        jap_X_test_norm_resize,
    )
)

# Labels are converted as they are, without any reshaping.
Y_train, Y_valid, Y_test = (
    tf.convert_to_tensor(split_labels)
    for split_labels in (jap_Y_train, jap_Y_valid, jap_Y_test)
)

# Y_train = tf.convert_to_tensor(jap_Y_train_resize)
# Y_valid = tf.convert_to_tensor(jap_Y_valid_resize)
# Y_test = tf.convert_to_tensor(jap_Y_test_resize)

Load pretrained model

use model 'MobileNetV2'

In [6]:
# Build the MobileNetV2 backbone for our input size.
#   weights='imagenet'  -> start from the ImageNet-pretrained parameters
#   include_top=False   -> leave out the original ImageNet classification head
base_model = tf.keras.applications.mobilenet_v2.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape,
)

base_model.summary()

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 32, 1841, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 16, 921, 32)  864         ['input_1[0][0]']                
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 16, 921, 32)  128         ['Conv1[0][0]']                  
                                                                                                  
 Conv1_relu (ReLU)              (None, 16, 921, 32)  0           ['bn_Conv1[0][

High-level features are extracted by MobileNetV2

In [None]:
# Feature extraction: run every training sample through the MobileNetV2
# backbone on its own and collect the resulting feature maps.
feature_batch = []
for sample in jap_X_train_norm:
    # (H, W) -> (1, H, W, 3): add a batch axis in front and a channel axis at
    # the end, then copy the values into all three channels. np.resize is not
    # suitable here because it fills the larger target by cycling through the
    # flattened data instead of replicating each value per channel.
    sample_rgb = np.repeat(
        np.asarray(sample)[np.newaxis, ..., np.newaxis], input_shape[-1], axis=-1
    )
    # training=False keeps the batch-norm layers in inference mode.
    feature_batch.append(base_model(sample_rgb, training=False))


In [None]:
# GlobalAveragePooling2D has no weights, so a single layer instance can be
# reused for every sample instead of being rebuilt on each iteration.
global_average_pool = tf.keras.layers.GlobalAveragePooling2D()

# Average each feature map over its two spatial axes.
feature_batch_average = [global_average_pool(features) for features in feature_batch]
print(np.array(feature_batch_average).shape)

(540, 1, 1280)


Apply pretrained model on Japanese dataset

In [7]:
def pretrained_model(input_shape):
    """Build a classifier on top of the frozen, module-level `base_model`.

    The network is: input -> base_model (inference mode) ->
    GlobalAveragePooling2D -> Dropout(0.2) -> Dense(3).

    Args:
        input_shape: shape of one input sample, passed to `tf.keras.Input`.

    Returns:
        An uncompiled `tf.keras.Model`. The final Dense layer has no
        activation, so the model outputs raw logits for the three classes.

    Side effects:
        Sets `base_model.trainable = False` on the shared backbone.
    """
    # Freeze the pretrained MobileNetV2 so only the new head is trained.
    base_model.trainable = False

    inputs = tf.keras.Input(input_shape)
    # The input is fed to the backbone directly. The original code also called
    # mobilenet_v2.preprocess_input(inputs) but never used the result (the
    # author noted better accuracy without it), so that dead call is removed.
    # training=False keeps the batch-norm layers from updating their statistics.
    x = base_model(inputs, training=False)

    # Summarise each channel with global average pooling.
    x = tf.keras.layers.GlobalAveragePooling2D()(x)

    # Dropout with rate 0.2 to reduce overfitting.
    x = tf.keras.layers.Dropout(0.2)(x)

    # One output unit per emotion class of the Japanese dataset (three).
    outputs = tf.keras.layers.Dense(3)(x)

    return tf.keras.Model(inputs, outputs)

In [8]:
base_learning_rate = 0.01
jap_pretrained_model = pretrained_model(input_shape)
jap_pretrained_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
    # The model ends in Dense(3) without an activation, i.e. it outputs logits.
    # The string 'categorical_crossentropy' assumes probabilities, so the loss
    # object is used with from_logits=True instead. Like the string form, it
    # expects one-hot encoded labels.
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    # 'Accuracy' (capital A) resolves to the exact-equality Accuracy metric,
    # which compares raw outputs with the labels element by element. Use
    # categorical accuracy (argmax match); the name keeps the 'accuracy' /
    # 'val_accuracy' history keys that the plotting cell reads.
    metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy')],
)
jap_pretrained_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 32, 1841, 3)]     0         
                                                                 
 mobilenetv2_1.00_224 (Funct  (None, 1, 58, 1280)      2257984   
 ional)                                                          
                                                                 
 global_average_pooling2d (G  (None, 1280)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dropout (Dropout)           (None, 1280)              0         
                                                                 
 dense (Dense)               (None, 3)                 3843      
                                                                 
Total params: 2,261,827
Trainable params: 3,843
Non-trainable

In [9]:
# Train the new classification head. Validation uses the held-out validation
# split; validating on the training tensors would only repeat the training
# metrics and hide overfitting.
history = jap_pretrained_model.fit(
    X_train_norm,
    Y_train,
    epochs=20,
    validation_data=(X_valid_norm, Y_valid),
)

Epoch 1/20
Epoch 2/20

In [None]:
# One row per epoch, one column per logged metric.
df_loss_acc = pd.DataFrame(history.history)

# rename() returns a new frame; calling it with inplace=True on a column
# selection raises SettingWithCopyWarning.
curve_names = {
    'loss': 'train', 'val_loss': 'validation',
    'accuracy': 'train', 'val_accuracy': 'validation',
}
df_loss = df_loss_acc[['loss', 'val_loss']].rename(columns=curve_names)
df_acc = df_loss_acc[['accuracy', 'val_accuracy']].rename(columns=curve_names)

df_loss.plot(title='Model loss', figsize=(6, 4)).set(xlabel='Epoch', ylabel='Loss')
df_acc.plot(title='Model Accuracy', figsize=(6, 4)).set(xlabel='Epoch', ylabel='Accuracy')

Fine-tune the pretrained model

In [None]:
def transfer_pretrained_model(input_shape, fine_tune_at=151):
    """Build a classifier whose top backbone layers are fine-tuned.

    The module-level `base_model` is made trainable, then every layer before
    index `fine_tune_at` is frozen again, so only the layers from that index
    onwards (plus the new head) receive gradient updates. The network is:
    input -> preprocess_input -> base_model (inference mode) ->
    GlobalAveragePooling2D -> Dropout(0.2) -> Dense(3).

    Args:
        input_shape: shape of one input sample, passed to `tf.keras.Input`.
        fine_tune_at: index into `base_model.layers`; layers before it stay
            frozen. Defaults to 151 (the original note puts the MobileNetV2
            backbone at 154 layers, i.e. only the last few are unfrozen).

    Returns:
        An uncompiled `tf.keras.Model`. The final Dense layer has no
        activation, so the model outputs raw logits for the three classes.

    Side effects:
        Changes the `trainable` flags of the shared `base_model` and of its
        layers.
    """
    # Unfreeze the backbone as a whole, then re-freeze the lower layers.
    base_model.trainable = True
    for layer in base_model.layers[:fine_tune_at]:
        layer.trainable = False

    inputs = tf.keras.Input(input_shape)
    # NOTE(review): mobilenet_v2.preprocess_input rescales inputs assuming
    # pixel values in [0, 255], while this notebook feeds the already
    # normalised *_norm features, and pretrained_model() passes the input to
    # the backbone without this step. Confirm that it is wanted here.
    x = tf.keras.applications.mobilenet_v2.preprocess_input(inputs)
    # training=False keeps the batch-norm layers from updating their statistics.
    x = base_model(x, training=False)

    # Summarise each channel with global average pooling.
    x = tf.keras.layers.GlobalAveragePooling2D()(x)

    # Dropout with rate 0.2 to reduce overfitting.
    x = tf.keras.layers.Dropout(0.2)(x)

    # One output unit per emotion class of the Japanese dataset (three).
    outputs = tf.keras.layers.Dense(3)(x)

    return tf.keras.Model(inputs, outputs)

In [None]:
jap_transfer_pretrained_model = transfer_pretrained_model(input_shape)
jap_transfer_pretrained_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
    # The model ends in Dense(3) without an activation, i.e. it outputs logits.
    # The string 'categorical_crossentropy' assumes probabilities, so the loss
    # object is used with from_logits=True instead. Like the string form, it
    # expects one-hot encoded labels.
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    # 'Accuracy' (capital A) resolves to the exact-equality Accuracy metric,
    # which compares raw outputs with the labels element by element. Use
    # categorical accuracy (argmax match); the name keeps the 'accuracy' /
    # 'val_accuracy' history keys that the plotting cell reads.
    metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy')],
)
jap_transfer_pretrained_model.summary()

In [None]:
# Fine-tune the compiled model. fit() must be called on the model instance
# (jap_transfer_pretrained_model); transfer_pretrained_model is the builder
# function and has no fit attribute. Validation uses the held-out validation
# split rather than the training tensors.
transfer_history = jap_transfer_pretrained_model.fit(
    X_train_norm,
    Y_train,
    epochs=20,
    validation_data=(X_valid_norm, Y_valid),
)

In [None]:
# One row per fine-tuning epoch, one column per logged metric.
df_loss_acc = pd.DataFrame(transfer_history.history)

# rename() returns a new frame; calling it with inplace=True on a column
# selection raises SettingWithCopyWarning.
curve_names = {
    'loss': 'train', 'val_loss': 'validation',
    'accuracy': 'train', 'val_accuracy': 'validation',
}
df_loss = df_loss_acc[['loss', 'val_loss']].rename(columns=curve_names)
df_acc = df_loss_acc[['accuracy', 'val_accuracy']].rename(columns=curve_names)

df_loss.plot(title='Model loss', figsize=(6, 4)).set(xlabel='Epoch', ylabel='Loss')
df_acc.plot(title='Model Accuracy', figsize=(6, 4)).set(xlabel='Epoch', ylabel='Accuracy')