In [1]:
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 16 13:14:29 2020

@author: Mingyu Hsueh

Environment:
    Tensorflow 2.0
    Python 3.8 ++
"""
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

import datetime
import tensorflow as tf
import numpy as np
import math
import random
from tensorflow import keras
import import_ipynb
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, LambdaCallback
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Input, Layer, Reshape, Lambda, GRU, Bidirectional,\
                                    Flatten, BatchNormalization, TimeDistributed, \
                                    ReLU, GlobalAveragePooling1D, Concatenate, Dense
from utils import normalize, regression, Ge2e_loss, test_similarity, EER_estimate, Ge2eOptimizer 
# , Centroid_matrix, Centroid_matrix_basic

importing Jupyter notebook from utils.ipynb


In [2]:
from tensorflow.compat.v1 import ConfigProto, InteractiveSession

# Configure the TF1-style session before any graph is built.
# NOTE: the name `config` is rebound later in this notebook to the dict loaded
# from config.json, so this ConfigProto is only reachable up to that cell.
config = ConfigProto()
# Let the GPU allocator grow on demand instead of reserving memory up front.
config.gpu_options.allow_growth = True
# Limit this process to a fraction (0.3) of the GPU memory.
# NOTE(review): the recorded training run in this notebook ended with a GPU OOM;
# this cap may be a contributing factor -- confirm against the available memory.
config.gpu_options.per_process_gpu_memory_fraction = 0.3
# The session object is not kept; it is created for its side effect of
# installing itself as the default session.
InteractiveSession(config=config)

tf.compat.v1.disable_eager_execution() # disable eager execution (graph mode)

In [3]:
class Attention(Layer):
    """Attention pooling that reduces a sequence to a single context vector.

    The layer scores every step of the input with a small tanh projection,
    turns the scores into weights with a softmax, and returns the weighted sum
    of the input over axis 1.

    Args:
        state_size: size of the last axis of the input (D).
        attention_size: width of the internal projection (A). Defaults to 1024,
            the value that was previously hard-coded.
        **kwargs: forwarded to ``Layer.__init__``.
    """

    def __init__(self, state_size, attention_size=1024, **kwargs):
        super(Attention, self).__init__(**kwargs)
        self.hidden = state_size
        self.attention_size = attention_size

    def build(self, input_shape):
        """Create the projection weights ``w_omega``, ``b_omega`` and ``u_omega``."""
        initializer = tf.random_normal_initializer(stddev=0.1)
        self.w_omega = self.add_weight(name="w_omega",
                                       shape=[self.hidden, self.attention_size],
                                       initializer=initializer)
        self.b_omega = self.add_weight(name="b_omega", shape=[self.attention_size],
                                       initializer=initializer)
        self.u_omega = self.add_weight(name="u_omega", shape=[self.attention_size],
                                       initializer=initializer)
        super(Attention, self).build(input_shape)

    def call(self, inputs):
        """Return the attention-weighted sum of ``inputs`` over axis 1.

        Assumes ``inputs`` has shape (B, T, D) with D == ``state_size``; the
        result then has shape (B, D).
        """
        # Project every step to the attention space: (B, T, D) -> (B, T, A).
        v = tf.tanh(tf.tensordot(inputs, self.w_omega, axes=1) + self.b_omega)

        # Reduce each step's A-sized vector against `u_omega` to one score per step.
        vu = tf.tensordot(v, self.u_omega, axes=1, name='vu')  # (B,T) shape
        # Softmax over the last axis (the steps) gives the attention weights.
        alphas = tf.nn.softmax(vu, name='alphas')         # (B,T) shape

        # Weighted sum of the inputs over the step axis; the result has (B,D) shape.
        context = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)
        return context

    def get_config(self):
        """Return the layer config using the same keyword names as ``__init__``.

        The keys must match the constructor arguments, otherwise the layer
        cannot be re-created from its own config.
        """
        config = {'state_size': self.hidden, 'attention_size': self.attention_size}
        base_config = super(Attention, self).get_config()
        return dict(list(base_config.items())+list(config.items()))

In [4]:
class Network(Model):
    def __init__(self, config, *args, **kwargs):
        """Store hyper-parameters from ``config`` and create every layer object.

        Only layer objects are created here; they are wired into a graph later
        by ``_build`` (training) and ``_infer_build`` (inference).

        Args:
            config: mapping read with the keys "n_layer", "hidden_size", "lr",
                "proj_size", "m_file", "batch_size", "time_stamp",
                "feature_dim", "loss_type", "steps", "n_train", "epoch",
                "checkpoint_path", "train_path", "enroll_path", "eval_path"
                and "ground_path".
            *args, **kwargs: forwarded unchanged to ``Model.__init__``.
        """
        super(Network, self).__init__(*args, **kwargs)
        # Set to True by _infer_process once it has printed the model summary.
        self.mode = False
        
        # Hyper-parameters and training settings.
        self.n_layer = config["n_layer"]
        self.rnn_unit = config["hidden_size"]
        self.lr = config["lr"]
        self.dense_unit = config["proj_size"]
        self.M = config["m_file"]
        # N and gen_batch_size are both read from config["batch_size"].
        self.N = config["batch_size"]
        self.timestamps = config["time_stamp"]
        self.feature_dim = config["feature_dim"]
        self.loss_type = config["loss_type"]
        self.steps = config["steps"]
        self.gen_batch_size = config["batch_size"]
        self.total_utt = config["n_train"]
        self.epoch = config["epoch"]
        self.checkpoint_path = config["checkpoint_path"]
        
        # Paths of the .npy files loaded with np.load by the other methods.
        self.train_path = config["train_path"]
        self.enroll_path = config["enroll_path"]
        self.eval_path = config["eval_path"]
        self.ground_path = config["ground_path"]
        
        # Tensor lists filled in by _build / _infer_build.
        self.inputs=[]
        self.outputs=[]
        self.infer_input=[]
        # Convolutional front-end: four 5x5 'same'-padded Conv2D layers, each
        # followed by BatchNormalization and ReLU.
        # NOTE: the BatchNormalization/ReLU layers are unnamed, so their
        # auto-generated names depend on this creation order.
        self.conv1 = Conv2D(filters=16, kernel_size=[5,5], padding='same', name='conv1')
        self.bn1 = BatchNormalization()
        self.relu1 = ReLU()
        
        self.conv2 = Conv2D(filters=16, kernel_size=[5,5], padding='same', name='conv2')
        self.bn2 = BatchNormalization()
        self.relu2 = ReLU()
        
        self.conv3 = Conv2D(filters=32, kernel_size=[5,5], padding='same', name='conv3')
        self.bn3 = BatchNormalization()
        self.relu3 = ReLU()
        
        self.conv4 = Conv2D(filters=32, kernel_size=[5,5], padding='same', name='conv4')
        self.bn4 = BatchNormalization()
        self.relu4 = ReLU()
        
        # Flatten applied separately to each step along axis 1 of its input.
        self.flatten = TimeDistributed(Flatten())
        # Input pipeline: the symbolic input has a variable-length axis ahead of
        # (timestamps, feature_dim); the squeeze layer drops axis 0 and the
        # expand layer appends a trailing axis before the convolutions.
        self.input_layer = Input(shape=(None, self.timestamps, self.feature_dim), name='input')
        self.squeeze_layer = Lambda(lambda x: tf.keras.backend.squeeze(x, axis=0), name='squeeze_input')
        self.expand_layer = Lambda(lambda x: tf.expand_dims(x, axis=-1), name='expand_input')
        
        # Recurrent stack. Despite the "lstm" naming, each entry is a
        # Bidirectional GRU; each is paired with a per-step Dense projection
        # of width 2 * dense_unit.
        self.lstm_layers = []
        self.dense_layers = []
        for lstm_idx in range(self.n_layer):
            return_sequences = True
            lstm_layer = Bidirectional(GRU(units=(self.rnn_unit), return_sequences=return_sequences, name="lstm_{}".format(lstm_idx)))
            dense_layer = TimeDistributed(Dense(units=(2*self.dense_unit), name="dense_{}".format(lstm_idx)))            
                
            self.lstm_layers.append(lstm_layer)
            self.dense_layers.append(dense_layer)
        # L2-normalises the embedding along its last axis.
        self.embedding_norm_layer = tf.keras.layers.Lambda(lambda x: tf.keras.backend.l2_normalize(x, axis=-1), 
                                                            name='embeding_output')
        
        # Attention pooling sized to the Dense projection width (2 * dense_unit).
        self.attention = Attention(2*self.dense_unit)
#         self._centroid_layer = Centroid_matrix(self.N, self.M,name='centroids')
#         self.infer_centroid_layer = Centroid_matrix_basic(self.N, self.M,name='infer_centroids')

        # Similarity and loss layers come from utils and are not visible here.
        # NOTE(review): the meaning of the boolean fourth argument of
        # `regression` (True for training, False for inference) should be
        # confirmed against utils.
        self.regression = regression(self.N, self.M, 2*self.dense_unit, True, name='similarity_regression') 
        self.infer_regression = regression(self.N, self.M, 2*self.dense_unit, False, name='similarity_regression2') 
        self.loss_layer = Ge2e_loss(self.N, self.M, self.loss_type, name='ge2e_loss')
        
        # Scratch attribute holding the tensor currently being built.
        self.X = None    

    def _build(self, is_training=False):
        """Wire the layers into the training graph and return it as a Model.

        The graph is: input -> squeeze -> expand -> 4 x (Conv2D, BatchNorm,
        ReLU) -> per-step flatten -> n_layer x (Bidirectional GRU, Dense) ->
        attention -> L2 normalisation -> similarity regression -> loss layer.

        Args:
            is_training: accepted for interface compatibility; not used.

        Returns:
            A Keras ``Model`` whose outputs are ``[loss, embedding]``.
        """
        # Start from empty lists so that calling _build() more than once does
        # not append duplicate tensors to the model inputs/outputs.
        self.inputs = []
        self.outputs = []

        self.X = self.input_layer
        self.inputs.append(self.X)
        self.X = self.squeeze_layer(self.X)
        self.X = self.expand_layer(self.X)

        # Convolutional front-end; the call order is conv -> bn -> relu for
        # each of the four stages, in stage order.
        conv_stages = (
            (self.conv1, self.bn1, self.relu1),
            (self.conv2, self.bn2, self.relu2),
            (self.conv3, self.bn3, self.relu3),
            (self.conv4, self.bn4, self.relu4),
        )
        for conv, bn, relu in conv_stages:
            self.X = conv(self.X)
            self.X = relu(bn(self.X))

        self.X = self.flatten(self.X)

        # Recurrent stack: each bidirectional GRU is followed by its Dense projection.
        for lstm_idx in range(self.n_layer):
            self.X = self.lstm_layers[lstm_idx](self.X)
            self.X = self.dense_layers[lstm_idx](self.X)
        self.X = self.attention(self.X)
        self.X = self.embedding_norm_layer(self.X)       # [tot_utt, embed_dim]

        # Similarity matrix and loss are computed inside the graph.
        self.matrix = self.regression(self.X)
        self.loss = self.loss_layer(self.matrix)

        # Output order matters: _run pairs these with [custom_loss, dummy_loss].
        self.outputs.append(self.loss)
        self.outputs.append(self.X)

        model = Model(inputs=self.inputs, outputs=self.outputs)
        return model
    
    def generator(self):
        """Yield training batches forever, reshuffling the sample order each pass.

        Reads ``self.train_path`` with ``np.load`` once. Every pass shuffles the
        indices ``0 .. total_utt - 1`` and yields ``self.steps`` batches, each
        built from up to ``self.gen_batch_size`` entries of the training array
        concatenated along axis 0.

        Yields:
            ``({'input': x}, {'ge2e_loss': loss, 'embeding_output': output})``
            where ``x`` has a leading axis of size 1, ``loss`` is a zero array
            of shape (1,) and ``output`` is a zero placeholder.
        """
        train_data = np.load(self.train_path)

        # Shape of an empty batch; taken from the model settings instead of
        # hard-coded numbers so it always matches the Input layer.
        empty_batch_shape = (0, self.timestamps, self.feature_dim)
        loss = np.zeros((1,))

        while True:
            train_index = list(range(self.total_utt))
            random.shuffle(train_index)
            for _ in range(self.steps):
                n_take = min(self.gen_batch_size, len(train_index))
                batch_list = [train_index.pop() for _ in range(n_take)]
                # One concatenate per batch. The empty float array is kept as
                # the first piece so the result dtype is promoted exactly as
                # before and an empty batch_list still yields a valid array.
                pieces = [np.empty(empty_batch_shape)]
                pieces.extend(train_data[idx] for idx in batch_list)
                batch_x = np.concatenate(pieces, axis=0)

                # Placeholder target for the embedding output.
                # NOTE(review): 64 is hard-coded; presumably it should equal
                # the embedding width (2 * dense_unit) -- confirm.
                output = np.zeros((1, batch_x.shape[0], 64))
                batch_x = np.expand_dims(batch_x, axis=0)
                yield ({'input': batch_x}, {'ge2e_loss': loss, 'embeding_output': output})
        
    def _run(self):
        """Build, compile and train the model.

        Builds the training graph, compiles it with ``Ge2eOptimizer`` and the
        two pass-through losses, makes sure the checkpoint directory exists and
        runs ``fit`` on ``self.generator()`` for ``self.epoch`` epochs of
        ``self.steps`` steps each.

        Returns:
            None.
        """
        # Local import: the notebook only imports `os` in a later cell, so the
        # method must not depend on that cell having been run.
        import os

        self.model = self._build()
        # Loss order matches the model outputs [loss, embedding].
        self.model.compile(optimizer=Ge2eOptimizer(self.lr), loss=[custom_loss, dummy_loss])
        self.model.summary()

        callbacks = self._callbacks()
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(self.checkpoint_path, exist_ok=True)
        self.model.fit(self.generator(), steps_per_epoch=self.steps, epochs=self.epoch, verbose=1,
                       use_multiprocessing=True, callbacks=callbacks, shuffle=False)
        return
    
    def _callbacks(self):
        """Create the Keras callbacks used by ``_run``.

        Returns:
            A list ``[checkpoint, tensorboard, epoch_end]`` where the checkpoint
            callback saves weights under ``self.checkpoint_path``, the
            TensorBoard callback logs to a time-stamped directory below
            ``logs/fit/`` and the epoch-end callback calls ``self.val()``.
        """
        # Local import: the notebook only imports `os` in a later cell.
        import os

        run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        logdir = "logs/fit/" + run_stamp
        tensorboard_callback = TensorBoard(log_dir=logdir, write_graph=True)

        # Print a small similarity matrix at the end of every epoch.
        epoch_print_callback = LambdaCallback(on_epoch_end=lambda epochs, logs: self.val())

        # NOTE(review): the unit of an integer `save_freq` depends on the
        # TensorFlow version -- confirm that 5 * steps gives the intended cadence.
        weights_pattern = os.path.join(self.checkpoint_path, "model_{epoch:02d}.ckpt")
        cp_save_callback = ModelCheckpoint(weights_pattern, save_weights_only=True,
                                           verbose=0, save_freq=5*self.steps)

        return [cp_save_callback, tensorboard_callback, epoch_print_callback]
    
    def _infer_build(self):
        """Wire the layers into the inference graph and return it as a Model.

        The graph shares every layer with the training graph up to the
        normalised embedding, then applies ``self.infer_regression`` and the
        loss layer. Callers rely on the embedding being ``model.layers[-3]``,
        so two layers must follow it.

        Returns:
            A Keras ``Model`` whose outputs are ``[embedding, loss]``.
        """
        # inputs
        self.X = self.input_layer
        self.infer_input = []
        self.infer_input.append(self.X)
        self.X = self.squeeze_layer(self.X)
        self.X = self.expand_layer(self.X)

        # Convolutional front-end: conv -> bn -> relu for each of the four stages.
        conv_stages = (
            (self.conv1, self.bn1, self.relu1),
            (self.conv2, self.bn2, self.relu2),
            (self.conv3, self.bn3, self.relu3),
            (self.conv4, self.bn4, self.relu4),
        )
        for conv, bn, relu in conv_stages:
            self.X = conv(self.X)
            self.X = relu(bn(self.X))

        self.X = self.flatten(self.X)

        # Recurrent stack: each bidirectional GRU is followed by its Dense projection.
        for lstm_idx in range(self.n_layer):
            self.X = self.lstm_layers[lstm_idx](self.X)
            self.X = self.dense_layers[lstm_idx](self.X)
        self.X = self.attention(self.X)
        self.X = self.embedding_norm_layer(self.X)       # [tot_utt, embed_dim]

        # `self._centroid` used to be produced by a centroid layer that is now
        # commented out, so reading it unconditionally raised AttributeError.
        # If a caller has set it, keep the two-input call; otherwise feed the
        # embedding alone, the same way _build calls self.regression.
        # NOTE(review): confirm that utils.regression constructed with its
        # fourth argument False accepts a single tensor.
        centroid = getattr(self, "_centroid", None)
        if centroid is None:
            self.matrix = self.infer_regression(self.X)
        else:
            self.matrix = self.infer_regression((self.X, centroid))

        self.loss = self.loss_layer(self.matrix)

        model = tf.keras.Model(inputs=[self.infer_input], outputs=[self.X, self.loss])
        return model
    
    def _infer_process(self, index):
        """Evaluate checkpoint number ``index`` and return its EER.

        Loads the weights ``model_<index, zero-padded to 2 digits>.ckpt`` from
        ``self.checkpoint_path`` into an embedding-only model, embeds the
        enrollment and evaluation utterances one at a time, averages the
        enrollment embeddings per speaker and scores them with
        ``test_similarity`` / ``EER_estimate``.

        Args:
            index: integer checkpoint number.

        Returns:
            The value returned by ``EER_estimate``.
        """
        # Local import: the notebook only imports `os` in a later cell.
        import os

        model = self._infer_build()
        model_name = "model_" + str(index).zfill(2) + ".ckpt"

        # Keep only the graph up to the embedding (third layer from the end).
        new_model = tf.keras.Model(model.inputs, model.layers[-3].output)
        # NOTE(review): `by_name=True` may not be supported for every weight
        # file format -- confirm against the format ModelCheckpoint writes.
        new_model.load_weights(os.path.join(self.checkpoint_path, model_name), by_name=True, skip_mismatch=True)

        # Print the summary only on the first call.
        if not self.mode:
            new_model.summary()
            self.mode = True

        # Enrollment data is indexed as [speaker, utterance, time, feature].
        en_data = np.load(self.enroll_path)
        # data_linein returns a 4-D array (1, n_utterances, time, feature).
        ev_data = self.data_linein(np.load(self.eval_path))

        single_input_shape = (1, 1, self.timestamps, self.feature_dim)
        # The embedding comes from the Dense/Attention stack, which is
        # 2 * dense_unit wide (see __init__), not dense_unit.
        embed_dim = 2 * self.dense_unit

        enroll_rows = [np.empty((0, embed_dim))]
        for i in range(en_data.shape[0]):
            utt_embeds = [np.empty((0, embed_dim))]
            for j in range(en_data.shape[1]):
                data = np.reshape(en_data[i, j], single_input_shape)
                utt_embeds.append(new_model.predict(data, batch_size=1))
            _enroll = np.concatenate(utt_embeds, axis=0)
            # One averaged embedding per enrolled speaker.
            enroll_rows.append(np.mean(_enroll, 0, keepdims=True))
        enroll_stack = np.concatenate(enroll_rows, axis=0)
        print(enroll_stack.shape)

        eval_rows = [np.empty((0, embed_dim))]
        for i in range(ev_data.shape[1]):
            # ev_data is 4-D, so one utterance is ev_data[0, i]; the previous
            # five-index form raised IndexError.
            data = np.reshape(ev_data[0, i], single_input_shape)
            eval_rows.append(new_model.predict(data, batch_size=1))
        eval_stack = np.concatenate(eval_rows, axis=0)
        print(eval_stack.shape)

        similarity = test_similarity(enroll_stack, eval_stack)
        print(similarity)
        ground = np.load(self.ground_path)
        EER = EER_estimate(similarity, ground, draw=True)
        return EER
    
    def data_linein(self, data):
        """Arrange ``data`` as one batch with a leading axis of size 1.

        Args:
            data: 4-D array, whose first two axes are merged into one, or a
                3-D array, which is used as is.

        Returns:
            A 4-D array of shape ``(1, n, d2, d3)``. For 4-D input the result
            is floating point, as it is concatenated with an empty float array;
            for 3-D input the dtype is unchanged.

        Raises:
            ValueError: if ``data`` is neither 3-D nor 4-D.
        """
        rank = len(data.shape)
        if rank == 4:
            # Seed with an empty float array shaped after the data itself, so
            # any (time, feature) size works and zero leading entries still
            # produce a valid (1, 0, d2, d3) result.
            pieces = [np.empty((0,) + tuple(data.shape[2:]))]
            pieces.extend(data[i] for i in range(len(data)))
            batch = np.concatenate(pieces, axis=0)
            return np.expand_dims(batch, axis=0)
        if rank == 3:
            return np.expand_dims(data, axis=0)
        # Previously this fell through to `return batch` with `batch` unbound.
        raise ValueError(
            "data_linein expects a 3-D or 4-D array, got shape {}".format(data.shape))
    
    def train_test(self, index):
        """Print a similarity matrix for 5 random training entries using a checkpoint.

        Loads the weights ``model_<index, zero-padded to 2 digits>.ckpt`` into
        an embedding-only model, embeds every utterance of 5 randomly chosen
        entries of the training array (drawn with replacement) and prints
        ``test_similarity`` between the per-entry mean embeddings and all
        embeddings, rounded to 3 decimals.

        Args:
            index: integer checkpoint number.
        """
        # Local import: the notebook only imports `os` in a later cell.
        import os

        model = self._infer_build()
        model_name = "model_" + str(index).zfill(2) + ".ckpt"

        # Keep only the graph up to the embedding (third layer from the end).
        new_model = tf.keras.Model(model.inputs, model.layers[-3].output)
        new_model.summary()
        new_model.load_weights(os.path.join(self.checkpoint_path, model_name), by_name=True, skip_mismatch=True)

        _data = np.load(self.train_path)
        # randrange excludes the upper bound; randint(0, n) could return n and
        # index one past the end. A separate name also avoids shadowing `index`.
        speaker_ids = [random.randrange(_data.shape[0]) for _ in range(5)]
        data = _data[speaker_ids]

        # The model input is 4-D (1, n_utt, time, feature); the trailing channel
        # axis is added inside the graph, so it must not be added here.
        single_input_shape = (1, 1, self.timestamps, self.feature_dim)
        # The embedding is 2 * dense_unit wide (see __init__).
        embed_dim = 2 * self.dense_unit

        per_speaker = []
        for i in range(data.shape[0]):
            utt_embeds = [np.empty((0, embed_dim))]
            for j in range(data.shape[1]):
                batch = np.reshape(data[i, j], single_input_shape)
                utt_embeds.append(new_model.predict(batch, batch_size=1))
            per_speaker.append(np.concatenate(utt_embeds, axis=0))
        stack = np.stack(per_speaker, axis=0)  # (n_entries, n_utt, embed_dim)

        similarity = test_similarity(np.mean(stack,1), stack)
        print(np.round(similarity,3))
    
    def val(self):
        """Print a similarity matrix for 3 random training entries using the live model.

        Called at the end of every epoch by the callback created in
        ``_callbacks``. Embeds every utterance of 3 randomly chosen entries of
        the training array (drawn with replacement) with the current weights of
        ``self.model`` and prints ``test_similarity`` between the per-entry mean
        embeddings and all embeddings, rounded to 3 decimals.
        """
        # Keep only the graph up to the embedding (third layer from the end).
        new_model = tf.keras.Model(self.model.inputs, self.model.layers[-3].output)

        _data = np.load(self.train_path)
        # randrange excludes the upper bound; randint(0, n) could return n and
        # index one past the end.
        speaker_ids = [random.randrange(_data.shape[0]) for _ in range(3)]
        data = _data[speaker_ids]

        # The model input is 4-D (1, n_utt, time, feature); the trailing channel
        # axis is added inside the graph, so it must not be added here.
        single_input_shape = (1, 1, self.timestamps, self.feature_dim)
        # The embedding is 2 * dense_unit wide (see __init__).
        embed_dim = 2 * self.dense_unit

        per_speaker = []
        for i in range(data.shape[0]):
            utt_embeds = [np.empty((0, embed_dim))]
            for j in range(data.shape[1]):
                batch = np.reshape(data[i, j], single_input_shape)
                utt_embeds.append(new_model.predict(batch, batch_size=1))
            per_speaker.append(np.concatenate(utt_embeds, axis=0))
        stack = np.stack(per_speaker, axis=0)  # (n_entries, n_utt, embed_dim)

        similarity = test_similarity(np.mean(stack,1), stack)
        print(np.round(similarity,3))
        
    
#     def compute_output_shape(self, input_shape):
#         assert(input_shape, list)
#         shape_a, shape_b = input_shape
#         return [(shape_a[0], self.output_dim), shape_b[:-1]]

In [5]:
# Define custom loss
def custom_loss(y_true, y_pred):
    """Pass-through loss: return the prediction unchanged.

    Used for the model output that already is the loss value computed inside
    the graph; the target is ignored.
    """
    del y_true  # the target plays no role
    return y_pred

def dummy_loss(y_true, y_pred):
    """Zero loss: return the prediction multiplied by 0.

    The product keeps the result tied to ``y_pred`` while contributing nothing
    to the total loss; the target is ignored.
    """
    zero_weight = 0
    return zero_weight * y_pred

In [6]:
import configparser
import argparse
import json
import os
# parser = argparse.ArgumentParser()
# args = parser.parse_args()

# Read the run settings from config.json in the current working directory.
CONFIG_FILE = "{}/{}".format(os.getcwd(), 'config.json')
with open(CONFIG_FILE, 'rb') as config_stream:
    config = json.load(config_stream)

In [7]:
# Dataset dimensions, set by hand rather than read from the training array:
# config["n_train"], config["m_file"], config["time_stamp"], config["feature_dim"] = train_data.shape
config.update({
    "n_train": 1418,
    "m_file": 4,
    "time_stamp": 99,
    "feature_dim": 40,
})

# Whole batches per epoch; a trailing partial batch is dropped.
config["steps"] = math.floor(config["n_train"] / config["batch_size"])

In [8]:
# Create the network from the loaded settings and run training.
# NOTE: the recorded output of this cell ends in a GPU ResourceExhaustedError (OOM).
sv = Network(config)
sv._run()
# print(hist.history)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, None, 99, 40)]    0         
_________________________________________________________________
squeeze_input (Lambda)       (None, 99, 40)            0         
_________________________________________________________________
expand_input (Lambda)        (None, 99, 40, 1)         0         
_________________________________________________________________
conv1 (Conv2D)               (None, 99, 40, 16)        416       
_________________________________________________________________
batch_normalization (BatchNo (None, 99, 40, 16)        64        
_________________________________________________________________
re_lu (ReLU)                 (None, 99, 40, 16)        0         
______________________________________________

ResourceExhaustedError: 2 root error(s) found.
  (0) Resource exhausted: OOM when allocating tensor with shape[99,64,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node bidirectional/forward_lstm_0_1/TensorArrayV2Stack/TensorListStack}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[loss/Identity_1/_221]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (1) Resource exhausted: OOM when allocating tensor with shape[99,64,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node bidirectional/forward_lstm_0_1/TensorArrayV2Stack/TensorListStack}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

0 successful operations.
0 derived errors ignored.

In [None]:
import glob
# Evaluate the saved checkpoints from the newest index down and collect their EER.
EER_dict = {}
sv = Network(config)
# Count checkpoints in the directory _infer_process loads from, instead of a
# hard-coded absolute path that only exists on one machine.
num_of_model = len(glob.glob(os.path.join(sv.checkpoint_path, '*.ckpt')))
# NOTE(review): indices are visited as idx * 2, which assumes checkpoints are
# numbered in steps of two -- confirm against the ModelCheckpoint save cadence.
for idx in range(num_of_model,0,-1):
    EER_val = sv._infer_process((idx)*2)
    EER_dict[idx*2] = EER_val

In [None]:
# Reset the current CUDA device through numba.
# NOTE(review): presumably used to free GPU memory held by this kernel; objects
# that were using the device are likely unusable afterwards -- confirm before
# running further cells.
from numba import cuda 
device = cuda.get_current_device()
device.reset() 