In [None]:
import os
import glob
from tqdm import tqdm
import numpy as np
from feature_ext import YaafeMFCC
import h5py
from random import shuffle
import keras
import keras.backend as K

In [24]:
import numpy as np
import keras.backend as K
from keras.utils import to_categorical


def data_generator_classification(npy_files: list, batch_size: int, steps: int, num_classes: int,
                                  mode: str='train', model: str='CNN'):
    """
    Generator for generating training and validation data for the speaker classification training.
    
    args:
        npy_files: list of lists. Each sublist consist of two elements. The first is the path to the npy file 
                    with the features and the secon with path to the targets
        batch_size: int with batch size
        steps: int meaning number of batches per epoch
        mode: string indicating whether this generator is used for training or validation data
        model: string holding type of model we are training (only CNN or LSTM) for adding channels in case of CNN
        
    yields:
        (features, targets): tuple with feature (shape: LSTM -> batch_size, W, H; CNN -> batch_size, W, H, 1) 
                            and target batch 
    """
    
    # load sample file to determine shapes of the data
    f = np.load(npy_files[0][0])
    f_N, f_W, f_H = f.shape
    
    idxs = np.arange(len(npy_files))
    n = int(batch_size/f_N)
    assert mode=='train' or mode=='val'
    
    if mode == 'train':
        while True:
            
            # get files and retrieve data
            f_idxs = np.random.choice(idxs, size=n)
            features = np.zeros((batch_size, f_W, f_H))
            targets = np.zeros(batch_size)
            for i in range(n):
                file = npy_files[f_idxs[i]]
                dfile = file[0]
                tfile = file[1]
                samples = np.load(dfile)
                sample_targets = np.load(tfile)
                features[i*f_N:(i+1)*f_N] = samples
                targets[i*f_N:(i+1)*f_N] = sample_targets

            # transform targets to categrorical
            targets = to_categorical(targets, num_classes)
            
            # if CNN is trained insert channel
            if model == 'CNN':
                B, W, H = features.shape
                if K.image_data_format() == 'channels_last':
                    features = np.reshape(features, (B, W, H, 1))
                else:
                    features = np.reshape(features, (B, 1, W, H))

            assert features.shape[0] == batch_size

            yield (features, targets)
        
    # for val mode we dont need to draw random files, but just loop over the data
    if mode == 'val':
        while True:
            
            # get files and retrieve data
            for j in range(steps):
                features = np.zeros((batch_size, f_W, f_H))
                targets = np.zeros(batch_size)
                for i in range(n):
                    file = npy_files[j+i]
                    dfile = file[0]
                    tfile = file[1]
                    samples = np.load(dfile)
                    sample_targets = np.load(tfile)
                    features[i*f_N:(i+1)*f_N] = samples
                    targets[i*f_N:(i+1)*f_N] = sample_targets

                # transform targets to categrorical 
                targets = to_categorical(targets, num_classes)

                # if CNN is trained insert channel
                if model == 'CNN':
                    B, W, H = features.shape
                    if K.image_data_format() == 'channels_last':
                        features = np.reshape(features, (B, W, H, 1))
                    else:
                        features = np.reshape(features, (B, 1, W, H))

                assert features.shape[0] == batch_size

                yield (features, targets)
                

def data_generator_identification(npy_files, batch_size, steps, cache_size=2, mode='train', model='CNN'):
    """
    Generator for generating training and validation data for the speaker identification training.
    
    args:
        npy_files: list of lists. Each sublist consist of two elements. The first is the path to the npy file 
                    with the features and the secon with path to the targets
        batch_size: int with batch size
        steps: int meaning number of batches per epoch
        mode: string indicating whether this generator is used for training or validation data
        model: string holding type of model we are training (only CNN or LSTM) for adding channels in case of CNN
        
    yields:
        (features, targets): tuple with feature (shape: LSTM -> batch_size, W, H; CNN -> batch_size, W, H, 1) 
                            and target batch 
    """
    
    # load sample file to determine shapes of the data
    f = np.load(npy_files[0][0])
    f_N, f_W, f_H = f.shape
    
    idxs = np.arange(len(npy_files))
    n = int(batch_size/f_N)
        
    # get labels
    labels = []
    for file in npy_files:
        targets = np.load(file[1])
        labels = list(np.unique(labels + list(np.unique(targets))))
    labels = [int(i) for i in labels]
    
    # build positive sample cache
    cache_pos = dict()
    for i in labels:
        cache_pos[i] = []

    cache_not_full = True
    while cache_not_full:

        f_idxs = np.random.choice(idxs, size=n)

        for i in range(n):
            file = npy_files[f_idxs[i]]
            dfile = file[0]
            tfile = file[1]
            sample_targets = np.load(tfile)
            samples = np.load(dfile)
            for j in labels:
                if len(cache_pos[j]) < cache_size:
                    pos_mask = sample_targets == j
                    if any(pos_mask):
                        cache_pos[j] += list(samples[pos_mask])

        full = []
        for i in labels:
            if len(cache_pos[i]) >= cache_size:
                full.append(True)
            else:
                full.append(False)
        if all(full):
            cache_not_full = False
            print('Positive samples cache full!')
    
    if mode == 'train':

        while True:
            f_idxs = np.random.choice(idxs, size=n)
            features = np.zeros((batch_size, f_W, f_H))
            targets = np.zeros(batch_size)
            for i in range(n):
                file = npy_files[f_idxs[i]]
                dfile = file[0]
                tfile = file[1]
                samples = np.load(dfile)
                sample_targets = np.load(tfile)
                features[i*f_N:(i+1)*f_N] = samples
                targets[i*f_N:(i+1)*f_N] = sample_targets

            # get postive and negative samples
            pos_samples = []
            neg_samples = []
            for i in range(len(targets)):
                targ = int(targets[i])
                idx_pos = np.random.choice(range(len(cache_pos[targ])))
                neg_targets_mask = targets != targets[i]
                neg_targets_masked = targets[neg_targets_mask]
                neg_idx = np.random.choice(np.arange(len(targets[neg_targets_mask])))
                pos_samples.append([cache_pos[targ][idx_pos]])
                neg_samples.append([features[neg_targets_mask][neg_idx]])
                cache_pos[targ][idx_pos] = features[i]
            pos_samples = np.concatenate(pos_samples, axis=0)
            neg_samples = np.concatenate(neg_samples, axis=0)
            
            features = np.concatenate([features, pos_samples, neg_samples], axis=0)
#             targets = np.zeros(features.shape)
            
            if model == 'CNN':
                B, W, H = features.shape
                features = np.reshape(features, (B, W, H, 1))
                
            assert features.shape[0] == 3*batch_size

            yield features
        
    if mode == 'val':
        
        while True:
            
            for j in range(steps):
                features = np.zeros((batch_size, f_W, f_H))
                targets = np.zeros(batch_size)
                for i in range(n):
                    file = npy_files[j+i]
                    dfile = file[0]
                    tfile = file[1]
                    samples = np.load(dfile)
                    sample_targets = np.load(tfile)
                    features[i*f_N:(i+1)*f_N] = samples
                    targets[i*f_N:(i+1)*f_N] = sample_targets

                # get postive and negative samples
                pos_samples = []
                neg_samples = []
                for i in range(len(targets)):
                    targ = int(targets[i])
                    idx_pos = np.random.choice(range(len(cache_pos[targ])))
                    neg_targets_mask = targets != targets[i]
                    neg_targets_masked = targets[neg_targets_mask]
                    neg_idx = np.random.choice(np.arange(len(targets[neg_targets_mask])))
                    pos_samples.append([cache_pos[targ][idx_pos]])
                    neg_samples.append([features[neg_targets_mask][neg_idx]])
                    cache_pos[targ][idx_pos] = features[i]
                pos_samples = np.concatenate(pos_samples, axis=0)
                neg_samples = np.concatenate(neg_samples, axis=0)

                features = np.concatenate([features, pos_samples, neg_samples], axis=0)
#                 targets = np.zeros(features.shape)

                if model == 'CNN':
                    B, W, H = features.shape
                    features = np.reshape(features, (B, W, H, 1))

                assert features.shape[0] == 3*batch_size

                yield features
                
def data_loader_model_wrapper_classification(data_loader, state_shape):
    """
    Interface between data loader and model
    
    args:
        
    returns:
    
    """
    
    while True:
        features, targets = data_loader.__next__()
        N, W, H = features.shape
        W_pad = (W % 16 > 0)*(16 - W % 16)
        H_pad = (H % 16 > 0)*(16 - H % 16)
        features = np.pad(features, ((0,0),(0,W_pad),(0,H_pad)), 'constant', constant_values=((0,0),(0,0),(0,0)))
        N, W, H = features.shape
        input_dict = {
            'x': np.reshape(features, (N, W*H)),
            'y': targets,
            'state': np.zeros(state_shape)
        }
        yield input_dict
        
def data_loader_model_wrapper_identification(data_loader, state_shape):
    """
    Interface between data loader and model
    
    args:
        
    returns:
    
    """
    
    while True:
        features = data_loader.__next__()
        N, W, H = features.shape
        W_pad = (W % 16 > 0)*(16 - W % 16)
        H_pad = (H % 16 > 0)*(16 - H % 16)
        features = np.pad(features, ((0,0),(0,W_pad),(0,H_pad)), 'constant', constant_values=((0,0),(0,0),(0,0)))
        N, W, H = features.shape
        input_dict = {
            'x': np.reshape(features, (N, W*H)),
            'state': np.zeros(state_shape)
        }
        yield input_dict

# Imports

In [33]:
import configparser
import numpy as np
import tensorflow as tf
from established.utils.helper_functions import get_all_file_names
from established.utils.data_loader import data_generator_classification
from established.utils.data_loader import data_generator_identification
from established.utils.data_loader import data_loader_model_wrapper_classification
from established.utils.data_loader import data_loader_model_wrapper_identification

In [34]:
# If true: train classification, else: identification
classification = True

# Preparation

In [35]:
# parse config
config_file = 'established/config.ini'
config = configparser.ConfigParser()
config.read(config_file)

['established/config.ini']

In [36]:
# load paths of data files
if classification:
    train_files, val_files = get_all_file_names(config['DATA']['data_root'])
else:
    train_files, val_files = get_all_file_names(config['DATA']['data_root'])

In [37]:
# define parameters
batch_size = int(config['TRAINING']['batch_size'])
samples_per_file = np.load(train_files[0][0]).shape[0]
steps_per_epoch_train = int(len(train_files)*samples_per_file/batch_size)
steps_per_epoch_val = int(len(val_files)*samples_per_file/batch_size)

In [50]:
if classification:
    # get data loader
    train_gen = data_generator_classification(train_files, batch_size, steps_per_epoch_train, 
                                              int(config['DATA']['num_classes']),
                                              mode='train', model='LSTM')
    val_gen = data_generator_classification(val_files, batch_size, steps_per_epoch_val, 
                                            int(config['DATA']['num_classes']), 
                                            mode='val', model='LSTM')

    # data loader wrapper -> reshape inputs, add state initialzation and pad time and dimension to 
    # multiple of 16
    train_gen_wrap = data_loader_model_wrapper_classification(train_gen, [batch_size, 
                                                                     int(config['TRAINING']['encoding_dim'])])
    val_gen_wrap = data_loader_model_wrapper_classification(train_gen, [batch_size, 
                                                                     int(config['TRAINING']['encoding_dim'])])
else:
    # get data loader
    train_gen = data_generator_identification(train_files, batch_size, steps_per_epoch_train, mode='train', model='LSTM')
    val_gen = data_generator_identification(val_files, batch_size, steps_per_epoch_val, mode='val', model='LSTM')

    # data loader wrapper -> reshape inputs, add state initialzation and pad time and dimension to 
    # multiple of 16
    train_gen_wrap = data_loader_model_wrapper_identification(train_gen, [batch_size, 
                                                                     int(config['TRAINING']['encoding_dim'])])
    val_gen_wrap = data_loader_model_wrapper_identification(train_gen, [batch_size, 
                                                                     int(config['TRAINING']['encoding_dim'])])

# Initialize model

In [55]:
# reset default graph in case graph has already been defined
tf.reset_default_graph()

if classification:
    input_dict, output_dict = get_simple_LSTM_classification(int(config['TRAINING']['encoding_dim']), 
                                                             int(config['DATA']['time_steps']), 
                                                             int(config['DATA']['features_dim']), 
                                                             int(config['DATA']['num_classes']))
else:
    input_dict, output_dict = get_simple_LSTM_encoder(int(config['TRAINING']['encoding_dim']), 
                                                             int(config['DATA']['time_steps']), 
                                                             int(config['DATA']['features_dim']))

In [57]:
# Initialize trainer and losses

if classification:
    losses_dict = {
        'logits': tf.nn.softmax_cross_entropy_with_logits
    }
else:
    losses_dict = {
        'encoding': triplet_loss_tf
    }

if classification:
    trainer = ClassificationTrainer(input_dict, output_dict, losses_dict, train_gen_wrap, val_gen_wrap, 
                     steps_per_epoch_train, steps_per_epoch_val, config_file)
else:
    trainer = IdentificationTrainer(input_dict, output_dict, losses_dict, train_gen_wrap, val_gen_wrap, 
                     steps_per_epoch_train, steps_per_epoch_val, config_file)

# Run training

In [58]:
trainer.train()

Epoch 0


InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [?,3840]
	 [[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[?,3840], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]

Caused by op 'Placeholder', defined at:
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/asyncio/base_events.py", line 427, in run_forever
    self._run_once()
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/asyncio/base_events.py", line 1440, in _run_once
    handle._run()
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-55-a2db8c7d4c5d>", line 8, in <module>
    int(config['DATA']['num_classes']))
  File "<ipython-input-54-19a307da4cf6>", line 76, in get_simple_LSTM_classification
    input_dict_encoder, output_dict_encoder = get_simple_LSTM_encoder(encoding_dim, n_steps, n_feature_dim)
  File "<ipython-input-54-19a307da4cf6>", line 47, in get_simple_LSTM_encoder
    x = tf.placeholder(tf.float32, [None, n_steps * n_feature_dim])
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1548, in placeholder
    return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 2094, in _placeholder
    name=name)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/janis/anaconda3/envs/tf13env/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [?,3840]
	 [[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[?,3840], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]


In [42]:
def batch_cosine_similarity(x1, x2):
    """
    Cosine similarity between x1 and x2
    """
    
    dot = K.squeeze(K.batch_dot(x1, x2, axes=1), axis=1)
    return dot

def batch_cosine_similarity_tf(x1, x2):
    """
    Cosine similarity between x1 and x2
    """
    
    dot = tf.einsum('ij,ij->i', x1, x2)
    return dot


def triplet_loss(y_pred, batch_size, alpha):
    """
    Triplet loss for speaker identification.
    
    args:
        y_pred: contains encoding of anchor, positive and negative sample in this order. -> shape (3*batch_size, ...)
    
    returns:
        total_loss
    """
    
    anchor = y_pred[0:batch_size]
    positive_ex = y_pred[batch_size:2 * batch_size]
    negative_ex = y_pred[2 * batch_size:]
    sap = batch_cosine_similarity(anchor, positive_ex)
    san = batch_cosine_similarity(anchor, negative_ex)
    loss = K.maximum(san - sap + tf.constant(alpha), 0.0)
    total_loss = K.mean(loss)
    return total_loss


def triplet_loss_tf(y_pred, batch_size, alpha):
    """
    Triplet loss for speaker identification.
    
    args:
        y_pred: contains encoding of anchor, positive and negative sample in this order. -> shape (3*batch_size, ...)
    
    returns:
        total_loss
    """
    
    anchor = y_pred[0:batch_size]
    positive_ex = y_pred[batch_size:2 * batch_size]
    negative_ex = y_pred[2 * batch_size:]
    pos_sim = batch_cosine_similarity(anchor, positive_ex)
    neg_sim = batch_cosine_similarity(anchor, negative_ex)
    
    loss = tf.maximum(neg_sim - pos_sim + tf.constant(alpha), 0.0)
    total_loss = tf.reduce_mean(loss)
    return total_loss

In [54]:
import tensorflow as tf

def LSTM_layer(X, hidden_dim, state_np_init, n_time, return_sequence=False):
    """
    Creates one unidirectional LSTM layer.
    
    args:
        
    returns:
    
    """
    
    # split operation only support the shape[axis] with integer multiple of 16
    X_in = tf.split(X, n_time, 1)
    
    # define LSTM cell
    lstm_cell = tf.contrib.rnn.LSTMCell(hidden_dim)
    
    # create initial state
    cell_state = tf.convert_to_tensor(state_np_init, dtype=tf.float32)
    hidden_state = tf.convert_to_tensor(state_np_init, dtype=tf.float32)
    state = tf.nn.rnn_cell.LSTMStateTuple(cell_state, hidden_state)
    
    outputs, states = tf.nn.static_rnn(lstm_cell, X_in, initial_state=state, dtype=tf.float32)
    
    if return_sequence:
        return outputs
    else:
        return outputs[-1]
        

def get_simple_LSTM_encoder(encoding_dim: int, n_steps: int, n_feature_dim: int):
    """
    Returns a one layer uni-directional LSTM encoder
    
    args:
        
    returns:
    
    """
    
    # adapt to huawei dim limitation of multiples of 16
    n_steps = n_steps + (n_steps % 16 > 0)*(16 - n_steps % 16)
    n_feature_dim = n_feature_dim + (n_feature_dim % 16 > 0)*(16 - n_feature_dim % 16)
    
    # input
    x = tf.placeholder(tf.float32, [None, n_steps * n_feature_dim])
    state_np_init = tf.placeholder(tf.float32, [None, encoding_dim])
    
    # get encoding
    with tf.name_scope("Encoding"):
        encoding = LSTM_layer(x, encoding_dim, state_np_init, n_steps, )
        
    input_dict = {
        'x': x,
        'state': state_np_init
    }
    
    output_dict = {
        'encoding': encoding
    }
    
    return input_dict, output_dict

def get_simple_LSTM_classification(encoding_dim: int, n_steps: int, n_feature_dim: int, num_classes: int):
    """
    Returns endpoints of classification model.
    
    args:
        
    returns:
        
    """
    
    # get encoding and model inputs
    input_dict_encoder, output_dict_encoder = get_simple_LSTM_encoder(encoding_dim, n_steps, n_feature_dim)
    y = tf.placeholder(tf.float32, [None, num_classes])
    
    logits =  tf.layers.dense(output_dict_encoder['encoding'], num_classes)
#     prediction = tf.nn.softmax(logits)
    
    input_dict = {
        'x': input_dict_encoder['x'],
        'state': input_dict_encoder['state'],
        'y': y
    }
    
    output_dict = {
        'logits': logits
    }
    
    return input_dict, output_dict

In [46]:
import numpy as np
import tensorflow as tf

class BaseTrainer():
    
    def __init__(self, input_dict, output_dict, losses_dict, train_gen, val_gen, 
                 steps_per_epoch_train, steps_per_epoch_val, config_file):
        
        self.input_dict = input_dict
        self.output_dict = output_dict
        self.losses_dict = losses_dict
        self.train_gen = train_gen
        self.val_gen = val_gen
        self.steps_per_epoch_train = steps_per_epoch_train
        self.steps_per_epoch_val = steps_per_epoch_val
        
        self.params = config['TRAINING']
    
    
    def train(self, checkpoint=None):
        
        costs = self.compute_loss()

        cost = tf.reduce_sum(tf.concat(costs, axis=0))
        train_ops = tf.train.AdamOptimizer(float(self.params['lr'])).minimize(cost)
        
        sess = tf.Session()
        saver = tf.train.Saver()
        
        # create run folder
        import datetime
        run_folder = 'runs/' + 'tf13' + str(datetime.datetime.now())
        writer = tf.summary.FileWriter(run_folder, sess.graph)
        
        if checkpoint is not None:
            saver.restore(sess, checkpoint)
        
        sess.run(tf.global_variables_initializer())
        
        best_val_loss = np.inf
        patience_counter = 0
        for i in range(int(self.params['epochs'])):
            print('Epoch ' + str(i))
            
            train_loss = []
            for j in range(self.steps_per_epoch_train):
                batch = self.train_gen.__next__()
                
                inp = self._build_input(batch)
                sess.run(train_ops, inp)
                
                if j % int(self.params['print_every']) == 0:
                    train_loss.append(sess.run(train_ops))
                    print('Step ' + str(j) + ' | Training loss: ' + str(train_loss[-1]))
            
            val_loss = []
            for j in range(self.steps_per_epoch_val):
                batch = self.val_gen.__next__()
                inp = self._build_input(batch)
                val_loss.append(sess.run(cost, inp))
            
            val_loss = np.mean(val_loss)
            train_loss = np.mean(train_loss)
            tf.summary.scalar('train_loss', train_loss)
            tf.summary.scalar('val_loss', val_loss)
            
            print('Epoch ' + str(i) + ' | Training loss: ' + str(train_loss) + ' | Validation loss: ' + str(val_loss))
            
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= int(self.params['patience']):
                    print('Training done!')
                    break
            
                    
    def _build_input(self, batch):
        inp = {}
        for key in self.input_dict.keys():
            inp[self.input_dict[key]] = batch[key]
        return inp
    
    def compute_loss(self):
        raise NotImplementedError
        

class ClassificationTrainer(BaseTrainer):
    
    def __init__(self, input_dict, output_dict, losses_dict, train_gen, val_gen, 
                 steps_per_epoch_train, steps_per_epoch_val, config_file):
        super(ClassificationTrainer, self).__init__(input_dict, output_dict, losses_dict, train_gen, val_gen, 
                 steps_per_epoch_train, steps_per_epoch_val, config_file)
        
    def compute_loss(self):
        return tf.reduce_mean(self.losses_dict['logits'](logits=self.output_dict['logits'], 
                                                         labels=self.input_dict['y']))
    
    
class IdentificationTrainer(BaseTrainer):
    
    def __init__(self, input_dict, output_dict, losses_dict, train_gen, val_gen, 
                 steps_per_epoch_train, steps_per_epoch_val, config_file):
        super(IdentificationTrainer, self).__init__(input_dict, output_dict, losses_dict, train_gen, val_gen, 
                 steps_per_epoch_train, steps_per_epoch_val, config_file)
        
    def compute_loss(self):
        return tf.reduce_mean(self.losses_dict['encoding'](self.output_dict['encoding'], 
                                                          self.params['batch_size'],
                                                          self.params['alpha']))

In [None]:
import tensorflow as tf
import numpy as np

tf.reset_default_graph()

n_time = 80
stacked = False
bidirectional = False
normal = True
classification = False

def RNN(X, hidden_dim, state_np_init, scope):
    # split operation only support the shape[axis] with integer multiple of 16
    X_in = tf.split(X, n_time, 1)
    lstm_cell = tf.contrib.rnn.LSTMCell(hidden_dim)
    cell_state = tf.convert_to_tensor(state_np_init, dtype=tf.float32)
    hidden_state = tf.convert_to_tensor(state_np_init, dtype=tf.float32)
    state = tf.nn.rnn_cell.LSTMStateTuple(cell_state, hidden_state)
    
    outputs, states = tf.nn.static_rnn(lstm_cell, X_in, initial_state=state, dtype=tf.float32, scope=scope)
    
    if scope == 'normal':
        return outputs[-1]
    else:
        return outputs

def input_pad_new(x):
    return np.pad(x, ((0,0),(0,0),(0,13)), 'constant', constant_values=((0,0),(0,0),(0,0)))

# parameters init
l_r = 0.001
lr = l_r
training_iters = 100000

# Huawei DDK V150 only support 16 times the n_inputs and n_steps
# so the input data of mnist dataset should pad to 32 pixels.
n_inputs = 48
n_steps = n_time
n_hidden_units1 = 48
n_hidden_units2 = 64
encoding_dim = n_hidden_units2
n_classes = num_classes

# define placeholder for input
x = tf.placeholder(tf.float32, [None, n_steps * n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])
# x_reverse = tf.reverse(x, axis=tf.constant([0]))
if stacked:
    state_np_init1 = tf.placeholder(tf.float32, [None, n_hidden_units1])
state_np_init2 = tf.placeholder(tf.float32, [None, n_hidden_units2])

# 1. Huawei DDK V150 can't support some operations that use for initialize the state.
# 2. to support variable input of num, so add state init as placeholder
# state_np_init = tf.placeholder(tf.float32, [None, n_hidden_units])

if stacked:
    encoding1 = RNN(x, 48, state_np_init1, scope='no')
    encoding1 = tf.stack(encoding1, axis=1)
    dim = tf.shape(encoding1)[0]
    encoding1 = tf.reshape(encoding1, [dim, n_steps*n_inputs])
    encoding = RNN(encoding1, 512, state_np_init2, scope='normal')
if bidirectional:
    encoding1 = RNN(x, n_hidden_units2, state_np_init2, scope='no')
    encoding2 = RNN(x_reverse, n_hidden_units2, state_np_init2, scope='reverse')
    encoding = tf.add(encoding1, encoding2)
if normal:
    encoding = RNN(x, n_hidden_units2, state_np_init2, scope='normal')
    
with tf.name_scope("Encoding"):
    encoding = tf.layers.dense(encoding, 512)
#     encoding = tf.nn.l2_normalize(encoding, dim=1)

# d1 = tf.layers.dense(encoding, 256, activation=tf.nn.relu)
logits =  tf.layers.dense(encoding, n_classes)
prediction = tf.nn.softmax(logits) 

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=y))
train_op = tf.train.AdamOptimizer(l_r).minimize(cost)

correct_pred = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
inter = tf.cast(correct_pred,tf.float32)
accuracy = tf.reduce_mean(inter)

#init session
sess = tf.Session()
#init all variables

#start training
saver = tf.train.Saver()

import datetime
run_folder = 'runs/' + 'tf13' + str(datetime.datetime.now())
writer = tf.summary.FileWriter(run_folder, sess.graph)

if not classification:
    saver.restore(sess, 'runs/60perc_acc_classification/class_ckpt-178')
    encoding = tf.nn.l2_normalize(encoding, dim=1)
    cost_id = deep_speaker_loss(None, encoding)
    train_op_id = tf.train.AdamOptimizer(lr).minimize(cost_id)
    sess.run(tf.global_variables_initializer())
    best_val_loss = 100
    train_loss = []
    all_val_loss = []
    patience = 10
    for i in range(training_iters):

        batch_x, batch_y = gen_train_ident.__next__()
        batch_x_pad = input_pad_new(batch_x)
        batch_x_pad_reshape = np.reshape(batch_x_pad, [3*batch_size, n_steps*n_inputs])
        d = {x: batch_x_pad_reshape, state_np_init2:np.zeros((3*batch_size, n_hidden_units2))}
        sess.run(train_op_id,feed_dict=d)
#         enc = sess.run(encoding,feed_dict=d)
#         print(enc)
        if i % 100 == 0:
            t_loss = sess.run(cost_id,feed_dict=d)
            print("Step " + str(i+1) + " Training loss: {}".format(t_loss))
            train_loss.append(t_loss)

        if i % steps_per_epoch == 0:

            val_loss = []
            for i in range(validation_steps):
                batch_x_val, batch_y_val = gen_val_ident.__next__()
                batch_x_pad_val = input_pad_new(batch_x_val)
                batch_x_pad_reshape_val = np.reshape(batch_x_pad_val, [3*batch_size, n_steps*n_inputs])
                d_v = {x: batch_x_pad_reshape_val, state_np_init2:np.zeros((3*batch_size, n_hidden_units2))}
                val_loss.append(sess.run(cost_id,feed_dict=d_v))

            t_loss = np.mean(train_loss)
            v_loss = np.mean(val_loss)
            all_val_loss.append(v_loss)
            print("Training loss: {}".format(t_loss))
            print("Validation loss: {}".format(v_loss))

            tf.summary.scalar('train_loss', t_loss)
            tf.summary.scalar('val_loss', v_loss)
            train_loss = []

            if v_loss < best_val_loss:
                best_val_loss = v_loss

                saver.save(sess, run_folder + '/mnist_ckpt', i)
                output_graph_def = tf.graph_util.convert_variables_to_constants(
                    sess, # The session is used to retrieve the weights
                    tf.get_default_graph().as_graph_def(), # The graph_def is used to retrieve the nodes 
                    ["Encoding/dense/BiasAdd"] # The output node names are used to select the usefull nodes
                    )

                with tf.gfile.GFile(run_folder + '/best_model.pb', "wb") as f:
                    f.write(output_graph_def.SerializeToString())

            if not any(all_val_loss[-patience:] >= best_val_loss):
                print('Done!')
                break
else:
    best_val_loss = 100
    patience = 10
    train_loss = []
    all_val_loss = []
    for i in range(training_iters):

        batch_x, batch_y = gen_train_class.__next__()
        batch_x_pad = input_pad_new(batch_x)
        batch_x_pad_reshape = np.reshape(batch_x_pad, [batch_size, n_steps*n_inputs])
        if stacked:
            d = {x: batch_x_pad_reshape, y: batch_y,
                                         state_np_init1:np.zeros((batch_size, n_hidden_units1)),
                                         state_np_init2:np.zeros((batch_size, n_hidden_units2))}
        else:
            d = {x: batch_x_pad_reshape, y: batch_y,
                                         state_np_init2:np.zeros((batch_size, n_hidden_units2))}
        sess.run(train_op,feed_dict=d)

        if i % 100 == 0:
            t_loss = sess.run(cost,feed_dict=d)
            print("Step " + str(i+1) + " Training loss: {}".format(t_loss))
            train_loss.append(t_loss)

        if i % steps_per_epoch == 0:

            val_loss = []
            val_acc = []
            for i in range(validation_steps):
                batch_x_val, batch_y_val = gen_val_class.__next__()
                batch_x_pad_val = input_pad_new(batch_x_val)
                batch_x_pad_reshape_val = np.reshape(batch_x_pad_val, [batch_size, n_steps*n_inputs])
                if stacked:
                    d_v = {x: batch_x_pad_reshape_val, y: batch_y_val,
                                             state_np_init1:np.zeros((batch_size, n_hidden_units1)),
                                             state_np_init2:np.zeros((batch_size, n_hidden_units2))}
                else: 
                    d_v = {x: batch_x_pad_reshape_val, y: batch_y_val,
                                             state_np_init2:np.zeros((batch_size, n_hidden_units2))}

                val_loss.append(sess.run(cost,feed_dict=d_v))
                val_acc.append(sess.run(accuracy,feed_dict=d_v))

            t_loss = np.mean(train_loss)
            v_loss = np.mean(val_loss)
            v_acc = np.mean(val_acc)
            all_val_loss.append(v_loss)
            print("Training loss: {}".format(t_loss))
            print("Validation loss: {}".format(v_loss))
            print("Validation accuracy: {}".format(v_acc))

            tf.summary.scalar('train_loss', t_loss)
            tf.summary.scalar('val_loss', v_loss)
            tf.summary.scalar('val_loss', v_acc)
            train_loss = []

            if v_loss < best_val_loss:
                best_val_loss = v_loss

                saver.save(sess, run_folder + '/class_ckpt', i)
                output_graph_def = tf.graph_util.convert_variables_to_constants(
                    sess, # The session is used to retrieve the weights
                    tf.get_default_graph().as_graph_def(), # The graph_def is used to retrieve the nodes 
                    ["Encoding/dense/BiasAdd"] # The output node names are used to select the usefull nodes
                    )

                with tf.gfile.GFile(run_folder + '/best_model.pb', "wb") as f:
                    f.write(output_graph_def.SerializeToString())

            if not any(all_val_loss[-patience:] >= best_val_loss):
                print('Done!')
                break

In [None]:
graph = tf.get_default_graph()

In [None]:
graph

In [None]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import numpy as np

tf.reset_default_graph()

n_time = 80
stacked = False
bidirectional = False
normal = True

def RNN(X, hidden_dim, state_np_init, scope):
    # split operation only support the shape[axis] with integer multiple of 16
    X_in = tf.split(X, n_time, 1)
    lstm_cell = tf.contrib.rnn.LSTMCell(hidden_dim)
    cell_state = tf.convert_to_tensor(state_np_init, dtype=tf.float32)
    hidden_state = tf.convert_to_tensor(state_np_init, dtype=tf.float32)
    state = tf.nn.rnn_cell.LSTMStateTuple(cell_state, hidden_state)
    
    outputs, states = tf.nn.static_rnn(lstm_cell, X_in, initial_state=state, dtype=tf.float32, scope=scope)
    
    if scope == 'normal':
        return outputs[-1]
    else:
        return outputs

def input_pad_new(x):
    return np.pad(x, ((0,0),(0,0),(0,13)), 'constant', constant_values=((0,0),(0,0),(0,0)))

# parameters init
l_r = 0.001
training_iters = 100000

# Huawei DDK V150 only support 16 times the n_inputs and n_steps
# so the input data of mnist dataset should pad to 32 pixels.
n_inputs = 48
n_steps = n_time
n_hidden_units1 = 48
n_hidden_units2 = 512
n_classes = num_classes

# define placeholder for input
x = tf.placeholder(tf.float32, [None, n_steps * n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])
# x_reverse = tf.reverse(x, axis=tf.constant([0]))
if stacked:
    state_np_init1 = tf.placeholder(tf.float32, [None, n_hidden_units1])
state_np_init2 = tf.placeholder(tf.float32, [None, n_hidden_units2])

# 1. Huawei DDK V150 can't support some operations that use for initialize the state.
# 2. to support variable input of num, so add state init as placeholder
# state_np_init = tf.placeholder(tf.float32, [None, n_hidden_units])

if stacked:
    encoding1 = RNN(x, 48, state_np_init1, scope='no')
    encoding1 = tf.stack(encoding1, axis=1)
    dim = tf.shape(encoding1)[0]
    encoding1 = tf.reshape(encoding1, [dim, n_steps*n_inputs])
    encoding = RNN(encoding1, 512, state_np_init2, scope='normal')
if bidirectional:
    encoding1 = RNN(x, n_hidden_units2, state_np_init2, scope='no')
    encoding2 = RNN(x_reverse, n_hidden_units2, state_np_init2, scope='reverse')
    encoding = tf.add(encoding1, encoding2)
if normal:
    encoding = RNN(x, n_hidden_units2, state_np_init2, scope='normal')
    
with tf.name_scope("Encoding"):
    encoding = tf.layers.dense(encoding, 512)
#     encoding = tf.nn.l2_normalize(encoding, dim=1)

# d1 = tf.layers.dense(encoding, 256, activation=tf.nn.relu)
logits =  tf.layers.dense(encoding, n_classes)
prediction = tf.nn.softmax(logits) 

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=y))
train_op = tf.train.AdamOptimizer(l_r).minimize(cost)

correct_pred = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
inter = tf.cast(correct_pred,tf.float32)
accuracy = tf.reduce_mean(inter)

#init session
sess = tf.Session()
#init all variables
sess.run(tf.global_variables_initializer())
#start training
saver = tf.train.Saver()

import datetime
run_folder = 'runs/' + 'tf13' + str(datetime.datetime.now())
writer = tf.summary.FileWriter(run_folder, sess.graph)

best_val_loss = 100
patience = 10
all_val_loss = []
train_loss = []
for i in range(training_iters):
    
    batch_x, batch_y = gen_train_class.__next__()
    batch_x_pad = input_pad_new(batch_x)
    batch_x_pad_reshape = np.reshape(batch_x_pad, [batch_size, n_steps*n_inputs])
    if stacked:
        d = {x: batch_x_pad_reshape, y: batch_y,
                                     state_np_init1:np.zeros((batch_size, n_hidden_units1)),
                                     state_np_init2:np.zeros((batch_size, n_hidden_units2))}
    else:
        d = {x: batch_x_pad_reshape, y: batch_y,
                                     state_np_init2:np.zeros((batch_size, n_hidden_units2))}
    sess.run(train_op,feed_dict=d)
    
    if i % 100 == 0:
        t_loss = sess.run(cost,feed_dict=d)
        print("Step " + str(i+1) + " Training loss: {}".format(t_loss))
        train_loss.append(t_loss)
        
    if i % steps_per_epoch == 0:
        
        val_loss = []
        val_acc = []
        for i in range(validation_steps):
            batch_x_val, batch_y_val = gen_val_class.__next__()
            batch_x_pad_val = input_pad_new(batch_x_val)
            batch_x_pad_reshape_val = np.reshape(batch_x_pad_val, [batch_size, n_steps*n_inputs])
            if stacked:
                d_v = {x: batch_x_pad_reshape_val, y: batch_y_val,
                                         state_np_init1:np.zeros((batch_size, n_hidden_units1)),
                                         state_np_init2:np.zeros((batch_size, n_hidden_units2))}
            else: 
                d_v = {x: batch_x_pad_reshape_val, y: batch_y_val,
                                         state_np_init2:np.zeros((batch_size, n_hidden_units2))}
                
            val_loss.append(sess.run(cost,feed_dict=d_v))
            val_acc.append(sess.run(accuracy,feed_dict=d_v))
            
        t_loss = np.mean(train_loss)
        v_loss = np.mean(val_loss)
        v_acc = np.mean(val_acc)
        all_val_loss.append(v_loss)
        print("Training loss: {}".format(t_loss))
        print("Validation loss: {}".format(v_loss))
        print("Validation accuracy: {}".format(v_acc))
        
        tf.summary.scalar('train_loss', t_loss)
        tf.summary.scalar('val_loss', v_loss)
        tf.summary.scalar('val_loss', v_acc)
        train_loss = []
        
        if v_loss < best_val_loss:
            best_val_loss = v_loss
            
            saver.save(sess, 'out/mnist_ckpt', i)
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, # The session is used to retrieve the weights
                tf.get_default_graph().as_graph_def(), # The graph_def is used to retrieve the nodes 
                ["Encoding/dense/BiasAdd"] # The output node names are used to select the usefull nodes
            )

            with tf.gfile.GFile('out/Encoding_lstm_var_batch.pb', "wb") as f:
                f.write(output_graph_def.SerializeToString())
        
        if not any(all_val_loss[-patience:] >= best_val_loss):
            print('Done!')
            break

In [None]:
with tf.Session() as sess:
    
    ### create graph ###
    
    writer = tf.summary.FileWriter(run_folder, sess.graph)
    saver.save(sess, 'out/mnist_ckpt', i)
    output_graph_def = tf.graph_util.convert_variables_to_constants(
        sess,
        tf.get_default_graph().as_graph_def(), # graph_def
        ["Encoding/dense/BiasAdd"] # The output node names
    )

    with tf.gfile.GFile('out/Encoding_lstm_var_batch.pb', "wb") as f:
        f.write(output_graph_def.SerializeToString())

In [None]:
import tensorflow as tf

def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(
            graph_def, 
            input_map=None, 
            return_elements=None, 
            name="prefix", 
            op_dict=None, 
            producer_op_list=None
        )
    return graph

In [None]:
graph = load_graph('Encoding_lstm_var_batch_class.pb')

In [None]:
# get operation and tensor names for later reference
for op in graph.get_operations():
    print(op.name)
    print(op.values())

In [None]:
gen_train_ident = data_generator_identification(train_files, batch_size, steps_per_epoch, mode='train', model='LSTM')
gen_val_ident = data_generator_identification(val_files, batch_size, validation_steps, mode='val', model='LSTM')

In [None]:
import logging

def batch_cosine_similarity(x1, x2):
    # https://en.wikipedia.org/wiki/Cosine_similarity
    # 1 = equal direction ; -1 = opposite direction
    dot = K.squeeze(K.batch_dot(x1, x2, axes=1), axis=1)
    logging.info('dot: {}'.format(dot))
    # as values have have length 1, we don't need to divide by norm (as it is 1)
    return dot


def deep_speaker_loss(y_true, y_pred):
    logging.info('y_true={}'.format(y_true))
    logging.info('y_pred={}'.format(y_pred))
    # y_true.shape = (batch_size, embedding_size)
    # y_pred.shape = (batch_size, embedding_size)
    # CONVENTION: Input is:
    # concat(BATCH_SIZE * [ANCHOR, POSITIVE_EX, NEGATIVE_EX] * NUM_FRAMES)
    # EXAMPLE:
    # BATCH_NUM_TRIPLETS = 3, NUM_FRAMES = 2
    # _____________________________________________________
    # ANCHOR 1 (512,)
    # ANCHOR 2 (512,)
    # ANCHOR 3 (512,)
    # POS EX 1 (512,)
    # POS EX 2 (512,)
    # POS EX 3 (512,)
    # NEG EX 1 (512,)
    # NEG EX 2 (512,)
    # NEG EX 3 (512,)
    # _____________________________________________________

    logging.info('elements={}'.format(batch_size))
    anchor = y_pred[0:batch_size]
    positive_ex = y_pred[batch_size:2*batch_size]
    negative_ex = y_pred[2*batch_size:]
    
    logging.info('anchor={}'.format(anchor))
    logging.info('positive_ex={}'.format(positive_ex))
    logging.info('negative_ex={}'.format(negative_ex))

    sap = batch_cosine_similarity(anchor, positive_ex)
    logging.info('sap={}'.format(sap))
    san = batch_cosine_similarity(anchor, negative_ex)
    logging.info('san={}'.format(san))
    loss = K.maximum(san - sap + alpha, 0.0)
    logging.info('loss={}'.format(loss))
    # total_loss = K.sum(loss)
#     total_loss = K.mean(loss)
#     logging.info('total_loss={}'.format(total_loss))
    return tf.reduce_mean(loss)

In [None]:
graph2 = tf.get_default_graph()

In [None]:
lr = 0.001

# access the input and output nodes via tensor names
x = graph.get_tensor_by_name('prefix/Placeholder:0')
state = graph.get_tensor_by_name('prefix/Placeholder_2:0')
encoding = graph.get_tensor_by_name('prefix/Encoding/dense/BiasAdd:0')

with tf.Session(graph=graph):
    cost = deep_speaker_loss(None, encoding)
#     train_op = tf.train.AdamOptimizer(lr).minimize(cost)
    tf_saver = tf.train.Saver()
    tf_saver.restore(sess, 'out/mnist_ckpt-345')
    print(train_op)

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
import datetime

run_folder = 'runs/' + str(datetime.datetime.now())

os.mkdir(run_folder)

class_model.fit_generator(gen_train_class, 
                steps_per_epoch=steps_per_epoch, 
                epochs=epochs,
                validation_data=gen_val_class, 
                validation_steps=validation_steps,
                callbacks=[
                    ModelCheckpoint(run_folder + '/best_model.hdf5', save_best_only=True),
                    EarlyStopping(patience=10),
                    TensorBoard(log_dir=run_folder)
                ])

In [None]:
class_model.load_weights(run_folder + '/' + 'best_model.hdf5')

In [None]:
val_dat = np.zeros((10*batch_size, 80, 35, 1))
val_dat_t = np.zeros((10*batch_size, 165))
for i in range(10):
    d, t = gen_val_class.__next__()
    val_dat[i*batch_size:(i+1)*batch_size] = d
    val_dat_t[i*batch_size:(i+1)*batch_size] = t

In [None]:
pred = class_model.predict(val_dat)

In [None]:
am = np.argmax(pred, axis=1)

In [None]:
am

In [None]:
at = np.argmax(val_dat_t, axis=1)

In [None]:
(at == am).sum()/len(at)

In [None]:
triplet_model.save_weights('triplet_weights.h5')

In [None]:
triplet_model.load_weights('triplet_weights.h5')

In [None]:
def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    graph = session.graph
    with graph.as_default():
        freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
        output_names = output_names or []
        output_names += [v.op.name for v in tf.global_variables()]
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def,
                                                      output_names, freeze_var_names)
        return frozen_graph

In [None]:
# freeze graph for storing it
import tensorflow as tf
from tensorflow.python.framework.graph_util import convert_variables_to_constants
frozen_graph = freeze_session(K.get_session(), output_names=[out.op.name for out in triplet_model.outputs])

In [None]:
# save graph as .pb
tf.train.write_graph(frozen_graph, "", "triplet_model.pb", as_text=False)