In [2]:
import numpy as np
import os
import sys
import keras
import keras.backend as K
from keras import Model, Sequential, layers
from keras.layers import Lambda
import tensorflow as tf
from keras.layers import Input, Dense, Embedding, Dot, Reshape, Add, Average, Concatenate, TimeDistributed, Bidirectional,LSTM, Multiply


Using TensorFlow backend.


In [6]:
# ---- Hyper-parameters ------------------------------------------------- #
# Vocabulary size of each entity domain, and how many domains there are.
domain_dims = [10,20,25,30]
num_domains = 4

# Layer widths.
input_emb_dim = 256
lstm_dim = 32
ctx_dim_1 = 32
interaction_dim = 32

# Negative samples drawn per positive record.
num_neg_samples = 10

# One random (domain_dims[d] x input_emb_dim) embedding table per domain.
domain_emb_wt = []
for _dom in range(num_domains):
    domain_emb_wt.append(
        np.random.random((domain_dims[_dom], input_emb_dim))
    )

RUN_MODE = 'train'

In [127]:
def get_model(
    num_domains,
    domain_dims,
    domain_emb_wt,
    lstm_dim,
    interaction_layer_dim,
    context_dim,
    num_neg_samples = 10,
    RUN_MODE = 'train',
    weights_path = 'model.h5'
    ):
    """
    Build and compile the Keras model that scores a record made of one
    entity index per domain.

    Scoring of a single record (implemented by the inner `process`):
      1. each domain's index is looked up in that domain's Embedding layer,
         whose weights are initialised from `domain_emb_wt`;
      2. the per-domain embeddings are stacked into a sequence, padded at
         both ends with their mean, and fed to a bidirectional LSTM;
      3. for every domain a context vector is computed from the forward and
         backward LSTM outputs through two ReLU Dense layers;
      4. the context vector is dotted with a linear Dense transform of the
         domain's own embedding; the dot products are summed over domains
         and passed through a sigmoid.

    Args:
        num_domains: number of entity domains in a record.
        domain_dims: per-domain vocabulary sizes (Embedding `input_dim`).
        domain_emb_wt: list of arrays, one per domain; the last axis of the
            first array defines the embedding width.
        lstm_dim: number of units of each LSTM direction.
        interaction_layer_dim: width of the layers whose outputs are dotted.
        context_dim: width of the first context Dense layer.
        num_neg_samples: negative samples per record (train mode only).
        RUN_MODE: 'train' builds the two-input training graph; any other
            value builds the single-input scoring graph and loads weights.
        weights_path: HDF5 weights file loaded when RUN_MODE is not 'train'.

    Returns:
        A compiled keras `Model`. In train mode its inputs are
        [positive record, negative samples] and its output is the positive
        score plus the mean score of the negative samples. Otherwise it
        maps one record to its score.
    """

    ctx_dim_1 = context_dim
    interaction_dim = interaction_layer_dim
    input_emb_dim = domain_emb_wt[0].shape[-1]
    # LSTM sequence length: one step per domain plus one padding step at each end.
    n_timesteps = num_domains + 2

    # ================= Tensor helpers used inside Lambda layers ============ #
    # Each helper imports tensorflow locally, so its body does not depend on
    # module-level names. `n_timesteps` and `num_domains` are taken from this
    # function's scope (not from notebook globals), so the graph always
    # matches the arguments passed to get_model.

    def tf_stack(x):
        """Stack a list of tensors along a new axis 1."""
        import tensorflow as tf

        x1 = tf.stack(
                x,
                axis = 1
        )
        return x1


    def tf_squeeze(x):
        """Drop the size-1 axis 1 of a tensor."""
        import tensorflow as tf

        x1 = tf.squeeze(
                x,
                axis = 1
        )
        return x1

    def tf_split_dplus2(x):
        """Split a tensor along axis 1 into n_timesteps pieces."""
        import tensorflow as tf
        return tf.split(
                x,
                num_or_size_splits=n_timesteps,
                axis=1
            )

    def tf_reduce_sum(x):
        """Sum over axis 1, dropping that axis."""
        import tensorflow as tf
        return tf.math.reduce_sum(
            x,
            keepdims=False,
            axis=1
        )

    def split_squeeze_numDomains(x):
        """Split along axis 1 into num_domains pieces and squeeze each piece."""
        import tensorflow as tf
        x1 = tf.split(
                x,
                num_or_size_splits = num_domains,
                axis = 1
        )
        x2 = [tf.squeeze(_x2,axis=1) for _x2 in x1]
        return x2

    def tf_reduce_mean_kdims_axis1(x):
        """Mean over axis 1, keeping that axis with size 1."""
        import tensorflow as tf
        return tf.math.reduce_mean(
                x,
                axis=1,
                keepdims=True )

    def tf_split_squeeze_axis1(x):
        """Split along axis 1 into one piece per entry and squeeze each piece."""
        import tensorflow as tf
        x1 = tf.split(
                x,
                num_or_size_splits = x.shape[1],
                axis = 1
        )
        x2 = [tf.squeeze(_x2,axis=1) for _x2 in x1]
        return x2

    def tf_reciprocal(x):
        """Element-wise 1 / (x + epsilon)."""
        import tensorflow as tf
        epsilon = .000001
        return tf.math.reciprocal(
             x + epsilon
        )

    def tf_sigmoid(x):
        """Element-wise sigmoid."""
        import tensorflow as tf
        return tf.sigmoid(
             x
        )


    # ================= Define the weights ===================== #
    # All weighted layers are created once here and reused by every call to
    # `process`, so the positive record and all negative samples share them.

    # merge_mode=None makes the layer return the forward and backward
    # sequences as two separate tensors.
    BD_LSTM_layer = Bidirectional(
        LSTM(
            units=lstm_dim,
            return_sequences=True
        ),
        input_shape = (n_timesteps, input_emb_dim),
        merge_mode = None
    )

    # Embedding layer for each domain
    list_Entity_Embed = [
        Embedding(
            input_dim = domain_dims[i],
            output_dim = input_emb_dim,
            embeddings_initializer =  keras.initializers.Constant(value = domain_emb_wt[i]),
            name = 'entity_embedding_'+str(i)
        ) for i in range(num_domains)
    ]


    # Dense layer for getting the Context vectors
    list_FNN_1 = [ Dense( ctx_dim_1 ,activation = 'relu', use_bias=True) for i in range(1,n_timesteps-1) ]
    list_FNN_2 = [ Dense( interaction_dim ,activation = 'relu') for i in range(1,n_timesteps-1) ]
    # Dense layer for transforming the input vectors
    xform_Inp_FNN = [Dense( interaction_dim ,activation = None,use_bias=True) for i in range(num_domains)]

    # ========================================================= #

    def process(input_indices, _type = 'pos'):
        """
        Score one record tensor of shape [?, num_domains, 1].

        With _type == 'neg' the per-domain dot products are replaced by
        their reciprocals before being summed; otherwise they are summed
        directly. The sum is passed through a sigmoid and returned.
        """
        # Split the inputs
        split_input = Lambda(tf_split_squeeze_axis1)(input_indices)
        split_emb = []
        for i in range(num_domains):
            split_emb.append(list_Entity_Embed[i](split_input[i]))

        split_emb = [Lambda(tf_squeeze)(_) for _ in split_emb]
        # input embedding now has shape [ ?, num_domains, input_emb_dim]
        input_emb = Lambda(tf_stack)(split_emb)

        # Pad the sequence on both ends with the mean embedding so that it
        # has n_timesteps = num_domains + 2 steps.
        mean_layer_op = Lambda(tf_reduce_mean_kdims_axis1)(input_emb)
        concat_layer = Concatenate(axis=1)(
            [mean_layer_op,input_emb,mean_layer_op]
        )
        bd_lstm = BD_LSTM_layer(concat_layer)

        # =========== #
        bd_lstm_fwd = bd_lstm[0]
        bd_lstm_bck = bd_lstm[1]
        # =========== #

        split_BL_F_op = Lambda( tf_split_dplus2)(bd_lstm_fwd)
        split_BL_B_op = Lambda( tf_split_dplus2)(bd_lstm_bck)
        split_BL_B_op = [Lambda( tf_squeeze )(_) for _ in split_BL_B_op]
        split_BL_F_op = [Lambda( tf_squeeze )(_) for _ in split_BL_F_op]
        print( 'After Bi-Directional LSTM Cur shape :', len(split_BL_B_op), split_BL_B_op[0].shape)

        # ----------- #
        # Context vector
        # ----------- #

        ctx_output = []
        for i in range(1,n_timesteps-1):
            _left = split_BL_F_op[i-1]
            # NOTE(review): the backward output is also taken at index i-1;
            # if this is meant to be the right-hand context of step i,
            # index i+1 may have been intended - confirm.
            _right = split_BL_B_op[i-1]

            # Context vector
            ctx_concat = Concatenate(axis=-1)([_left,_right])
            ctx_mlp_layer1 = list_FNN_1[i-1](ctx_concat)
            ctx_mlp_layer2 = list_FNN_2[i-1](ctx_mlp_layer1)
            ctx_output.append(ctx_mlp_layer2)

        print( ' Cur shape [Context vector]:', ctx_output[0].shape)
        # ============
        # Final output as sigmoid ( sum of Dot products between context vector and modified input)
        # ============

        # =--------
        # Calculate interaction of the context vector with input
        # =--------
        input_layer_split = Lambda(split_squeeze_numDomains )(input_emb)
        interaction_layer_input = [
            xform_Inp_FNN[i](input_layer_split[i])
            for i in range(num_domains)
        ]
        # Do dot product
        dot_product = [ Dot(axes=-1)(
            [interaction_layer_input[i],ctx_output[i]]
        ) for i in range(num_domains)]

        stacked_dot_op = Lambda(tf_stack)(dot_product)

        if _type == 'neg':
            stacked_dot_op = Lambda(tf_reciprocal)(stacked_dot_op)
        final_op = Lambda(tf_reduce_sum)(stacked_dot_op)
        final_op = Lambda(tf_sigmoid)(final_op)
        return final_op



    if RUN_MODE == 'train':

        # ========= TRAIN mode =========== #
        print( 'Run mode ',RUN_MODE )
        pos_input = Input(shape=(num_domains,1))
        pos_op =  process(pos_input, _type = 'pos')

        neg_input = Input(shape=(num_neg_samples,num_domains,1),name='negative_samples')
        # One [?, num_domains, 1] tensor per negative sample.
        neg_input_list = Lambda( tf_split_squeeze_axis1) (neg_input)
        neg_ops = []

        for n_sample in neg_input_list:
            # NOTE(review): negatives are scored with _type='pos', so the
            # 'neg' (reciprocal) branch of `process` is never used - confirm
            # whether _type='neg' was intended here.
            n_res = process(n_sample, _type = 'pos')
            neg_ops.append(n_res)

        # Average the scores of the negative samples.
        final_pred = Lambda(tf_stack)(neg_ops)
        final_pred = Lambda(
            lambda x:
            tf.math.reduce_mean(x,axis=1,keepdims=False)
        )(final_pred)

        final_pred = Add()([final_pred, pos_op])
        inputs = [pos_input,neg_input]
        outputs = final_pred
        model = Model(
                inputs=inputs,
                outputs=outputs
        )
        # ====== Fix embedding weights ======= #
        for l in model.layers:
            if 'entity_embedding_' in l.name:
                l.trainable = False

    else:
        # ========= TEST mode =========== #
        pos_input = Input(shape=(num_domains,1))
        print( 'Run mode ',RUN_MODE )
        final_pred =  process(pos_input, _type = 'pos')
        inputs = pos_input
        outputs = final_pred

        # ==================================== #
        model = Model(
                inputs=inputs,
                outputs=outputs
        )

        # Restore previously saved weights and freeze every layer.
        model.load_weights(weights_path)
        for l in model.layers:
            l.trainable = False

    # ============== Custom Loss function ==================== #
    # Maximize the objective , Minimize -(predicted_val)
    # ======================================================== #
    def custom_loss(y_true,y_pred):
        # y_true is ignored: the loss is just the negated model output.
        return -y_pred

    optimizer = keras.optimizers.Adagrad()
    model.compile(optimizer, loss=custom_loss)

    return model

In [128]:
def save_model(model, json_path="model.json", weights_path="model.h5"):
    """
    Write a model's architecture and weights to disk.

    Args:
        model: object exposing `to_json()` and `save_weights(path)`
            (e.g. a Keras model).
        json_path: file that receives the string returned by `to_json()`.
        weights_path: path handed to `model.save_weights`.

    The defaults reproduce the original fixed file names, so existing
    `save_model(model)` calls behave as before.
    """
    # Serialize first so that no file is created if serialization fails.
    model_json = model.to_json()
    with open(json_path, "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights(weights_path)
    print("Saved model to disk")

In [129]:
# =====================
# Synthetic model inputs.
# Resulting shapes : pos -> [1000, 4, 1], neg -> [1000, 10, 4, 1]

sizes = [10,20,25,30]

pos_records = []
neg_records = []
for _ in range(1000):
    # One positive record: a random index in [0, size) for every domain.
    pos_records.append(
        [np.random.randint(low=0, high=hi, size=1) for hi in sizes]
    )
    # Ten negative records drawn the same way.
    neg_records.append([
        [np.random.randint(low=0, high=hi, size=1) for hi in sizes]
        for _k in range(10)
    ])

pos = np.array(pos_records)
neg = np.array(neg_records)


In [130]:
# Hyper-parameters for this run.
num_domains = 4
domain_dims = [10,20,25,30]
input_emb_dim = 256
lstm_dim = 32
ctx_dim_1 = 32
interaction_dim = 32
num_neg_samples = 10
# One random (domain_dims[i] x input_emb_dim) embedding table per domain.
domain_emb_wt = [
    np.random.random([domain_dims[i],input_emb_dim])
    for i in range(num_domains)
]

RUN_MODE = 'train'

# Build the training graph from the variables defined above, instead of
# repeating their values as literals, so the two cannot drift apart.
model_obj=get_model(
    num_domains=num_domains,
    domain_dims=domain_dims,
    domain_emb_wt=domain_emb_wt,
    lstm_dim=lstm_dim,
    interaction_layer_dim=interaction_dim,
    context_dim=ctx_dim_1,
    num_neg_samples = num_neg_samples,
    RUN_MODE = RUN_MODE
)

# Persist the architecture and weights; the test-mode model loads model.h5.
save_model(model_obj)

Run mode  train
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)
Saved model to disk


In [131]:
# Rebuild the network in test mode; get_model loads the saved weights there,
# so the architecture must match the one that was trained. Reusing the
# notebook's hyper-parameter variables (rather than literals) keeps the two
# in sync.
model_saved = get_model(
    num_domains=num_domains,
    domain_dims=domain_dims,
    domain_emb_wt=domain_emb_wt,
    lstm_dim=lstm_dim,
    interaction_layer_dim=interaction_dim,
    context_dim=ctx_dim_1,
    num_neg_samples = num_neg_samples,
    RUN_MODE = 'test'
)

Run mode  test
After Bi-Directional LSTM Cur shape : 6 (?, 32)
 Cur shape [Context vector]: (?, 32)


In [96]:
# model_train(model, pos_x, neg_x, batch_size, num_epochs): fit the model on positive records and their negative samples.
def model_train(
    model,
    pos_x,
    neg_x,
    batch_size=512,
    num_epochs=100
):
    """
    Fit `model` on positive records and their negative samples.

    Args:
        model: object exposing a Keras-style `fit(x, y, batch_size, epochs)`.
        pos_x: array of positive records; its first axis is the sample count.
        neg_x: array of negative samples aligned with `pos_x`.
        batch_size: mini-batch size passed to `fit`.
        num_epochs: number of epochs passed to `fit`.

    Returns:
        The same `model` object, after fitting.
    """
    num_samples = pos_x.shape[0]
    # Placeholder targets: `fit` needs an array with one row per sample.
    # A Python list of None is treated as a list of separate target arrays
    # and is rejected. The zeros are never used when the model is compiled
    # with a loss that ignores y_true (as get_model's custom_loss does).
    y = np.zeros((num_samples, 1))
    model.fit(
        [pos_x,neg_x],
        y,
        batch_size = batch_size,
        epochs = num_epochs
    )
    return model

    
    

Epoch 1/1


<keras.callbacks.History at 0x7fe35cd2e6d0>

In [132]:
# Print the layer-by-layer summary of the test-mode model.
model_saved.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_24 (InputLayer)           (None, 4, 1)         0                                            
__________________________________________________________________________________________________
lambda_3122 (Lambda)            [(None, 1), (None, 1 0           input_24[0][0]                   
__________________________________________________________________________________________________
entity_embedding_0 (Embedding)  (None, 1, 256)       2560        lambda_3122[0][0]                
__________________________________________________________________________________________________
entity_embedding_1 (Embedding)  (None, 1, 256)       5120        lambda_3122[0][1]                
__________________________________________________________________________________________________
entity_emb

In [None]:
# `model` is not defined at notebook scope; the model built in train mode is
# `model_obj` (model_train fits it in place and returns the same object).
save_model(model_obj)