In [9]:
import os
import re
import sys
import numpy as np
import pandas as pd

from time import sleep
from keras import backend as K
from keras.models import Model 
from keras.models import Sequential as SequentialModel
from keras.layers import Dense, Conv1D, LSTM, Dropout, Embedding, Layer, Input, Flatten, concatenate as Concatenate, Lambda
from keras.callbacks import Callback
from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer as KerasTokenizer

%load_ext autoreload
%autoreload 2

sys.path.insert(0, '../ct')

import load
from preprocess import preprocess
from preprocess import Tokenizer
from preprocess.preprocess import separator_samples

from model.layers import LayerNormalization
from model.layers import ContentBasedAttention_CT
from model.layers import ScaledDotProductAttention
from model.layers import MultiHeadAttention

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
train_data = pd.read_pickle('../data/processed/spooky-author/train.pkl')

x_train = np.array(train_data.x.tolist())
y_train = np.array(train_data.y.tolist())

_x_train = np.zeros((x_train.shape[0], 128))  # samples, d_model
_x_train[:,:x_train.shape[1]] = x_train
x_train = _x_train

# Test output and input shape of a layer

**Custom Layer for understanding keras input-/output shapes**

In [3]:
class DummyLayer(LSTM):
    def __init__(self, units, **kwargs):
        super().__init__(units, **kwargs)
    
    def build(self, input_shape):
        print(f'build:   input_shape={input_shape}')
        return super().build(input_shape)
    
    def call(self, x, **kwargs):
        y = super().call(x, **kwargs)
        print(f'call:    input_shape={x.shape}, output_shape={y.shape}')
        sleep(1)
        return y
        
    
    def compute_output_shape(self, input_shape):
        output_shape = super().compute_output_shape(input_shape)
        print(f'compute: input_shape={input_shape}, output_shape={output_shape}')
        return output_shape

In [4]:
def get_model():
    model = SequentialModel()
    model.add(Embedding(input_dim=15000, output_dim=200))
    model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.15, return_sequences=True))
    model.add(DummyLayer(units=200))
    model.add(Dense(units=100))
    model.add(Dense(3, activation='softmax'))

    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [5]:
model = get_model()

build:   input_shape=(None, None, 128)
call:    input_shape=(None, None, 128), output_shape=(None, 200)
compute: input_shape=(None, None, 128), output_shape=(None, 200)


In [68]:
model.fit(x_train, 
          y_train,
          epochs=4,
          batch_size=32,
          validation_split=0.3)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 13705 samples, validate on 5874 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.callbacks.History at 0x1668c0bcf98>

# Expected Input/Output:

```
# LSTM-output as input
build:   input_shape=(None, None, 128)
call:    input_shape=(None, None, 128), output_shape=(None, 200)
compute: input_shape=(None, None, 128), output_shape=(None, 200)
```

# Custom Layers included in model:

## Layer Normalization

In [26]:
def get_model():
    model = SequentialModel()
    model.add(Embedding(input_dim=15000, output_dim=200))
    model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.15, return_sequences=False))
    model.add(Dense(units=100))
    model.add(LayerNormalization(units=100))
    # model.add(Dense(units=100))
    model.add(Dense(3, activation='softmax'))

    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [7]:
model = get_model()

In [None]:
model.fit(x_train, 
          y_train,
          epochs=8,
          batch_size=32,
          validation_split=0.3)

In [89]:
model.summary()

Model: "sequential_23"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_22 (Embedding)     (None, None, 200)         3000000   
_________________________________________________________________
lstm_27 (LSTM)               (None, 128)               168448    
_________________________________________________________________
dense_31 (Dense)             (None, 100)               12900     
_________________________________________________________________
layer_normalization_5 (Layer (None, 100)               0         
_________________________________________________________________
dense_32 (Dense)             (None, 3)                 303       
Total params: 3,181,651
Trainable params: 3,181,651
Non-trainable params: 0
_________________________________________________________________


## Scaled Dot Product Attention

In [3]:
def get_model():
    # x = Input()
    model = SequentialModel()
    model.add(Embedding(input_dim=15000, output_dim=128))
    # model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.15, return_sequences=True))
    model.add(ScaledDotProductAttention(d_model=128, d_k=16, d_v=128))
    model.add(Flatten())
    model.add(Dense(units=100, name='hidden_0'))
    model.add(LayerNormalization(units=100))
    model.add(Dense(3, activation='softmax', name='output_layer'))

    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


model = get_model()

#### INIT <class 'model.layers.attention.ScaledDotProductAttention'> ####
#### BUILD <class 'model.layers.attention.ScaledDotProductAttention'> ####
#### CALL <class 'model.layers.attention.ScaledDotProductAttention'> ####
    q=(None, None, 16),
    k=(None, None, 16),
    v=(None, None, 128)
    k_T=(None, 16, None)
    y=(None, None, 128)
#### COMPUTE OUTPUT SHAPE ####
(None, 128, 128)


In [4]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 128)         1920000   
_________________________________________________________________
scaled_dot_product_attention (None, 128, 128)          20480     
_________________________________________________________________
flatten_1 (Flatten)          (None, 16384)             0         
_________________________________________________________________
hidden_0 (Dense)             (None, 100)               1638500   
_________________________________________________________________
layer_normalization_1 (Layer (None, 100)               0         
_________________________________________________________________
output_layer (Dense)         (None, 3)                 303       
Total params: 3,579,283
Trainable params: 3,579,283
Non-trainable params: 0
____________________________________________

In [5]:
model.fit(x_train, 
          y_train,
          epochs=8,
          batch_size=32,
          validation_split=0.3)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 13705 samples, validate on 5874 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.callbacks.History at 0x25871ae27f0>

## Multi-Head Attention

In [6]:
def get_model(number_of_heads=2):
    input_layer = Input(shape=(128,))

    _0_x = Embedding(input_dim=15000, output_dim=128)(input_layer)

    _1_s = [ScaledDotProductAttention(d_model=128, d_k=16, d_v=128)(_0_x) for _ in range(number_of_heads)]
    _1_m = MultiHeadAttention(d_heads=2, d_model=128, d_k=16, d_v=128)(_1_s)

    _2_f = Flatten()(_1_m)
    _2_h_0 = Dense(units=10, name='hidden_0')(_2_f)
    _2_hL_0 = LayerNormalization(units=10)(_2_h_0)

    output_layer = Dense(3, activation='softmax', name='output_layer')(_2_hL_0)

    model = Model(inputs=[input_layer],
                  outputs=[output_layer])
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

model = get_model()

#### INIT [ScaledDotProductAttention] ####
#### BUILD [ScaledDotProductAttention] ####
#### CALL [ScaledDotProductAttention] ####
    q=(None, 128, 16),
    k=(None, 128, 16),
    v=(None, 128, 128)
    k_T=(None, 16, 128)
    y=(None, 128, 128)
#### COMPUTE OUTPUT SHAPE ####
(None, 128, 128)
#### INIT [ScaledDotProductAttention] ####
#### BUILD [ScaledDotProductAttention] ####
#### CALL [ScaledDotProductAttention] ####
    q=(None, 128, 16),
    k=(None, 128, 16),
    v=(None, 128, 128)
    k_T=(None, 16, 128)
    y=(None, 128, 128)
#### COMPUTE OUTPUT SHAPE ####
(None, 128, 128)


In [36]:
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_12 (InputLayer)           (None, 128)          0                                            
__________________________________________________________________________________________________
embedding_13 (Embedding)        (None, 128, 128)     1920000     input_12[0][0]                   
__________________________________________________________________________________________________
scaled_dot_product_attention_20 (None, 128, 128)     20480       embedding_13[0][0]               
__________________________________________________________________________________________________
scaled_dot_product_attention_21 (None, 128, 128)     20480       embedding_13[0][0]               
____________________________________________________________________________________________

In [37]:
model.fit(x_train, 
          y_train,
          epochs=8,
          batch_size=32,
          validation_split=0.3)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 13705 samples, validate on 5874 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.callbacks.History at 0x2580641ac18>

# Subclassing Model

In [3]:
class MHAModel(Model):
    def __init__(self, *args, **kwargs):
        input_layer = Input(shape=(128,))

        _0_x = Embedding(input_dim=15000, output_dim=128)(input_layer)

        _1_s = [ScaledDotProductAttention(d_model=128, d_k=16, d_v=128)(_0_x) for _ in range(2)]
        _1_m = MultiHeadAttention(d_heads=2, d_model=128, d_k=16, d_v=128)(_1_s)

        _2_f = Flatten()(_1_m)
        _2_h_0 = Dense(units=10, name='hidden_0')(_2_f)
        _2_hL_0 = LayerNormalization(units=10)(_2_h_0)
        
        output_layer = Dense(3, activation='softmax', name='output_layer')(_2_hL_0)
    
        super().__init__(inputs=[input_layer], 
                         outputs=[output_layer], 
                         *args, 
                         **kwargs)

def get_model():
    model = MHAModel(name='Easy keras, EASY.')
    return model


model = get_model()
model.compile(optimizer='Adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

#### INIT [ScaledDotProductAttention] ####
#### BUILD [ScaledDotProductAttention] ####
#### CALL [ScaledDotProductAttention] ####
    q=(None, 128, 16),
    k=(None, 128, 16),
    v=(None, 128, 128)
    k_T=(None, 16, 128)
    y=(None, 128, 128)
#### COMPUTE OUTPUT SHAPE ####
(None, 128, 128)
#### INIT [ScaledDotProductAttention] ####
#### BUILD [ScaledDotProductAttention] ####
#### CALL [ScaledDotProductAttention] ####
    q=(None, 128, 16),
    k=(None, 128, 16),
    v=(None, 128, 128)
    k_T=(None, 16, 128)
    y=(None, 128, 128)
#### COMPUTE OUTPUT SHAPE ####
(None, 128, 128)


In [4]:
model.summary()

Model: "Easy keras, EASY."
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 128)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 128, 128)     1920000     input_1[0][0]                    
__________________________________________________________________________________________________
scaled_dot_product_attention_1  (None, 128, 128)     20480       embedding_1[0][0]                
__________________________________________________________________________________________________
scaled_dot_product_attention_2  (None, 128, 128)     20480       embedding_1[0][0]                
__________________________________________________________________________________

# Model within a Model

In [150]:
class ModelCeption(Model):
    def __init__(self, *args, **kwargs):
        
        # Model
        input_layer = Input(shape=(128,))

        _0_x = Embedding(input_dim=15000, output_dim=128)

        _1_s = [ScaledDotProductAttention(d_model=128, d_k=16, d_v=128) for _ in range(2)]
        _1_m = MultiHeadAttention(d_heads=2, d_model=128, d_k=16, d_v=128)

        _2_f = Flatten()
        _2_h_0 = Dense(units=10, name='hidden_0')
        _2_hL_0 = LayerNormalization(units=10)
        
        output_layer = Dense(3, activation='softmax', name='output_layer')
    
        
        _0_x_tensor = _0_x(input_layer)
        _1_s_tensor = [head(_0_x_tensor) for head in _1_s]
        _1_m_tensor = _1_m(_1_s_tensor)
        _2_f_tensor = _2_f(_1_m_tensor)
        _2_h_0_tensor = _2_h_0(_2_f_tensor)
        _2_hL_0_tensor = _2_hL_0(_2_h_0_tensor)
        output_tensor = output_layer(_2_hL_0_tensor)
    
        super().__init__(inputs=input_layer,
                         outputs=output_tensor,
                         *args, 
                         **kwargs)
        self.inner_model = {'inner_model': InnerModel(),
                            'inner_model_input': _2_f}
        self.input_layer = input_layer
        self._0_x = _0_x
        self._1_s = _1_s
        self._1_m = _1_m
        self._2_f = _2_f
        self._2_h_0 = _2_h_0
        self._2_hL_0 = _2_hL_0
        self.output_layer = output_layer
        
    def __call__(self, *args, **kwargs):
        print('CALLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL')
        super().__call__(*args, **kwargs)
        
#     def fit():
#         pass

#     def train_on_batch:
#         pass
    
    def call(self, inputs, *args, **kwargs):
        print()
        print('INPUTS')
        print(inputs)
        # sleep(2)
        exit(-1)
        super().call(inputs, *args, **kwargs)
    
    def compile(self, *args, **kwargs):
        super().compile(*args, **kwargs)
        self.inner_model['inner_model'].compile(optimizer='Adam',
                                                loss='categorical_crossentropy',
                                                metrics=['accuracy'])


class InnerModel(Model):
    def __init__(self, *args, **kwargs):
        _inputs = Input((16384,))
        # z = Lambda(lambda x: K.stop_gradient(x))(_inputs)
        z = Dense(units=10, name='inner_0')(_inputs)
        z = LayerNormalization(units=10)(z)
        
        output_layer = Dense(3, activation='softmax', name='inner_output')(z)
        
        super().__init__(*args,
                         inputs=_inputs,
                         outputs=output_layer,
                         **kwargs)

class TrainInnerModelCallback(Callback):
    def __init__(self, inner_model, pseudo_input,*args, **kwags):
        self.inner_model = inner_model
        self.pseudo_input = pseudo_input
        super().__init__()

    def on_batch_end(self, batch, logs=None):
        print()
        print(self.pseudo_input.output)
        sleep(2)
        
        
    def on_epoch_end(self, epoch, logs=None):
        pass
    
    
        
def get_model():
    model = ModelCeption(name='ModelCeption')
    return model


model = get_model()
model.compile(optimizer='Adam',
              loss=['categorical_crossentropy'],
              metrics=['accuracy']
             )

#### INIT [ScaledDotProductAttention] ####
#### INIT [ScaledDotProductAttention] ####
#### BUILD [ScaledDotProductAttention] ####
#### CALL [ScaledDotProductAttention] ####
    q=(None, 128, 16),
    k=(None, 128, 16),
    v=(None, 128, 128)
    k_T=(None, 16, 128)
    y=(None, 128, 128)
#### COMPUTE OUTPUT SHAPE ####
(None, 128, 128)
#### BUILD [ScaledDotProductAttention] ####
#### CALL [ScaledDotProductAttention] ####
    q=(None, 128, 16),
    k=(None, 128, 16),
    v=(None, 128, 128)
    k_T=(None, 16, 128)
    y=(None, 128, 128)
#### COMPUTE OUTPUT SHAPE ####
(None, 128, 128)


In [143]:
model.summary()

Model: "ModelCeption"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_86 (InputLayer)           (None, 128)          0                                            
__________________________________________________________________________________________________
embedding_50 (Embedding)        (None, 128, 128)     1920000     input_86[0][0]                   
__________________________________________________________________________________________________
scaled_dot_product_attention_99 (None, 128, 128)     20480       embedding_50[0][0]               
__________________________________________________________________________________________________
scaled_dot_product_attention_10 (None, 128, 128)     20480       embedding_50[0][0]               
_______________________________________________________________________________________

In [None]:
callback = TrainInnerModelCallback(inner_model=model.inner_model['inner_model'], 
                                   pseudo_input=model._2_f)

model.fit(x_train, 
          y_train,
          epochs=8,
          batch_size=32,
          validation_split=0.3,
          callbacks=[callback])

In [132]:
model.train_function

<tensorflow.python.keras.backend.EagerExecutionFunction at 0x2b20ba7d208>

In [152]:
x_t = x_train[:int(0.7*len(x_train))]
y_t = y_train[:int(0.7*len(x_train))]

x_v = x_train[int(0.7*len(x_train)):]
y_v = y_train[int(0.7*len(x_train)):]

output = []
for e in range(5):
    print(f'epoch: {e}')
    epoch_output = []
    for i in range(0, len(x_t), 16):
        batch_output = model.train_on_batch(x_t[i: i+16], y_t[i: i+16])
        epoch_output.append(batch_output)
    output.append(epoch_output)

epoch: 0
epoch: 1
epoch: 2
epoch: 3
epoch: 4


In [153]:
model.evaluate(x_v, y_v, batch_size=16)



[0.5742112807868077, 0.7788559794425964]

In [192]:
input_layer = Input(shape=(128,))
_0_x = Embedding(input_dim=15000, output_dim=128)(input_layer)
_1_s = [ScaledDotProductAttention(d_model=128, d_k=16, d_v=128)(_0_x) for _ in range(2)]
_1_m = MultiHeadAttention(d_heads=2, d_model=128, d_k=16, d_v=128)(_1_s)
_2_f = Flatten(name='flattened_intermediate_layer')(_1_m)
_2_h_0 = Dense(units=10, name='hidden_0')(_2_f)
_2_hL_0 = LayerNormalization(units=10)(_2_h_0)
output_layer = Dense(3, activation='softmax', name='output_layer')(_2_hL_0)

model = Model(inputs=[input_layer], outputs=[output_layer])

model.compile(optimizer='Adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


#### INIT [ScaledDotProductAttention] ####
#### BUILD [ScaledDotProductAttention] ####
#### CALL [ScaledDotProductAttention] ####
    q=(None, 128, 16),
    k=(None, 128, 16),
    v=(None, 128, 128)
    k_T=(None, 16, 128)
    y=(None, 128, 128)
#### COMPUTE OUTPUT SHAPE ####
(None, 128, 128)
#### INIT [ScaledDotProductAttention] ####
#### BUILD [ScaledDotProductAttention] ####
#### CALL [ScaledDotProductAttention] ####
    q=(None, 128, 16),
    k=(None, 128, 16),
    v=(None, 128, 128)
    k_T=(None, 16, 128)
    y=(None, 128, 128)
#### COMPUTE OUTPUT SHAPE ####
(None, 128, 128)


In [200]:
batch = x_train[:16]
batch_labels = y_train[:16]

flatten_output_function = K.function([model.input], _2_f)
flatten_output = flatten_output_function(batch)

In [197]:
flatten_output

array([[ 0.00475288,  0.00809301,  0.00449178, ...,  0.00615292,
        -0.00346607,  0.00294867],
       [ 0.00431834,  0.00817334,  0.00445237, ...,  0.00606779,
        -0.00345981,  0.00317416],
       [ 0.00489997,  0.00777089,  0.00452916, ...,  0.00555791,
        -0.00330961,  0.00295466],
       ...,
       [ 0.00379726,  0.00619949,  0.0033668 , ...,  0.00474489,
        -0.0024537 ,  0.00245047],
       [ 0.00449217,  0.00723123,  0.00419872, ...,  0.00574301,
        -0.00319548,  0.00289049],
       [ 0.00438056,  0.00737246,  0.00391611, ...,  0.00511611,
        -0.00302   ,  0.00243112]], dtype=float32)

In [198]:
flatten_output.shape

(16, 16384)

In [199]:
inner = InnerModel()
inner.compile(optimizer='Adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [202]:
inner.train_on_batch(x=flatten_output,
                     y=batch_labels)

[2.5845277, 0.1875]

In [207]:
# Model
input_layer = Input(shape=(128,))
_0_x = Embedding(input_dim=15000, output_dim=128)
_1_s = [ScaledDotProductAttention(d_model=128, d_k=16, d_v=128) for _ in range(2)]
_1_m = MultiHeadAttention(d_heads=2, d_model=128, d_k=16, d_v=128)
_2_f = Flatten()
_2_h_0 = Dense(units=10, name='hidden_0')
_2_hL_0 = LayerNormalization(units=10)
output_layer = Dense(3, activation='softmax', name='output_layer')

_0_x_tensor = _0_x(input_layer)
_1_s_tensor = [head(_0_x_tensor) for head in _1_s]
_1_m_tensor = _1_m(_1_s_tensor)
_2_f_tensor = _2_f(_1_m_tensor)
_2_h_0_tensor = _2_h_0(_2_f_tensor)
_2_hL_0_tensor = _2_hL_0(_2_h_0_tensor)
output_tensor = output_layer(_2_hL_0_tensor)

model = Model(inputs=[input_layer], outputs=[output_tensor])

model.compile(optimizer='Adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


#### INIT [ScaledDotProductAttention] ####
#### INIT [ScaledDotProductAttention] ####
#### BUILD [ScaledDotProductAttention] ####
#### CALL [ScaledDotProductAttention] ####
    q=(None, 128, 16),
    k=(None, 128, 16),
    v=(None, 128, 128)
    k_T=(None, 16, 128)
    y=(None, 128, 128)
#### COMPUTE OUTPUT SHAPE ####
(None, 128, 128)
#### BUILD [ScaledDotProductAttention] ####
#### CALL [ScaledDotProductAttention] ####
    q=(None, 128, 16),
    k=(None, 128, 16),
    v=(None, 128, 128)
    k_T=(None, 16, 128)
    y=(None, 128, 128)
#### COMPUTE OUTPUT SHAPE ####
(None, 128, 128)


In [208]:
_2_f.output

<tf.Tensor 'flatten_60/Reshape:0' shape=(None, None) dtype=float32>

In [209]:
_2_f_tensor

<tf.Tensor 'flatten_60/Reshape:0' shape=(None, None) dtype=float32>

# Custom Loss function

In [20]:
class test_model(Model):
    def __init__(self, *args, **kwargs):
        
        inp1 = Input(shape=(10,))
        inp2 = Input(shape=(10,))
        
        x = Concatenate(inputs=[inp1, inp2], axis=1)
        z = Dense(10)(x)
        
        y = Dense(3, activation='softmax')(z)
        
        super().__init__(inputs=[inp1, inp2], outputs=[x], *args, **kwargs)
        
    def train_on_batch(self, x, y, *args,**kwargs):
        print(x)
        inp1, inp2 = x
        
        print()
        
        super().train_on_batch(x, y, *args, **kwargs)

In [21]:
m = test_model()

In [22]:
m.summary()

Model: "test_model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            (None, 10)           0                                            
__________________________________________________________________________________________________
input_9 (InputLayer)            (None, 10)           0                                            
__________________________________________________________________________________________________
concatenate_4 (Concatenate)     (None, 20)           0           input_8[0][0]                    
                                                                 input_9[0][0]                    
Total params: 0
Trainable params: 0
Non-trainable params: 0
__________________________________________________________________________________________________


In [269]:
m.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

m.train_on_batch(x=[K.zeros((16,10)), K.zeros((16, 10))],
                 y=K.zeros((16, 3)))

[<tf.Variable 'Variable:0' shape=(16, 10) dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>, <tf.Variable 'Variable:0' shape=(16, 10) dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0.

In [275]:
K.eval(m([K.zeros((16,10)), K.zeros((16, 10))]))

array([[0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833],
       [0.33347207, 0.33337963, 0.33314833]], dtype=float32)