In [86]:
import tensorflow as tf
import tensorflow_hub as hub

from tensorflow.keras import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import SeparableConv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import GlobalAveragePooling1D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Embedding

import numpy as np

In [29]:
def module_fn():
    MAX_SEQ_LEN = 5
    base_module = hub.Module('https://tfhub.dev/google/nnlm-en-dim128/1')
    
    text_input = tf.placeholder(dtype=tf.string, shape=[None, 1])
    reshaped_input = tf.reshape(text_input, [-1])
    split = tf.strings.split(reshaped_input)
    split = tf.sparse.to_dense(split, default_value='')
    seq_len = tf.shape(split)[1]    
    batch_size = tf.shape(split)[0]
    split = tf.cond(
        seq_len < MAX_SEQ_LEN,
        lambda: tf.pad(split, [[0, 0], [0, MAX_SEQ_LEN - seq_len]], constant_values=''),
        lambda: tf.slice(split, [0, 0], [batch_size, MAX_SEQ_LEN])
    )
    embeddings = tf.map_fn(base_module, split, dtype=tf.float32)
    hub.add_signature(inputs=text_input, outputs=embeddings)

my_module_spec = hub.create_module_spec(
    module_fn,
)

sequence_embed = hub.Module(my_module_spec)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


# Sequence Embed within Model

In [42]:
def CustomSeperableConv1D(filters, kernel_size):
    return SeparableConv1D(
        filters, 
        kernel_size, 
        padding='same', 
        activation='relu',
        bias_initializer='random_uniform',
        depthwise_initializer='random_uniform'
    )

input_layer = Input(shape=(1,), dtype='string')
embeddings = Lambda(sequence_embed, name='sequence_embed')(input_layer)
dropout1 = Dropout(0.5)(embeddings)
sepconv1a = CustomSeperableConv1D(32, 3)(dropout1)
sepconv1b = CustomSeperableConv1D(32, 3)(sepconv1a)
globalavgpool = GlobalAveragePooling1D()(sepconv1b)
dropout2 = Dropout(0.5)(globalavgpool)
logits = Dense(1, activation='sigmoid')(dropout2)

model = Model(inputs=[input_layer], outputs=logits)

optimizer = tf.keras.optimizers.Adam(lr=0.01)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['acc'])
# model.summary()

x = np.array(['the quick brown fox', 'fruit juice'])
y = np.array([0, 1])
test_data = np.array(['orange juice', 'apple juice', 'big bad lion'])[:, np.newaxis]
init = [tf.global_variables_initializer(), tf.tables_initializer()]
with tf.Session() as sess:
    sess.run(init)
    model.fit(x, y, epochs=50, verbose=0)
    print(model.predict(x))
    print(model.predict(test_data))

estimator = tf.keras.estimator.model_to_estimator(model)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Using default config.
INFO:tensorflow:Using the Keras model provided.


AttributeError: 'Module' object has no attribute '__name__'

# Sequence Embed Outside Model

In [19]:
def CustomSeperableConv1D(filters, kernel_size):
    return SeparableConv1D(
        filters, 
        kernel_size, 
        padding='same', 
        activation='relu',
        bias_initializer='random_uniform',
        depthwise_initializer='random_uniform'
    )

input_layer = Input(shape=(5, 128), dtype=tf.float32, name='text')
dropout1 = Dropout(0.5)(input_layer)
sepconv1a = CustomSeperableConv1D(32, 3)(dropout1)
sepconv1b = CustomSeperableConv1D(32, 3)(sepconv1a)
globalavgpool = GlobalAveragePooling1D()(sepconv1b)
dropout2 = Dropout(0.5)(globalavgpool)
logits = Dense(1, activation='sigmoid')(dropout2)

model = Model(inputs=[input_layer], outputs=logits)

optimizer = tf.keras.optimizers.Adam(lr=0.01)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['acc'])
# model.summary()

# x = np.array(['the quick brown fox', 'fruit juice'])[:, np.newaxis]
# y = np.array([0, 1])
# test_data = np.array(['orange juice', 'apple juice', 'big bad lion'])[:, np.newaxis]
# init = [tf.global_variables_initializer(), tf.tables_initializer()]
# with tf.Session() as sess:
#     sess.run(init)
#     embeddings = sess.run(sequence_embed(x))
#     model.fit(embeddings, y, epochs=50, verbose=0)
#     print(model.predict(sess.run(sequence_embed(x))))
#     print(model.predict(sess.run(sequence_embed(test_data))))

In [20]:
estimator = tf.keras.estimator.model_to_estimator(model)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using the Keras model provided.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f82304ba9d0>, '_model_dir': '/tmp/tmpscSvA0', '_protocol': None, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_device_fn': None, '_experimental_distribute': None, '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_evaluation_master': '', '_eval_distribute': None, '_train_distribute': None, '_master': ''}


In [21]:
model.input_names

['text']

# Test

In [106]:
x = np.array(['the quick brown fox', 'fruit juice'])
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=200)
tokenizer.fit_on_texts(x)
words = tokenizer.word_index.keys()
table = tf.contrib.lookup.string_to_index_table_from_tensor(np.array(words))

init = [tf.global_variables_initializer(), tf.tables_initializer()]

with tf.Session() as sess:
    sess.run(init)
    MAX_SEQ_LEN = 5
    text_input = tf.constant(x)
    reshaped_input = tf.reshape(text_input, [-1])
    split = tf.strings.split(reshaped_input)
    split = tf.sparse.to_dense(split, default_value='')
    seq_len = tf.shape(split)[1]    
    batch_size = tf.shape(split)[0]
    split = tf.cond(
        seq_len < MAX_SEQ_LEN,
        lambda: tf.pad(split, [[0, 0], [0, MAX_SEQ_LEN - seq_len]], constant_values=''),
        lambda: tf.slice(split, [0, 0], [batch_size, MAX_SEQ_LEN])
    )
    print(x)
    print(sess.run(table.lookup(split)))

['the quick brown fox' 'fruit juice']
[[ 5  4  0  1 -1]
 [ 3  2 -1 -1 -1]]


In [130]:
x = np.array(['the quick brown fox', 'fruit juice'])
y = np.array([0, 1])
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=200)
tokenizer.fit_on_texts(x)
words = tokenizer.word_index.keys()
table = tf.contrib.lookup.string_to_index_table_from_tensor(tf.constant(words))
dataset = tf.data.Dataset.from_tensor_slices(({'text': x}, y))
def preprocess_fn(x, y):
    split = tf.strings.split(tf.expand_dims(x['text'], axis=-1))
    split = tf.sparse.to_dense(split, default_value='')
    seq_len = tf.shape(split)[1]    
    batch_size = tf.shape(split)[0]
    split = tf.cond(
        seq_len < MAX_SEQ_LEN,
        lambda: tf.pad(split, [[0, 0], [0, MAX_SEQ_LEN - seq_len]], constant_values=''),
        lambda: tf.slice(split, [0, 0], [batch_size, MAX_SEQ_LEN])
    )
    split = table.lookup(split)
    return {'text': split}, y
dataset = dataset.map(preprocess_fn)
generator = dataset.make_one_shot_iterator()
with tf.Session() as sess:
    print(sess.run(generator.get_next()))
    print(sess.run(generator.get_next()))

ValueError: Failed to create a one-shot iterator for a dataset. `Dataset.make_one_shot_iterator()` does not support datasets that capture stateful objects, such as a `Variable` or `LookupTable`. In these cases, use `Dataset.make_initializable_iterator()`. (Original error: Cannot capture a stateful node (name:string_to_index_17/hash_table, type:HashTableV2) by value.)

In [108]:
x = np.array(['the quick brown fox', 'fruit juice'])
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=200)
tokenizer.fit_on_texts(x)
words = tokenizer.word_index.keys()
table = tf.contrib.lookup.string_to_index_table_from_tensor(tf.constant(words))
x = x[:, np.newaxis]
y = np.array([0, 1])
test_data = np.array(['orange juice', 'apple juice', 'big bad lion'])[:, np.newaxis]

def text_to_seq(texts):
    MAX_SEQ_LEN = 5
    reshaped_input = tf.reshape(texts, [-1])
    split = tf.strings.split(reshaped_input)
    split = tf.sparse.to_dense(split, default_value='')
    seq_len = tf.shape(split)[1]    
    batch_size = tf.shape(split)[0]
    split = tf.cond(
        seq_len < MAX_SEQ_LEN,
        lambda: tf.pad(split, [[0, 0], [0, MAX_SEQ_LEN - seq_len]], constant_values=''),
        lambda: tf.slice(split, [0, 0], [batch_size, MAX_SEQ_LEN])
    )
    
    return table.lookup(split)

tf.data.Dataset.from_tensor_slices()

train_input_fn = tf.estimator.inputs.numpy_input_fn(
    
)

def CustomSeperableConv1D(filters, kernel_size):
    return SeparableConv1D(
        filters, 
        kernel_size, 
        padding='same', 
        activation='relu',
        bias_initializer='random_uniform',
        depthwise_initializer='random_uniform'
    )

input_layer = Input(shape=(5,), dtype=tf.float32, name='tokenized_seq')
embeddings = Embedding(input_dim=200, output_dim=32, input_length=5)(text_sequences)
dropout1 = Dropout(0.5)(embeddings)
sepconv1a = CustomSeperableConv1D(32, 3)(dropout1)
sepconv1b = CustomSeperableConv1D(32, 3)(sepconv1a)
globalavgpool = GlobalAveragePooling1D()(sepconv1b)
dropout2 = Dropout(0.5)(globalavgpool)
logits = Dense(1, activation='sigmoid')(dropout2)

model = Model(inputs=[input_layer], outputs=logits)

optimizer = tf.keras.optimizers.Adam(lr=0.01)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['acc'])
model.summary()


# init = [tf.global_variables_initializer(), tf.tables_initializer()]
# with tf.Session() as sess:
#     sess.run(init)
#     model.fit(x, y, epochs=50, verbose=0)
#     print(model.predict(x))
#     print(model.predict(test_data))

estimator = tf.keras.estimator.model_to_estimator(model)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_26 (InputLayer)        (None, 1)                 0         
_________________________________________________________________
lambda (Lambda)              (None, None)              0         
_________________________________________________________________
embedding_1 (Embedding)      (None, None, 32)          6400      
_________________________________________________________________
dropout_46 (Dropout)         (None, None, 32)          0         
_________________________________________________________________
separable_conv1d_46 (Separab (None, None, 32)          1152      
_________________________________________________________________
separable_conv1d_47 (Separab (None, None, 32)          1152      
_________________________________________________________________
global_average_pooling1d_23  (None, 32)                0         
__________

ValueError: Tensor("lambda/cond/Merge:0", shape=(?, ?), dtype=string) must be from the same graph as Tensor("string_to_index_16/hash_table:0", shape=(), dtype=resource).