In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
import keras.layers as layers
from keras.models import Model
from keras import backend as K
np.random.seed(10)
# Reduce logging output.
tf.logging.set_verbosity(tf.logging.ERROR)


embed = hub.Module("https://tfhub.dev/google/universal-sentence-encoder/2")
embed_size = embed.get_output_info_dict()['default'].get_shape()[1].value
category_counts = 2


Using TensorFlow backend.


# **Building the Model**

In [2]:
def UniversalEmbedding(x):
    return embed(tf.squeeze(tf.cast(x, tf.string)), signature="default", as_dict=True)["default"]
input_text = layers.Input(shape=(1,), dtype=tf.string)
embedding = layers.Lambda(UniversalEmbedding, output_shape=(embed_size,))(input_text)
dense = layers.Dense(256, activation='relu')(embedding)
pred = layers.Dense(category_counts, activation='softmax')(dense)
model = Model(inputs=[input_text], outputs=pred)
last_hidden_weight_w=model.layers[3].get_weights()[0] #weights
last_hidden_weight_b=model.layers[3].get_weights()[1] #Biases
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 1)                 0         
_________________________________________________________________
lambda_1 (Lambda)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 514       
Total params: 131,842
Trainable params: 131,842
Non-trainable params: 0
_________________________________________________________________


# **Preparing the Input**

In [0]:
from sklearn.preprocessing import MultiLabelBinarizer
train_text=np.array(["This is my fist sentence", "this is my second Sentence", "I 'm so happy !", "I 'm very sleepy"])
train_labels=np.array(["0","0","1","1"])
encoder = MultiLabelBinarizer()
encoder.fit_transform(train_labels)
train_label = encoder.transform(train_labels)

test_text=np.array(["This is my try", "this is my second", "I 'm so lazy !", "I 'm very very"])
test_labels=np.array(["0","0","1","1"])
encoder = MultiLabelBinarizer()
encoder.fit_transform(test_labels)
test_label = encoder.transform(test_labels)

# **Training**

In [4]:
with tf.Session() as session:
  K.set_session(session)
  session.run(tf.global_variables_initializer())
  session.run(tf.tables_initializer())
  history = model.fit(train_text, 
            train_label,
            validation_data=(test_text, test_label),
            epochs=10,
            batch_size=32)
  model.save_weights('./model.h5')
  print(last_hidden_weight_w)

Train on 4 samples, validate on 4 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[ 0.0278791   0.00779122]
 [ 0.14899296 -0.00390171]
 [ 0.11884362  0.09706031]
 [ 0.13711494 -0.1245061 ]
 [-0.14385459 -0.14261161]
 [ 0.12220424 -0.04639435]
 [ 0.12684527  0.04081987]
 [ 0.1450387   0.0785443 ]
 [ 0.1379363  -0.08597329]
 [-0.06217295 -0.15108863]
 [-0.07855064 -0.14498825]
 [ 0.11723781 -0.11652759]
 [ 0.06488326  0.14507776]
 [ 0.12806022  0.02404842]
 [ 0.08135933  0.07148902]
 [ 0.02435048  0.01652883]
 [-0.13984764  0.08012739]
 [-0.05318818  0.07390042]
 [-0.0228111  -0.02489601]
 [-0.14994352 -0.09707874]
 [ 0.13701645  0.00943856]
 [-0.04623117 -0.07092538]
 [-0.10755856  0.11597702]
 [ 0.02682802 -0.09743185]
 [ 0.02788872  0.02012792]
 [-0.08955817 -0.07326447]
 [-0.02849473  0.14822948]
 [ 0.09319857 -0.15036437]
 [-0.13559315 -0.06210376]
 [-0.04907978 -0.050529  ]
 [ 0.00239293 -0.10742597]
 [-0.01209

In [5]:
!ls -alh | grep model.h5

-rw-r--r-- 1 root root 530K Nov 11 09:03 model.h5


# **Testing**

In [0]:
new_text = ["In what year did the titanic sink ?", "What is the highest peak in California ?", "Who invented the light bulb ?"]
new_text = np.array(new_text, dtype=object)[:, np.newaxis]
with tf.Session() as session:
  K.set_session(session)
  session.run(tf.global_variables_initializer())
  session.run(tf.tables_initializer())
  model.load_weights('./model.h5')  
  predicts = model.predict(new_text, batch_size=32)

In [7]:
predicts

array([[0.5625144 , 0.43748555],
       [0.48502126, 0.5149787 ],
       [0.5769549 , 0.42304507]], dtype=float32)

In [8]:
categories = ["0","1"]
predict_logits = predicts.argmax(axis=1)
predict_labels = [categories[logit] for logit in predict_logits]
predict_labels

['0', '1', '0']