# Preparation

In [1]:
# These are my default settings
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries imported below.
warnings.filterwarnings("ignore")

import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import joblib

# Default figure size for every matplotlib plot in this notebook.
plt.rcParams["figure.figsize"] = (12, 6)
# Apply seaborn's default theme.
# NOTE(review): this runs after the rcParams assignment above; confirm the installed
# seaborn version does not reset figure.figsize.
sns.set()
# Never truncate columns when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

import zipfile
import shutil

In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

# Let TensorFlow grow GPU memory on demand instead of reserving it up front.
# Memory growth has to be configured identically on every visible GPU, so it is
# enabled on all of them rather than only on the first device.
physical_devices = tf.config.list_physical_devices('GPU')
try:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Memory growth must be set before the GPUs are initialised; report and carry on.
    print(e)

In [3]:
# Loading USE (Universal Sentence Encoder) version 4 from TensorFlow Hub.
# Note: there are 2 USE models, the normal one and the large one.
# Here, I use the normal model.

embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

In [4]:
# Show the repr of the loaded hub module as a quick check that loading succeeded.
embed

<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject at 0x1fb858294f0>

In [5]:
# IMDB reviews: the first 95% of the official train split is used for training,
# the remaining 5% for validation, and the official test split is kept as-is.
# as_supervised=True makes every element a (text, label) pair.
imdb_splits = tfds.load(
    "imdb_reviews",
    as_supervised=True,
    split=["train[:95%]", "train[95%:]", "test"],
)
train_data, valid_data, test_data = imdb_splits


In [6]:

# Report how many examples ended up in each split.
split_sizes = {
    "Train set size: ": len(train_data),
    "Test set size: ": len(test_data),
    "Valid set size: ": len(valid_data),
}
for caption, size in split_sizes.items():
    print(caption, size)

Train set size:  23750
Test set size:  25000
Valid set size:  1250


In [7]:
# Inspect the dataset's element spec: each element is a (string, int64) pair.
train_data

<PrefetchDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>

# Model construction

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# os.environ["TFHUB_CACHE_DIR"] = "my_tfhub_cache"

# Frozen Universal Sentence Encoder wrapped as a Keras layer: it consumes raw
# scalar strings (input_shape=[]) and, per the summary, emits 512-d vectors.
use_layer = hub.KerasLayer(
    "https://tfhub.dev/google/universal-sentence-encoder/4",
    name="USE",
    trainable=False,
    dtype=tf.string,
    input_shape=[],
)

# Binary classifier: encoder -> 64-unit ReLU layer -> single sigmoid output.
bin_model = Sequential()
bin_model.add(use_layer)
bin_model.add(Dense(64, activation="relu"))
bin_model.add(Dense(1, activation="sigmoid"))

bin_model.summary()





Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 USE (KerasLayer)            (None, 512)               256797824 
                                                                 
 dense (Dense)               (None, 64)                32832     
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 256,830,721
Trainable params: 32,897
Non-trainable params: 256,797,824
_________________________________________________________________


In [9]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked during fit/evaluate.
bin_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

Train the model for 1 epoch in mini-batches of 32 samples, i.e. one pass over all 23,750 reviews in the training split (`train_data`). While training, monitor the model's loss and accuracy on the 1,250 samples from the validation set:

In [10]:
# Train for one epoch on batches of 32, validating on the held-out 5% slice.
# The batch size comes from Dataset.batch(32); fit() must not also be given
# batch_size when the input is a tf.data.Dataset.
# NOTE(review): training is pinned to the CPU even though GPU memory growth is
# configured in an earlier cell; confirm this is intentional.
with tf.device('/CPU:0'):
    bin_model.fit(
        train_data.batch(32),
        epochs=1,
        validation_data=valid_data.batch(32),
        verbose=1,
    )



In [11]:
# Score the model on the test split in batches of 32, on the CPU like the training cell.
with tf.device("/CPU:0"):
    bin_model.evaluate(x=test_data.batch(32))
    

