In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

### Universal Sentence Encoder Demo

The Universal Sentence Encoder model is ~1 GB, so loading it for the first time may take a while.

In [2]:
# Load the Universal Sentence Encoder from TF Hub.
# `use_url` is the TF Hub handle for the encoder (the trailing "/4" presumably
# pins the model version -- confirm on the TF Hub page). It is kept in its own
# variable because the model-definition cell later passes it to hub.KerasLayer.
use_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
# `embed` is the loaded encoder; the demo cell calls it directly on a list of
# sentences. The model is ~1 GB, so this line can take a while to run.
embed = hub.load(use_url)

In [3]:
# Two sample sentences used to sanity-check the encoder: calling `embed` on a
# list of strings returns one embedding row per sentence.
input_sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "I am a sentence for which I would like to get its embedding",
]
print(embed(input_sentences))

tf.Tensor(
[[-0.03133018 -0.06338634 -0.01607502 ... -0.03242778 -0.04575741
   0.05370456]
 [ 0.05080861 -0.01652428  0.01573781 ...  0.00976659  0.03170123
   0.0178812 ]], shape=(2, 512), dtype=float32)


### Data Preprocessing

In [4]:
# Read the labelled sentences; the path is relative to the notebook's folder.
df = pd.read_csv("../data/movie_sentences.csv")

# Show the first and the last rows so both ends of the file are visible.
for preview in (df.head(), df.tail()):
    print(preview)

                                            sentence  rating
0  smart and alert , thirteen conversations about...       0
1  color , musical bounce and warm seas lapping o...       0
2  it is not a mass-market entertainment but an u...       0
3  a light-hearted french film about the spiritua...       0
4  my wife is an actress has its moments in looki...       0
                                               sentence  rating
9995  in the end , they discover that balance in lif...       1
9996  a counterfeit 1000 tomin bank note is passed i...       1
9997  enter the beautiful and mysterious secret agen...       1
9998  after listening to a missionary from china spe...       1
9999  looking for a short cut to fame , glass concoc...       1


In [5]:
# Hold out 20% of the rows for evaluation.
# train_test_split shuffles the rows before splitting, so the seed is pinned:
# without it every kernel restart yields a different split and the test
# loss/accuracy reported further down cannot be reproduced.
RANDOM_SEED = 42
train, test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)

### Define Model Architecture

In [6]:
# Wrap the same TF Hub handle as a Keras layer so the encoder can be used
# inside the functional model defined in the next cell.
# NOTE(review): this rebinds `embed`, which previously held the object returned
# by hub.load(); any cell run after this one sees the KerasLayer instead.
embed = hub.KerasLayer(use_url)

In [7]:
# Small classification head stacked on top of the sentence embedding.
dense1 = tf.keras.layers.Dense(32, activation='relu')
dropout = tf.keras.layers.Dropout(0.2)
# Single unit with no activation: the model emits one raw score per sentence.
output_layer = tf.keras.layers.Dense(1)

# Each example is one raw string, so a batch is declared as (batch, 1).
inputs = tf.keras.layers.Input(shape=(1,), dtype=tf.string)

# Flatten the batch to a 1-D tensor of strings before embedding it.
# tf.reshape(..., [-1]) is used instead of an axis-less tf.squeeze because
# squeeze removes *every* size-1 dimension: a batch holding a single sentence
# would collapse to a scalar string instead of a batch of one. The reshape
# always yields shape (batch,), and also accepts an input that is already 1-D.
# No cast is needed because `inputs` is declared as tf.string above.
x = embed(tf.reshape(inputs, [-1]))
x = dense1(x)
x = dropout(x)
outputs = output_layer(x)

# Functional model: raw sentence strings in, one score per sentence out.
model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [8]:
# Training configuration: the single output score is regressed onto the
# `rating` label with mean squared error, optimised with Adam, and accuracy is
# tracked as an additional metric.
mse_loss = tf.keras.losses.MeanSquaredError()
model.compile(
    loss=mse_loss,
    optimizer='adam',
    metrics=['accuracy'],
)

### Train Model

In [9]:
# Fit on the training split for 5 epochs: sentences are the inputs and the
# `rating` column is the target. The returned History object is the cell's
# displayed value.
model.fit(x=train['sentence'], y=train['rating'], epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x142e9ed10>

### Evaluate Model

In [12]:
# Score the held-out split. evaluate() returns the loss followed by the metric
# configured in compile(), which are unpacked into two names here.
test_loss, test_accuracy = model.evaluate(x=test['sentence'], y=test['rating'])
summary_template = "Test Loss: {}, Test Accuracy: {}"
print(summary_template.format(test_loss, test_accuracy))

Test Loss: 0.07178784161806107, Test Accuracy: 0.9169999957084656


In [17]:
# Hand-picked sentences to run through the trained model.
demo_sentences = np.array([
    "The Earth is round",
    "This sandwich tastes bad",
    "The committee probe started in January 2017 under then-Chair Devin Nunes and concluded in March 2018 with a report finding no evidence that the Trump campaign conspired with the Kremlin",
    "He misled the country repeatedly on an issue that consumed American politics",
    "As for Mr. Schiff, no one should ever believe another word he says"
])

# Call the model directly on the batch and convert the result to NumPy; each
# row of `scores` holds the single output value for one sentence.
scores = np.array(model(demo_sentences))

# Report every sentence with its score and the label implied by a 0.5 cut-off.
for sentence, score in zip(demo_sentences, scores):
    if score > 0.5:
        classification = "Objective"
    else:
        classification = "Subjective"
    print("{}\nScore: {}\n ({})".format(sentence, score[0], classification))

SyntaxError: invalid syntax (<ipython-input-17-04e1129aa18d>, line 10)