In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import time

In [2]:
# Build the TensorFlow graph once, up front, so that every later prediction reuses it
# instead of paying the graph-construction cost on each call.
g = tf.Graph()
with g.as_default():
    # Google's Universal Sentence Encoder from TF-Hub, pinned to version 2 of the module.
    embed = hub.Module("https://tfhub.dev/google/universal-sentence-encoder/2")

    # The raw sentences are fed into these two string placeholders; element i of the
    # first is compared with element i of the second. Note that `(None)` is plain
    # `None` (not a tuple), i.e. the placeholder shape is left unconstrained.
    sts_input1 = tf.placeholder(tf.string, shape=(None))
    sts_input2 = tf.placeholder(tf.string, shape=(None))

    # Embed the sentences and scale every embedding (each row) to unit length.
    # `axis=1` matters: with the default `axis=None`, l2_normalize divides by the norm
    # of the whole batch tensor, so each score would shrink as more sentences are fed
    # together instead of being a per-pair cosine similarity.
    sts_encode1 = tf.nn.l2_normalize(embed(sts_input1), axis=1)
    sts_encode2 = tf.nn.l2_normalize(embed(sts_input2), axis=1)

    # Row-wise dot product of the normalized embeddings: one number per sentence pair
    # that represents how similar the two sentences are. Higher is better.
    sim_scores = tf.reduce_sum(tf.multiply(sts_encode1, sts_encode2), axis=1)
    init_op = tf.group([tf.global_variables_initializer(), tf.tables_initializer()])
# Freeze the graph so that no further ops can be added to it by accident.
g.finalize()

# Create session and initialize.
session = tf.Session(graph=g)
session.run(init_op)

In [3]:
# Sentence pairs to compare. Odd-numbered sentences go into `sentences1`, even-numbered
# ones into `sentences2`, so pair k is (sentences1[k], sentences2[k]).
# Each name is assigned exactly once so that no pair is silently shadowed by a later one.

# Same topic, different wording.
sentence1 = "Brown vs Board of Education was a landmark ruling that desegregated schools in America"
sentence2 = "The United States Supreme Court ruled that schools should be integrated in Brown v Board of Education"

# Unrelated statements.
sentence3 = "I think that the reason why Christopher Columbus went to America was to find money."
sentence4 = "I have absolutely no idea what the question is asking."

sentence5 = "The three banches of government in the United states are: executive, legislative, and judicial."
sentence6 = "I really hope that the caffeteria is serving meat loaf today, but I would settle for pasta."

# Identical sentences.
sentence7 = "The mitochondria is the powerhouse of the cell."
sentence8 = "The mitochondria is the powerhouse of the cell."

sentence9 = "At common law, all felonies were punishable by death. Therefore, Bob would be executed if convicted. He needs a lawyer to save his life."
sentence10 = "Bob should retain counsel to represent him in the burglary proceedings, as his life is at stake."

# Paraphrases.
sentence11 = "He's heavier than I am."
sentence12 = "He weighs more than I do."

# Near-identical wording, opposite meaning.
sentence13 = "The cup is half empty."
sentence14 = "The cup is half full."

sentence15 = "The answer to this question is absolutely yes."
sentence16 = "The answer to this question is absolutely no."

# Agreement vs. disagreement, short and long forms.
sentence17 = "I am pretty sure the answer is yes."
sentence18 = "Yes, I think so."

sentence19 = "I am pretty sure the answer is yes."
sentence20 = "No, I do not think so."

sentence21 = "I am pretty sure the answer is yes. This is because the revolution would not have been successful without Paul Revere."
sentence22 = "No, I do not think so. George Washington was clearly the backbone of our nation during those trying times."

sentences1 = [sentence1, sentence3, sentence5, sentence7, sentence9,
              sentence11, sentence13, sentence15, sentence17, sentence19, sentence21]
sentences2 = [sentence2, sentence4, sentence6, sentence8, sentence10,
              sentence12, sentence14, sentence16, sentence18, sentence20, sentence22]

In [4]:
# Score every sentence pair in a single session.run call and time it.
# perf_counter is a monotonic clock intended for measuring short intervals.
start = time.perf_counter()
[similarities] = session.run(
    [sim_scores],
    feed_dict={
        sts_input1: sentences1,
        sts_input2: sentences2,
    })
end = time.perf_counter()
print('TOTAL TIME: ', end - start)

# Scores are reported relative to the best-scoring pair in this batch.
max_similarity = max(similarities)

# Iterate over the results themselves rather than a hard-coded count, so every pair is
# reported no matter how many sentences were fed in.
for sent1, sent2, similarity in zip(sentences1, sentences2, similarities):
    print('--------------------')
    print('Sentences1: ', sent1)
    print('Sentences2: ', sent2)
    # Explicit float() conversions make the division run in double precision
    # whatever scalar type session.run returned.
    print('Similarity: ', float(similarity) / float(max_similarity))

TOTAL TIME:  1.281527042388916
--------------------
Sentences1:  Brown vs Board of Education was a landmark ruling that desegregated schools in America
Sentences2:  The United States Supreme Court ruled that schools should be integrated in Brown v Board of Education
Similarity:  0.8353058460886845
--------------------
Sentences1:  I think that the reason why Christopher Columbus went to America was to find money.
Sentences2:  I have absolutely no idea what the question is asking.
Similarity:  0.09635219480444017
--------------------
Sentences1:  The three banches of government in the United states are: executive, legislative, and judicial.
Sentences2:  I really hope that the caffeteria is serving meat loaf today, but I would settle for pasta.
Similarity:  0.00665503693528666
--------------------
Sentences1:  The mitochondria is the powerhouse of the cell.
Sentences2:  The mitochondria is the powerhouse of the cell.
Similarity:  1.0
--------------------
Sentences1:  At common law, all f