In [None]:
!pip install tensorflow
!pip install tensorflow-text

In [19]:
# Import the necessary libraries
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from tensorflow.keras import layers
from tensorflow.keras.models import Model
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


In [None]:
# Get the training data.
# The file is tab-separated; column names are supplied via `names`, so the
# first line of the file is read as data.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in pandas 2.0.
# `on_bad_lines="warn"` is its replacement: malformed rows are skipped and a
# warning is emitted for each, matching `error_bad_lines=False` with the old
# default `warn_bad_lines=True`.
df = pd.read_csv(
    "sts-train.csv",
    sep="\t",
    on_bad_lines="warn",
    names=["Genre", "File", "Years", "Train", "Similarity", "Sentence 1", "Sentence 2"],
)

df.head()

In [None]:
# Separate the model inputs from the target column.
# `DataFrame.drop` returns a new frame and leaves the original untouched, so
# calling it without using the result has no effect. Selecting the wanted
# columns explicitly guarantees the features contain only the two sentences
# (no label leakage) and the labels contain only the similarity score.
# `df` itself is left unmodified so this cell can be re-run safely.
df_features = df[["Sentence 1", "Sentence 2"]].copy()
df_label = df[["Similarity"]].copy()

# Hold out 33% of the rows for testing; a fixed seed makes the split
# reproducible across kernel restarts.
train, test, train_labels, test_labels = train_test_split(
    df_features, df_label, test_size=0.33, random_state=42
)

train.head()


In [None]:
# BERT is used to embed sentences; the resulting vector can be fed to a
# neural network. The encoder handle below is the L-12 / H-768 / A-12 variant,
# paired with the uncased English preprocessing model.
BERT_PREPROCESS_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
BERT_ENCODER_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

# Wrap both TF Hub models as Keras layers (preprocessing first, then encoder).
bert_preprocess = hub.KerasLayer(BERT_PREPROCESS_URL)
bert_encoder = hub.KerasLayer(BERT_ENCODER_URL)

# Function which returns the embedded vectors for the given sentences
def get_sentence_embeding(sentences):
    """Return the BERT pooled output for ``sentences``.

    ``sentences`` is passed unchanged to ``bert_preprocess``; the result is
    fed to ``bert_encoder`` and the ``'pooled_output'`` entry of the encoder's
    output is returned.
    """
    encoder_inputs = bert_preprocess(sentences)
    encoder_outputs = bert_encoder(encoder_inputs)
    return encoder_outputs['pooled_output']

# Smoke test: embed two near-identical sentences and display the result
sample_sentences = ["A plane is taking off", "An air plane is taking off"]
get_sentence_embeding(sample_sentences)


In [None]:
# Width of the hidden layer in the head on top of BERT. The previous head used
# a single sigmoid unit here, which collapsed the pooled embedding to one
# scalar before the output layer and so added no capacity.
HIDDEN_UNITS = 64

# BERT layers: raw string input -> preprocessing -> encoder outputs
text_input = layers.Input(shape=(), dtype=tf.string, name='text')
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

# Neural network layers: one hidden layer on the pooled output, followed by a
# single sigmoid output unit.
x = layers.Dense(HIDDEN_UNITS, activation='relu')(outputs['pooled_output'])
x = layers.Dense(1, activation='sigmoid', name="output")(x)

# NOTE(review): the sigmoid output lies in (0, 1). If the `Similarity` labels
# use a different scale they must be rescaled before training - confirm.
# NOTE(review): the model takes one string per example; how the two sentences
# of a pair are combined into that string is not visible here - confirm.

# Use inputs and outputs to construct a final model
model = Model(inputs=[text_input], outputs=[x])
model.summary()