# Dependencies

In [1]:
!pip install tensorflow-text

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-text
  Downloading tensorflow_text-2.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m64.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow-text
Successfully installed tensorflow-text-2.12.1


In [2]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
import pickle
import re
import tensorflow_hub as hub
import tensorflow_text as text

# Data Preparation

In [3]:
# Read the workout dataset from CSV and preview its first rows
DATASET_PATH = 'workout_dataset.csv'
data = pd.read_csv(DATASET_PATH)
data.head()

Unnamed: 0.1,Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level
0,0,Partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate
1,1,Banded crunch isometric hold,The banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate
2,2,FYR Banded Plank Jack,The banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate
3,3,Banded crunch,The banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate
4,4,Crunch,The crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate


In [4]:
# Normalizing description feature: every run of characters outside [A-Za-z0-9]
# becomes a single space, then the text is lowercased.
# The vectorized .str methods replace the per-row Python lambda. regex=True is
# passed explicitly because recent pandas versions treat the pattern as a
# literal string by default.
# Note: .str methods pass missing values through as NaN rather than raising.
data['Desc'] = data['Desc'].str.replace('[^A-Za-z0-9]+', ' ', regex=True).str.lower()
data.head()

Unnamed: 0.1,Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level
0,0,Partner plank band row,the partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate
1,1,Banded crunch isometric hold,the banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate
2,2,FYR Banded Plank Jack,the banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate
3,3,Banded crunch,the banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate
4,4,Crunch,the crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate


# Encoding data

In [5]:
# Using BERT to encode description text: load the TF Hub preprocessing layer
# and the BERT encoder layer that consumes its output
BERT_PREPROCESS_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
BERT_ENCODER_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

preprocessor = hub.KerasLayer(BERT_PREPROCESS_URL)
encoder = hub.KerasLayer(BERT_ENCODER_URL, trainable=True)

def get_bert_embeddings(text, preprocessor, encoder):
  """Return the encoder's pooled output for a single piece of text.

  Args:
    text: The string to embed. (Inside this function the name shadows the
      `tensorflow_text` module alias `text`; the parameter name is kept so
      existing callers keep working.)
    preprocessor: Callable that maps a batch of strings to the encoder's
      inputs.
    encoder: Callable that maps those inputs to a mapping containing a
      'pooled_output' entry. It is invoked with `training=False`.

  Returns:
    The 'pooled_output' of a batch of one, converted with `.numpy().tolist()`,
    i.e. a nested list holding a single embedding row.
  """
  # Call the layers directly on a one-element batch. Building a new
  # tf.keras.Input / tf.keras.Model on every call repeated the model
  # construction once per row without changing the result.
  sentences = tf.constant([text])
  encoder_inputs = preprocessor(sentences)
  # training=False keeps the encoder in inference mode even though the layer
  # was created as trainable.
  outputs = encoder(encoder_inputs, training=False)
  return outputs['pooled_output'].numpy().tolist()

# Embed every description and collect the results in a one-column frame
encodings = (
    data['Desc']
    .map(lambda desc: get_bert_embeddings(desc, preprocessor, encoder))
    .to_frame(name='encodings')
)
encodings

Unnamed: 0,encodings
0,"[[-0.497314989566803, 0.04767085984349251, 0.6..."
1,"[[-0.6131973266601562, -0.07130786031484604, 0..."
2,"[[-0.6472458243370056, -0.14792925119400024, -..."
3,"[[-0.644658088684082, -0.11583315581083298, -0..."
4,"[[-0.6215099692344666, -0.055696796625852585, ..."
...,...
1324,"[[-0.7662083506584167, -0.10315348953008652, 0..."
1325,"[[-0.2660151422023773, 0.2419849932193756, 0.9..."
1326,"[[-0.735813558101654, -0.1433982402086258, -0...."
1327,"[[-0.735813558101654, -0.1433982402086258, -0...."


In [6]:
# Persist the encodings frame to disk as a pickle file
ENCODINGS_PATH = 'encodings.pickle'
encodings.to_pickle(ENCODINGS_PATH)