In [2]:
!pip install tensorflow
!pip install transformers
!pip install scikit-learn



In [3]:
!pip install tf-keras
!pip install ipywidgets --upgrade




In [4]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dense, Flatten, Concatenate
from tensorflow.keras.models import Model
from transformers import BertTokenizer, TFBertModel
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split


In [5]:
#Initialize BioBert
tokenizer = BertTokenizer.from_pretrained('dmis-lab/biobert-v1.1')
biobert_model = TFBertModel.from_pretrained('dmis-lab/biobert-v1.1', from_pt=True)

pytorch_model.bin:  92%|#########1| 398M/433M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFBertModel.

All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [32]:
#Load the Exercise Recommendations csv (Training Data) 
ex_rec = './ex_rec.csv'
df_recs = pd.read_csv(ex_rec)

#Load the Exercise Dataset 
ex_ds = './ex_ds.xlsx'
df_ex = pd.read_excel(ex_ds, sheet_name='output')

df_recs.head()


Unnamed: 0,Name,Condition from document,Medical Condition,Affected Area,Age,Gender,BMI,Exercise Recommendations
0,Alex Smith,,Orthopedic surgery,knee,22.5,Male,22.5,"Supine unilateral knee flexions self assisted,..."
1,Alice Harris,,Osteoarthritis,knee,25.0,Female,25.0,"Prone knee hangs, Single knee to chest stretch..."
2,Alice Walker,,Amputee rehabilitation,,27.0,Female,27.0,"Prone knee hangs, Seated bilateral toe raises,..."
3,Amy Collins,,Pelvic floor dysfunction,,25.0,Female,25.0,"Prone knee hangs, Seated bilateral toe raises,..."
4,Angela Rodriguez,,Orthopedic surgery,carpal tunnel release,23.0,Female,23.0,"Prone knee hangs, Seated bilateral toe raises,..."


In [15]:
def get_biobert_embedding(text):
    inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True, max_length=128)
    outputs = biobert_model(inputs)
    return tf.reduce_mean(outputs.last_hidden_state, axis=1).numpy()

In [20]:
df_ex['Exercise Embedding'] = df_ex['Exercise name'].apply(lambda x: get_biobert_embedding(x))
df_ex.head()

Unnamed: 0,ExerciseID,Exercise name,Type,Joints,Equipment,Tags,Unnamed: 6,Exercise Embedding
0,10001.0,Prone knee hangs,B,knee,,"TKA, prone knee extension",,"[[-0.11381149, -0.099809445, -0.09421855, -0.0..."
1,10002.0,Seated bilateral toe raises,A,ankle,,dorsiflexion,,"[[-0.0720878, 0.06945901, -0.099388786, 0.0298..."
2,10003.0,Seated bilateral heel raises,A,ankle,,"plantarflexion, seated bilateral calf raises",,"[[-0.12523209, -0.021516671, -0.09993427, 0.14..."
3,10004.0,Seated unilateral plantarflexions with band,A,ankle,band,seated plantarflexions with band,,"[[-0.023262559, 0.00014787912, -0.043929968, -..."
4,10005.0,Seated unilateral plantarflexions with band on...,A,ankle,band,seated plantarflexions with band,,"[[-0.09319111, 0.011779558, 0.02671768, 0.0078..."


In [28]:
#Saving all Exercise Embeddings in a dictionary
exercise_embeddings = {row['ExerciseID']: row['Exercise Embedding'] for index, row in df_ex.iterrows()}

In [38]:
# Preprocessing the patient profile data
le_gender = LabelEncoder()
df_recs['Gender'] = le_gender.fit_transform(df_recs['Gender'])

# Normalizing Age and BMI
scaler = StandardScaler()
df_recs[['Age', 'BMI']] = scaler.fit_transform(df_recs[['Age', 'BMI']])

df_recs['Affected Area'] = df_recs['Affected Area'].fillna('Unknown')

In [39]:
df_recs.head()

Unnamed: 0,Name,Condition from document,Medical Condition,Affected Area,Age,Gender,BMI,Exercise Recommendations,Medical Condition Embedding
0,Alex Smith,,Orthopedic surgery,knee,-0.831781,1,-0.831781,"Supine unilateral knee flexions self assisted,...","[[-0.07959338, 0.1902951, -0.07429469, -0.0399..."
1,Alice Harris,,Osteoarthritis,knee,-0.130461,0,-0.130461,"Prone knee hangs, Single knee to chest stretch...","[[-0.25620073, -0.29091722, -0.21099968, 0.258..."
2,Alice Walker,,Amputee rehabilitation,Unknown,0.430596,0,0.430596,"Prone knee hangs, Seated bilateral toe raises,...","[[-0.107211836, 0.062950395, -0.38891056, -0.0..."
3,Amy Collins,,Pelvic floor dysfunction,Unknown,-0.130461,0,-0.130461,"Prone knee hangs, Seated bilateral toe raises,...","[[-0.2802109, -0.076734655, 0.28248388, 0.1526..."
4,Angela Rodriguez,,Orthopedic surgery,carpal tunnel release,-0.691517,0,-0.691517,"Prone knee hangs, Seated bilateral toe raises,...","[[-0.07959338, 0.1902951, -0.07429469, -0.0399..."


In [40]:
# Creating  emeddings for Medical Condition and Affected Area
df_recs['Medical Condition Embedding'] = df_recs['Medical Condition'].apply(lambda x: get_biobert_embedding(x))
df_recs['Affected Area Embedding'] = df_recs['Affected Area'].apply(lambda x: get_biobert_embedding(x))

In [41]:
df_recs.head()

Unnamed: 0,Name,Condition from document,Medical Condition,Affected Area,Age,Gender,BMI,Exercise Recommendations,Medical Condition Embedding,Affected Area Embedding
0,Alex Smith,,Orthopedic surgery,knee,-0.831781,1,-0.831781,"Supine unilateral knee flexions self assisted,...","[[-0.07959338, 0.1902951, -0.07429469, -0.0399...","[[0.31517097, -0.3031255, -0.16148873, -0.3664..."
1,Alice Harris,,Osteoarthritis,knee,-0.130461,0,-0.130461,"Prone knee hangs, Single knee to chest stretch...","[[-0.25620073, -0.29091722, -0.21099968, 0.258...","[[0.31517097, -0.3031255, -0.16148873, -0.3664..."
2,Alice Walker,,Amputee rehabilitation,Unknown,0.430596,0,0.430596,"Prone knee hangs, Seated bilateral toe raises,...","[[-0.107211836, 0.062950395, -0.38891056, -0.0...","[[0.29805782, -0.21088012, -0.11102455, -0.593..."
3,Amy Collins,,Pelvic floor dysfunction,Unknown,-0.130461,0,-0.130461,"Prone knee hangs, Seated bilateral toe raises,...","[[-0.2802109, -0.076734655, 0.28248388, 0.1526...","[[0.29805782, -0.21088012, -0.11102455, -0.593..."
4,Angela Rodriguez,,Orthopedic surgery,carpal tunnel release,-0.691517,0,-0.691517,"Prone knee hangs, Seated bilateral toe raises,...","[[-0.07959338, 0.1902951, -0.07429469, -0.0399...","[[0.036812782, -0.21748056, -0.021883816, -0.0..."


In [44]:
# Preparing input arrays 
X_conditions = np.stack(df_recs['Medical Condition Embedding'].values)
X_areas = np.stack(df_recs['Affected Area Embedding'].values)
X_age = df_recs['Age'].values
X_gender = df_recs['Gender'].values
X_bmi = df_recs['BMI'].values


In [68]:
class CustomTokenizer:
    def __init__(self):
        self.word_index = {}
        self.index_word = {}
        self.next_index = 1

    def fit_on_texts(self, texts):
        for text in texts:
            if text not in self.word_index:
                self.word_index[text] = self.next_index
                self.index_word[self.next_index] = text
                self.next_index += 1

    def texts_to_sequences(self, texts):
        sequences = []
        for text in texts:
            sequence = []
            for word in text.split(', '):
                word = word.strip()
                if word in self.word_index:
                    sequence.append(self.word_index[word])
            sequences.append(sequence)
        return sequences

In [71]:
# Tokenize Exercise Recommendations
exercise_tokenizer = CustomTokenizer()
exercise_tokenizer.fit_on_texts(df_ex['Exercise name'])

print("Tokenizer Word Index:")
print(exercise_tokenizer.word_index)

Tokenizer Word Index:
{'Prone knee hangs': 1, 'Seated bilateral toe raises': 2, 'Seated bilateral heel raises': 3, 'Seated unilateral plantarflexions with band': 4, 'Seated unilateral plantarflexions with band on floor': 5, 'Seated unilateral dorsiflexions with band on floor': 6, 'Single knee to chest stretch': 7, 'Double knee to chest stretch': 8, 'Gluteal set': 9, 'Hamstring set': 10, 'Standing terminal knee extensions with band': 11, 'Prone hamstring curls': 12, 'Seated hamstring stretch': 13, 'Step ups': 14, 'Eccentric lateral step downs': 15, 'Eccentric forward step downs': 16, 'Supine posterior pelvic tilts': 17, 'Supine anterior pelvic tilts': 18, 'Hip bridges': 19, 'Supine lower trunk rotations': 20, 'Seated upper trunk rotations': 21, 'Brace supine marching': 22, 'Side lying quadratus stretch': 23, 'Half kneeling quadratus stretch': 24, 'Hip bridges abduction with band': 25, 'Cat cow': 26, 'Prayer stretch': 27, 'Bird dog': 28, 'Prone on elbows': 29, 'Abdominal bracing': 30, 'K

In [86]:
X_conditions = np.squeeze(X_conditions) 
X_areas = np.squeeze(X_areas)  

X_age = X_age.reshape(-1, 1)  
X_gender = X_gender.reshape(-1, 1)  
X_bmi = X_bmi.reshape(-1, 1)  

print(f"Shape of X_conditions: {np.array(X_conditions).shape}")
print(f"Shape of X_areas: {np.array(X_areas).shape}")
print(f"Shape of X_age: {np.array(X_age).shape}")
print(f"Shape of X_gender: {np.array(X_gender).shape}")
print(f"Shape of X_bmi: {np.array(X_bmi).shape}")
print(f"Shape of y: {np.array(y).shape}")

print(f"Length of X_conditions: {len(X_conditions)}")
print(f"Length of X_areas: {len(X_areas)}")
print(f"Length of X_age: {len(X_age)}")
print(f"Length of X_gender: {len(X_gender)}")
print(f"Length of X_bmi: {len(X_bmi)}")
print(f"Length of y: {len(y)}")


Shape of X_conditions: (186, 768)
Shape of X_areas: (186, 768)
Shape of X_age: (186, 1)
Shape of X_gender: (186, 1)
Shape of X_bmi: (186, 1)
Shape of y: (186, 496)
Length of X_conditions: 186
Length of X_areas: 186
Length of X_age: 186
Length of X_gender: 186
Length of X_bmi: 186
Length of y: 186


In [84]:
y = exercise_tokenizer.texts_to_sequences(df_recs['Exercise Recommendations'])
y = tf.keras.preprocessing.sequence.pad_sequences(y, padding='post')
y = np.array(y)

In [91]:
print(X_conditions)
print(X_conditions.shape)

print(X_areas)
print(X_areas.shape)

print(X_age)
print(X_age.shape)

print(X_gender)
print(X_gender.shape)

print(X_bmi)
print(X_bmi.shape)

[[-0.07959338  0.1902951  -0.07429469 ... -0.03079618  0.3099924
  -0.3045482 ]
 [-0.25620073 -0.29091722 -0.21099968 ...  0.10398183  0.3315693
  -0.486813  ]
 [-0.10721184  0.0629504  -0.38891056 ... -0.00811734  0.09061897
  -0.1265862 ]
 ...
 [-0.07959338  0.1902951  -0.07429469 ... -0.03079618  0.3099924
  -0.3045482 ]
 [-0.25620073 -0.29091722 -0.21099968 ...  0.10398183  0.3315693
  -0.486813  ]
 [ 0.04599728 -0.4079072   0.03514122 ...  0.0097474   0.25755954
  -0.10322534]]
(186, 768)
[[ 0.31517097 -0.3031255  -0.16148873 ... -0.66897696  0.40151897
  -0.06121884]
 [ 0.31517097 -0.3031255  -0.16148873 ... -0.66897696  0.40151897
  -0.06121884]
 [ 0.29805782 -0.21088012 -0.11102455 ... -0.53802985  0.4259918
  -0.18913214]
 ...
 [ 0.30410388 -0.29608408 -0.06239473 ... -0.5570863   0.6065502
  -0.10924176]
 [ 0.31512377 -0.28462678 -0.27066514 ... -0.5514687   0.29384425
   0.05588222]
 [-0.15055184 -0.33060488 -0.22443725 ...  0.24520631  0.328867
  -0.23396625]]
(186, 768)
[[

In [126]:
input_condition = tf.keras.Input(shape=(768,), name='condition_embedding')
input_area = tf.keras.Input(shape=(768,), name='area_embedding')
input_age = tf.keras.Input(shape=(1,), name='age')
input_gender = tf.keras.Input(shape=(1,), name='gender')
input_bmi = tf.keras.Input(shape=(1,), name='bmi')

concat = tf.keras.layers.Concatenate()([input_condition, input_area, input_gender, input_bmi])

dense_1 = tf.keras.layers.Dense(128, activation='relu')(concat)
dense_2 = tf.keras.layers.Dense(64, activation='relu')(dense_1)

num_classes = y.shape[1]

output = tf.keras.layers.Dense(num_classes, activation='sigmoid')(dense_2)

model = tf.keras.Model(inputs=[input_condition, input_area, input_age, input_gender, input_bmi], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.BinaryAccuracy()])


In [127]:
sample_y = [1, 7, 8, 10, 11, 12, ... , 0, 0, 0]  # Padded with zeros

# Use MultiLabelBinarizer
mlb = MultiLabelBinarizer(classes=range(1, 497))
y_multi_label = mlb.fit_transform([sample_y])

print(y_multi_label)

[[1 0 0 0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

In [128]:
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer(classes=range(1, 497)) 
y_multi_label = mlb.fit_transform(y)

X_combined = np.hstack([
    X_conditions, 
    X_areas, 
    X_age.reshape(-1, 1), 
    X_gender.reshape(-1, 1), 
    X_bmi.reshape(-1, 1)
])

# Perform train-test split
X_train_combined, X_test_combined, y_train, y_test = train_test_split(
    X_combined, 
    y_multi_label, 
    test_size=0.2, 
    random_state=42
)

# After splitting, separate the combined input back into individual components
X_train_conditions = X_train_combined[:, :768]
X_train_areas = X_train_combined[:, 768:1536]
X_train_age = X_train_combined[:, 1536:1537]
X_train_gender = X_train_combined[:, 1537:1538]
X_train_bmi = X_train_combined[:, 1538:]

X_test_conditions = X_test_combined[:, :768]
X_test_areas = X_test_combined[:, 768:1536]
X_test_age = X_test_combined[:, 1536:1537]
X_test_gender = X_test_combined[:, 1537:1538]
X_test_bmi = X_test_combined[:, 1538:]




In [129]:
model.fit(
    [X_train_conditions, X_train_areas, X_train_age, X_train_gender, X_train_bmi],
    y_train,
    validation_data=(
        [X_test_conditions, X_test_areas, X_test_age, X_test_gender, X_test_bmi],
        y_test
    ),
    epochs=10,
    batch_size=32
)

Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - binary_accuracy: 0.5189 - loss: 0.6907 - val_binary_accuracy: 0.5686 - val_loss: 0.6722
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - binary_accuracy: 0.5770 - loss: 0.6642 - val_binary_accuracy: 0.6245 - val_loss: 0.6209
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - binary_accuracy: 0.6286 - loss: 0.6108 - val_binary_accuracy: 0.6811 - val_loss: 0.5371
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - binary_accuracy: 0.6942 - loss: 0.5218 - val_binary_accuracy: 0.7638 - val_loss: 0.4332
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - binary_accuracy: 0.7534 - loss: 0.4413 - val_binary_accuracy: 0.8074 - val_loss: 0.3464
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - binary_accuracy: 0.7863 - loss: 0.3919 - val_binary_acc

<keras.src.callbacks.history.History at 0x317657c90>