In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers

import kerastuner as kt

2025-02-22 23:33:43.680847: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-22 23:33:43.682401: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-22 23:33:43.690326: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-22 23:33:43.706945: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740285223.736063 1725704 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740285223.74

In [2]:
NUM_INJURY_CATEGORIES = 28

injury_counts_input = Input(shape=(NUM_INJURY_CATEGORIES,), name='injury_counts')
position_input = Input(shape=(1,), name='position')
numerical_input = Input(shape=(6,), name='numerical') # height, weight, age, forty, bench, vertical

In [3]:
def build_model(hp):
    embed_dim = hp.Int('embed_dim', min_value=4, max_value=12, step=2)
    x_position = Embedding(input_dim=29, output_dim=embed_dim, input_length=1)(position_input) # train encoding on position index
    x_position = Flatten()(x_position)

    # One hot injury encoding
    units_injury = hp.Int('units_injury', min_value=128, max_value=256, step=64)
    x_injury = Dense(units_injury, activation='relu')(injury_counts_input)
    dropout_injury = hp.Float('dropout_injury', min_value=0.1, max_value=0.4, step=0.05)
    x_injury = Dropout(dropout_injury)(x_injury)

    # Numerical encoding
    units_num = hp.Int('units_num', min_value=128, max_value=256, step=64)
    x_num = Dense(units_num, activation='relu')(numerical_input)
    dropout_num = hp.Float('dropout_num', min_value=0.1, max_value=0.4, step=0.05)
    x_num = Dropout(dropout_num)(x_num)

    units_num2 = hp.Int('units_num', min_value=32, max_value=128, step=32)
    x_num = Dense(units_num2, activation='relu')(numerical_input)
    dropout_num2 = hp.Float('dropout_num', min_value=0.1, max_value=0.4, step=0.05)
    x_num = Dropout(dropout_num2)(x_num)

    x = Concatenate()([x_position, x_injury, x_num])

    units_hidden = hp.Int('units_hidden', min_value=128, max_value=512, step=64)
    x = Dense(units_hidden, activation='relu')(x)
    dropout_hidden = hp.Float('dropout_hidden', min_value=0.2, max_value=0.5, step=0.1)
    x = Dropout(dropout_hidden)(x)

    units_hidden2 = hp.Int('units_hidden2', min_value=32, max_value=256, step=32)
    x = Dense(units_hidden2, activation='relu')(x)
    dropout_hidden2 = hp.Float('dropout_hidden2', min_value=0.2, max_value=0.5, step=0.1)
    x = Dropout(dropout_hidden2)(x)
    output = Dense(NUM_INJURY_CATEGORIES, activation='softmax')(x)

    model = Model(inputs=[injury_counts_input, position_input, numerical_input], outputs=output)

    lr = hp.Float('learning_rate', min_value=1e-5, max_value=1e-2, sampling='LOG')
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

In [4]:
## Training
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from tensorflow.keras.utils import to_categorical
import numpy as np

df = pd.read_csv('data.csv')
injury_counts_cols = [col for col in df.columns if col.startswith('prev_')]
numerical_cols = ['height', 'weight', 'age', 'forty', 'bench', 'vertical']

scaler = StandardScaler()
df[numerical_cols] = scaler.fit_transform(df[numerical_cols]) # Normalize numerical data

injury_counts_data = df[injury_counts_cols].values
position_data = df['position_index']
numerical_data = df[numerical_cols].values

y = to_categorical(df["injury_index"], num_classes=NUM_INJURY_CATEGORIES)

(X_injury_counts_train, X_injury_counts_val_test, X_position_train, X_position_val_test, X_numerical_train, X_numerical_val_test, y_train, y_val_test) = train_test_split(injury_counts_data, position_data, numerical_data, y, test_size=0.3)

(X_injury_counts_val, X_injury_counts_test, X_position_val, X_position_test, X_numerical_val, X_numerical_test, y_val, y_test) = train_test_split(X_injury_counts_val_test, X_position_val_test, X_numerical_val_test, y_val_test, test_size=0.1)

# Label balancing (there are so many knee injuries)
y_train_int = np.argmax(y_train, axis=1)
class_weights = class_weight.compute_class_weight(
    'balanced',
    classes=np.unique(y_train_int),
    y=y_train_int
)
class_weights_dict = dict(enumerate(class_weights))

In [None]:
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=20,
    executions_per_trial=1,
    directory='tuning',
    project_name='injury_model_tuning'
)

tuner.search_space_summary()

tuner.search([X_injury_counts_train, X_position_train, X_numerical_train], y_train,
             epochs=100,
             validation_data=([X_injury_counts_val, X_position_val, X_numerical_val], y_val),
                 )

best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

tuner.results_summary()

Trial 6 Complete [00h 00m 48s]
val_accuracy: 0.38729068636894226

Best val_accuracy So Far: 0.48862171173095703
Total elapsed time: 00h 04m 19s

Search: Running Trial #7

Value             |Best Value So Far |Hyperparameter
4                 |10                |embed_dim
192               |128               |units_injury
0.4               |0.35              |dropout_injury
128               |192               |units_num
0.15              |0.2               |dropout_num
256               |384               |units_hidden
0.3               |0.3               |dropout_hidden
128               |96                |units_hidden2
0.2               |0.2               |dropout_hidden2
1.3417e-05        |0.0036006         |learning_rate

Epoch 1/100


In [None]:
import tensorflow as tf

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',    # monitor validation loss
    factor=0.5,            # factor by which the learning rate will be reduced
    patience=10,           # number of epochs with no improvement after which learning rate will be reduced
    min_lr=1e-6,           # lower bound on the learning rate
    verbose=1
)

final_model = build_model(best_hyperparameters)

final_model.fit(
    x=[X_injury_counts_train, X_position_train, X_numerical_train],
    y=y_train,
    validation_data=([X_injury_counts_val, X_position_val, X_numerical_val], y_val),
    epochs=400,
    batch_size=128,
    callbacks=[reduce_lr]
)


Epoch 1/400
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.2075 - loss: 2.8633 - val_accuracy: 0.3182 - val_loss: 2.3980 - learning_rate: 0.0011
Epoch 2/400
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3305 - loss: 2.3700 - val_accuracy: 0.3469 - val_loss: 2.2803 - learning_rate: 0.0011
Epoch 3/400
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3394 - loss: 2.2908 - val_accuracy: 0.3590 - val_loss: 2.2185 - learning_rate: 0.0011
Epoch 4/400
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3676 - loss: 2.2082 - val_accuracy: 0.3701 - val_loss: 2.1850 - learning_rate: 0.0011
Epoch 5/400
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3781 - loss: 2.1277 - val_accuracy: 0.3731 - val_loss: 2.1325 - learning_rate: 0.0011
Epoch 6/400
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

KeyboardInterrupt: 

In [17]:
# Test

test_loss, test_acc = best_model.evaluate(
    x=[X_injury_counts_test, X_position_test, X_numerical_test],
    y=y_test,
    verbose=1
)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5305 - loss: 1.8371 
Test Loss: 1.8646795749664307
Test Accuracy: 0.5250965356826782
