In [1]:
# Imports
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.model_selection import train_test_split


In [2]:
df = pd.read_csv('./data/processed_data.csv')
df.shape


(18471, 69)

In [3]:
# Final labels
labels = ['patientState_good', 'patientState_caution', 'patientState_danger']
# Section labels
section_labels = {
    'fever': ['feverState_good', 'feverState_caution', 'feverState_danger'],
    'medication': ['medicationState_good', 'medicationState_caution', 'medicationState_danger'],
    'hydration': ['hydrationState_good', 'hydrationState_caution', 'hydrationState_danger'],
    'respiration': ['respirationState_good', 'respirationState_caution', 'respirationState_danger'],
    'skin': ['skinState_good', 'skinState_caution', 'skinState_danger'],
    'pulse': ['pulseState_good', 'pulseState_caution', 'pulseState_danger'],
    'general': ['generalState_good', 'generalState_caution', 'generalState_danger'],
}
# Section labels in an array flattened
section_labels_arr = [item for val in section_labels.values() for item in val]
# All labels
all_labels = [*labels, *section_labels_arr]
len(all_labels)


24

In [4]:
df[section_labels['general']].value_counts()


generalState_good  generalState_caution  generalState_danger
1.0                0.0                   0.0                    10142
0.0                1.0                   0.0                     7976
                   0.0                   1.0                      353
dtype: int64

In [5]:
# Separating to testing and training

# Prepare the data: You should split your data into training and test sets.
# The training set will be used to train the model and the test set will be used
# to evaluate the model's performance.

_x, x_test, _y, y_test = train_test_split(
    df.drop(columns=all_labels).to_numpy(),
    df[labels].to_numpy(),
    test_size=0.2,
    random_state=42
)
x_train, x_val, y_train, y_val = train_test_split(_x, _y, test_size=0.25, train_size=0.75)
x_train.shape


(11082, 45)

In [6]:
str(list(x_test[1]))


'[15.6173706445342, 1.0, 38.3, 0.0, 0.0, 0.0, 1.0, 1.0, 50.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 13.0, 0.0, 0.0, 0.0, 0.0, 68.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]'

In [7]:
# Preprocess the data: Data preprocessing is an important step, which includes
# cleaning and transforming the data. You should normalize the data,
# one-hot encode categorical variables, and split the data into features and labels.

# TODO: Does KNNImputer create a correlation between training and test split?
# If yes do ->
# TODO: move KNNImputer here
# TODO: move every preprocessing step that would create a connection between train and test split


In [7]:
from tensorflow import keras
# Define the model: TensorFlow provides a high-level API for building and
# training neural network models. You should choose the appropriate model
# architecture for your problem and specify the hyperparameters,
# such as the number of hidden layers and the number of neurons in each layer.

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath='./checkpoints/model_{epoch}',
        save_freq='epoch'),
    keras.callbacks.TensorBoard(log_dir='./logs')
]


def build_model(hp):
    model = keras.Sequential([
          keras.layers.Dense(
            units=hp.Int('units', min_value=32, max_value=512, step=32),
            activation=hp.Choice('activation', values=['tanh', 'sigmoid', 'relu']),
            input_shape=(45,)),
    ])
    layey_two = hp.Boolean('2nd_layer')
    with hp.conditional_scope('2nd_layer', True):
        if (layey_two):
            model.add(
                keras.layers.Dense(
                    units=hp.Int('units2', min_value=32, max_value=512, step=32),
                    activation=hp.Choice('activation', values=['tanh', 'sigmoid', 'relu']))
            )
    model.add(
        keras.layers.Dense(3, activation='softmax')
    )
    model.compile(
        optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        # optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    model.build()
    return model


In [8]:
# Compile the model: You should compile the model by specifying the optimizer,
# loss function, and evaluation metrics.
import keras_tuner

tuner = keras_tuner.tuners.BayesianOptimization(
    build_model,
    objective='val_loss',
    max_trials=50,
    executions_per_trial=2,
    overwrite=True,
    directory='tuner')


Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB



2023-02-22 08:10:18.716744: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-02-22 08:10:18.717719: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [73]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

tuner.search(x_train, y_train, validation_data=(x_val, y_val), epochs=4, callbacks=[stop_early])

Trial 104 Complete [00h 01m 08s]
val_loss: 0.3560260981321335

Best val_loss So Far: 0.3011022210121155
Total elapsed time: 02h 07m 51s

Search: Running Trial #105

Value             |Best Value So Far |Hyperparameter
512               |512               |units
tanh              |tanh              |activation
True              |True              |2nd_layer
0.001             |0.001             |learning_rate
224               |192               |units2

Epoch 1/4


2023-02-19 20:08:56.631771: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2023-02-19 20:09:04.609793: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/4

KeyboardInterrupt: 

In [74]:

tuner.results_summary()

Results summary
Results in tuner/untitled_project
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x1502ec5b0>
Trial summary
Hyperparameters:
units: 512
activation: tanh
2nd_layer: True
learning_rate: 0.001
units2: 192
Score: 0.3011022210121155
Trial summary
Hyperparameters:
units: 512
activation: tanh
2nd_layer: True
learning_rate: 0.001
units2: 160
Score: 0.3045855313539505
Trial summary
Hyperparameters:
units: 512
activation: tanh
2nd_layer: True
learning_rate: 0.001
units2: 192
Score: 0.31072577834129333
Trial summary
Hyperparameters:
units: 512
activation: sigmoid
2nd_layer: True
learning_rate: 0.001
units2: 128
Score: 0.31402669847011566
Trial summary
Hyperparameters:
units: 512
activation: tanh
2nd_layer: True
learning_rate: 0.001
units2: 160
Score: 0.3164873719215393
Trial summary
Hyperparameters:
units: 512
activation: sigmoid
2nd_layer: True
learning_rate: 0.001
units2: 128
Score: 0.31706707179546356
Trial summary
Hyperparameters:
units: 512
activatio

Best val_loss So Far: 0.3011022210121155

- 5 epochs
- 512 units
- tanh activation
- True 2nd layer
- 0.001 learning_rate
- 192 units2

In [75]:
params = tuner.get_best_hyperparameters(1)[0]
params.values
model = build_model(params)

In [76]:
model.fit(x_train, y_train, callbacks=callbacks, epochs=30, validation_data=(x_val, y_val))


Epoch 1/30


2023-02-19 20:11:10.472644: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2023-02-19 20:11:17.433571: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


INFO:tensorflow:Assets written to: ./checkpoints/model_1/assets
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x28853e190>

In [77]:
# Evaluate the model: You should evaluate the performance of the model
# on the test set and compare it to the training set performance to determine
# if the model has overfitted or underfitted the data.
loss, acc = model.evaluate(x_test, y_test)

print("loss: %.2f" % loss)
print("acc: %.2f" % acc)


loss: 0.24
acc: 0.90


In [11]:
r = model.predict(x_test)
# res = []
# tmp = []
# for i in range(24):
#   if i%3 ==0 and i !=0:
#     res.append(tmp)
#     tmp = []
#   tmp.append(r[i])
# res.append(tmp)
# res
res = [np.argmax(i) for i in r]




2023-02-19 09:21:42.608024: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




In [12]:
res


[0,
 2,
 1,
 2,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 2,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 2,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 2,
 2,
 1,
 2,
 1,
 1,
 1,
 0,
 1,
 1,
 2,
 1,
 1,
 1,
 2,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 2,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 0,
 1,
 1,
 2,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 2,
 1,
 1,
 2,
 1,
 2,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 2,
 2,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 2,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 2,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 2,
 2,


In [13]:
model.save('./out/model')


INFO:tensorflow:Assets written to: ./out/model/assets


In [14]:
# Convert to TFLite model
import tensorflow as tf

# Convert the model
converter = tf.lite.TFLiteConverter.from_saved_model(
    './out/model')  # path to the SavedModel directory
tflite_model = converter.convert()

# Save the model.
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)


2023-02-19 09:22:20.905687: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-02-19 09:22:20.905799: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-02-19 09:22:20.908545: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: ./out/model
2023-02-19 09:22:20.910006: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2023-02-19 09:22:20.910011: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: ./out/model
2023-02-19 09:22:20.911909: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2023-02-19 09:22:20.912338: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2023-02-19 09:22:20.936537: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: ./out/model
2023-02-19 09:22:20.94