# evaluation

Evaluate both baseline and TabTransformer models with test set

In [44]:
import keras_preprocessing, tensorflow_addons, keras
from keras import layers
import tensorflow as tf

from pathlib import Path
import pandas as pd


print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [45]:
def split_label(data: pd.DataFrame):
    x = data.copy().drop('stroke', axis=1)
    y = data["stroke"]  # labels

    return x, y

keras.metrics.AUC()

<keras.metrics.metrics.AUC at 0x7f6079e77a00>

In [46]:
CSV_HEADER = [
    "gender",
    "age",
    "hypertension",
    "heart_disease",
    "ever_married",
    "work_type",
    "residence_type",
    "avg_glucose_level",
    "bmi",
    "smoking_status",
    "stroke",
]

FEATURES = CSV_HEADER[:-1]
TARGET = CSV_HEADER[-1]

test_data_path = Path().resolve().joinpath("dataset/test_data.csv")
test_data_file = str(test_data_path.absolute())
test_data = pd.read_csv(test_data_file, names=CSV_HEADER)

x_test, y_test = split_label(test_data)

y_test = y_test.replace({"No": 0, 'Yes': 1})

x_test = x_test
y_test = y_test


In [47]:
LEARNING_RATE = 0.001
WEIGHT_DECAY = 0.0001
DROPOUT_RATE = 0.1
BATCH_SIZE = 32
NUM_EPOCHS = 100

MLP_MODEL_PATH=str(Path().resolve().joinpath('model/mlp_model'))
TABTRANSFORMER_MODEL_PATH=str(Path().resolve().joinpath('model/tabtransformer_model'))

TARGET_FEATURE_NAME='stroke'
TARGET_LABELS = [1, 0]

In [48]:
# data proccessing pipeline

target_label_lookup = layers.StringLookup(
    vocabulary=TARGET_LABELS, mask_token=None, num_oov_indices=0
)


def prepare_example(features, target):
    #target_index = target_label_lookup(target)
    target_index = target
    return features, target_index


def get_dataset_from_csv(csv_file_path, batch_size=128, shuffle=False):
    """dataset from, csv"""
    dataset = tf.data.experimental.make_csv_dataset(
        csv_file_path,
        batch_size=batch_size,
        column_names=CSV_HEADER,
        label_name=TARGET_FEATURE_NAME,
        num_epochs=1,
        header=False,
        na_value="?",
        shuffle=shuffle,
    ).map(prepare_example, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False)
    return dataset.cache()


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int).

In [None]:
def evalate_model(model: keras.Model, test_data_file):
    test_data = get_dataset_from_csv(test_data_file)

    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[
            tf.keras.metrics.AUC(
                num_thresholds=200,
                curve="ROC",
                summation_method="interpolation",
                name="auc",
            ),
            tf.keras.metrics.BinaryAccuracy(name="accuracy"),
        ]
    )

    model.evaluate(
        x=test_data,
        batch_size=BATCH_SIZE,
        verbose="auto",
        steps=None,
        callbacks=None,
        max_queue_size=10,
        workers=1,
        use_multiprocessing=False,
        return_dict=False,
    )



def predict_model(model: keras.Model, test_data_file):
    test_data = get_dataset_from_csv(test_data_file)

    return model.predict(
        x=test_data,
        batch_size=BATCH_SIZE,
        verbose="auto",
        steps=None,
        callbacks=None,
        max_queue_size=10,
        workers=1,
    )


In [None]:
baseline_model = keras.models.load_model(MLP_MODEL_PATH)
tt_model = keras.models.load_model(TABTRANSFORMER_MODEL_PATH)

evalate_model(baseline_model, test_data_file)
evalate_model(tt_model, test_data_file)



### MLP:
loss: 0.2073 - auc: 0.8241 - accuracy: 0.9397
### TabTransformer:
loss: 0.2197 - auc: 0.7708 - accuracy: 0.9335