In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow import keras 
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint



# Spreadsheet holding the coded triage records; the path is relative, so it is
# resolved against the notebook's working directory.
DATA_PATH = 'triage dataset-coded 3.xlsx'
dataframe = pd.read_excel(DATA_PATH)

In [2]:
# Plain-text dump of the raw frame; pandas elides the middle rows and columns.
print(dataframe)

     gender   age  job  respcontact  placevisit  healthworker  fever  cough  \
0         0  30.0    1            1           1             1      1      0   
1         1  58.0    1            1           1             1      0      1   
2         0  41.0    1            0           0             1      0      0   
3         1  24.0    3            1           0             1      0      1   
4         0  48.0    1            1           1             1      0      1   
..      ...   ...  ...          ...         ...           ...    ...    ...   
435       1  60.0    6            1           0             0      1      1   
436       1  85.0    6            1           1             0      1      1   
437       1  55.0    6            1           1             0      1      1   
438       1  24.0    6            1           1             0      1      1   
439       0  29.0    1            0           1             1      0      0   

     sorethroat  vomdiarrhea  ...  comorbid  dm  ht

In [3]:
# (rows, columns) of the frame as loaded, before incomplete rows are dropped.
dataframe.shape

(440, 25)

In [4]:
# Discard every row that has at least one missing value. `dataframe` is rebound
# here, so the raw frame is only recoverable by re-running the loading cell.
dataframe = dataframe.dropna(axis=0, how="any")

In [5]:
# Preview the first rows of the frame after incomplete records were removed.
dataframe.head()

Unnamed: 0,gender,age,job,respcontact,placevisit,healthworker,fever,cough,sorethroat,vomdiarrhea,...,comorbid,dm,htn,cardiac,ckd,chest,hepatic,cancer,others,pcr
0,0,30.0,1,1,1,1,1,0,1,0,...,0,0,0,0,0,0,0,0,0,1
1,1,58.0,1,1,1,1,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
2,0,41.0,1,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
3,1,24.0,3,1,0,1,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
4,0,48.0,1,1,1,1,0,1,0,0,...,1,0,0,0,0,1,0,0,0,0


In [6]:
# (rows, columns) after dropna(); compare with the earlier shape to see how
# many incomplete rows were removed.
dataframe.shape

(409, 25)

In [7]:
# Reproducible split: draw 20% of the rows (fixed seed) for validation and
# train on whatever is left.
val_dataframe = dataframe.sample(frac=0.2, random_state=1337)
held_out_rows = val_dataframe.index
train_dataframe = dataframe.drop(index=held_out_rows)

split_message = "Using %d samples for training and %d for validation"
print(split_message % (len(train_dataframe), len(val_dataframe)))

Using 327 samples for training and 82 for validation


In [8]:
def dataframe_to_dataset(dataframe, label_column="pcr", shuffle=True):
    """Turn a pandas DataFrame into a ``tf.data.Dataset`` of ``(features, label)`` pairs.

    Args:
        dataframe: Source frame. It is copied first, so the caller's frame is
            left untouched.
        label_column: Name of the column used as the target. Defaults to
            ``"pcr"``, the column this notebook predicts.
        shuffle: When true (the default), shuffle with a buffer as large as
            the frame. Pass ``False`` to keep row order, e.g. for evaluation.

    Returns:
        An unbatched dataset whose elements are ``(dict of column -> value,
        label)``.

    Raises:
        KeyError: If ``label_column`` is not a column of ``dataframe``.
    """
    features = dataframe.copy()
    labels = features.pop(label_column)
    ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    # A zero-sized shuffle buffer is invalid, so skip shuffling for an empty frame.
    if shuffle and len(features) > 0:
        ds = ds.shuffle(buffer_size=len(features))
    return ds


# Build one (features, label) dataset per split; both are still unbatched here.
train_ds = dataframe_to_dataset(train_dataframe)
val_ds = dataframe_to_dataset(val_dataframe)

In [9]:
# Look at a single (features, label) element to sanity-check the structure.
for sample_features, sample_label in train_ds.take(1):
    print("Input:", sample_features)
    print("Target:", sample_label)

Input: {'gender': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'age': <tf.Tensor: shape=(), dtype=float64, numpy=17.0>, 'job': <tf.Tensor: shape=(), dtype=int64, numpy=6>, 'respcontact': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'placevisit': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'healthworker': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'fever': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'cough': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'sorethroat': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'vomdiarrhea': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'malaise': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'smelltaste': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'smoking': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'pregnancy': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'immundiff': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'comorbid': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'dm': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'htn': <tf.Tensor: shape=(

In [10]:
# Group both datasets into batches of 32. The preprocessing layers built later
# are adapted on this batched `train_ds`.
train_ds = train_ds.batch(32)
val_ds = val_ds.batch(32)

In [11]:
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras.layers.experimental.preprocessing import CategoryEncoding
from tensorflow.keras.layers.experimental.preprocessing import StringLookup


def encode_numerical_feature(feature, name, dataset):
    """Normalize one numeric input using statistics learned from ``dataset``.

    Args:
        feature: Symbolic Keras tensor for the column (a ``keras.Input``).
        name: Key of the column inside each element's feature dict.
        dataset: Dataset of ``(features, label)`` pairs used to adapt the layer.

    Returns:
        The output of a ``Normalization`` layer applied to ``feature``.
    """
    scaler = Normalization()

    # Keep only the requested column and give it a trailing axis.
    column_ds = dataset.map(
        lambda features, _label: tf.expand_dims(features[name], -1)
    )

    # Let the layer learn the column's statistics, then apply it.
    scaler.adapt(column_ds)
    return scaler(feature)


def encode_string_categorical_feature(feature, name, dataset):
    """Encode a string-valued categorical input via lookup + category encoding.

    Args:
        feature: Symbolic Keras tensor for the column (a ``keras.Input``).
        name: Key of the column inside each element's feature dict.
        dataset: Dataset of ``(features, label)`` pairs used to adapt the layers.

    Returns:
        The output of ``CategoryEncoding(output_mode="binary")`` applied to the
        integer indices that a ``StringLookup`` layer produces for ``feature``.
    """
    lookup = StringLookup()

    # Keep only the requested column and give it a trailing axis.
    column_ds = dataset.map(
        lambda features, _label: tf.expand_dims(features[name], -1)
    )

    # Learn the vocabulary, then map the symbolic input to integer indices.
    lookup.adapt(column_ds)
    indexed_feature = lookup(feature)

    # Learn the index space from the looked-up column and encode the input.
    encoder = CategoryEncoding(output_mode="binary")
    encoder.adapt(column_ds.map(lookup))
    return encoder(indexed_feature)


def encode_integer_categorical_feature(feature, name, dataset):
    """Encode an integer-coded categorical input with ``CategoryEncoding``.

    Args:
        feature: Symbolic Keras tensor for the column (a ``keras.Input``).
        name: Key of the column inside each element's feature dict.
        dataset: Dataset of ``(features, label)`` pairs used to adapt the layer.

    Returns:
        The output of ``CategoryEncoding(output_mode="binary")`` applied to
        ``feature``.
    """
    encoder = CategoryEncoding(output_mode="binary")

    # Keep only the requested column and give it a trailing axis.
    column_ds = dataset.map(
        lambda features, _label: tf.expand_dims(features[name], -1)
    )

    # Learn the space of index values present in the data, then encode.
    encoder.adapt(column_ds)
    return encoder(feature)

In [12]:
# Columns fed to the model, in input/concatenation order. Everything not listed
# in NUMERICAL_FEATURES is treated as an integer-coded categorical.
FEATURE_ORDER = [
    "gender",
    "age",
    "job",
    "respcontact",
    "placevisit",
    "healthworker",
    "fever",
    "cough",
    "sorethroat",
    "vomdiarrhea",
    "malaise",
    "smelltaste",
    "smoking",
    "pregnancy",
    "immundiff",
    "comorbid",
    "dm",
    "htn",
    "cardiac",
    "ckd",
    "chest",
    "hepatic",
    "cancer",
    "others",
]

# Numerical features: normalized instead of category-encoded.
NUMERICAL_FEATURES = {"age"}

# String-valued categoricals would go through encode_string_categorical_feature;
# this model declares none.

# One symbolic input per column. Categorical codes are declared as int64;
# numerical columns keep the default Keras input dtype.
feature_inputs = {}
for feature_name in FEATURE_ORDER:
    if feature_name in NUMERICAL_FEATURES:
        feature_inputs[feature_name] = keras.Input(shape=(1,), name=feature_name)
    else:
        feature_inputs[feature_name] = keras.Input(
            shape=(1,), name=feature_name, dtype="int64"
        )

# Encode every input with a preprocessing layer adapted on the training set.
# Individual tensors are available as feature_inputs[...] / encoded_features[...].
encoded_features = {}
for feature_name in FEATURE_ORDER:
    if feature_name in NUMERICAL_FEATURES:
        encode = encode_numerical_feature
    else:
        encode = encode_integer_categorical_feature
    encoded_features[feature_name] = encode(
        feature_inputs[feature_name], feature_name, train_ds
    )

all_inputs = [feature_inputs[feature_name] for feature_name in FEATURE_ORDER]

all_features = layers.concatenate(
    [encoded_features[feature_name] for feature_name in FEATURE_ORDER]
)

# Training hyperparameters. Only INIT_LR is consumed in this cell: the batch
# size is hard-coded where the datasets are batched and the epoch count where
# fit() is called, so EPOCHS and BS are informational here.
INIT_LR = 1e-4
EPOCHS = 20
BS = 32

# Three Dense -> BatchNorm -> LeakyReLU -> Dropout(0.5) stages, narrowing
# 32 -> 16 -> 8 units, followed by a single sigmoid unit for the binary target.
x = all_features
for units in (32, 16, 8):
    x = keras.layers.Dense(units)(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU()(x)
    x = keras.layers.Dropout(0.5)(x)
output = keras.layers.Dense(1, activation="sigmoid")(x)

model = keras.Model(all_inputs, output)

# Pass an Adam instance so the declared INIT_LR actually takes effect; the
# string "Adam" would silently fall back to the optimizer's default rate.
model.compile(
    loss="binary_crossentropy",
    optimizer=Adam(learning_rate=INIT_LR),
    metrics=["accuracy"],
)

In [13]:
# Draw the layer graph; `rankdir='LR'` is to make the graph horizontal.
# Needs the optional `pydot` package and a Graphviz install; without them Keras
# only reports "Failed to import pydot" and no graph is drawn.
keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')


In [14]:
# Print the per-layer table: output shape, parameter count and connections.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
gender (InputLayer)             [(None, 1)]          0                                            
__________________________________________________________________________________________________
age (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
job (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
respcontact (InputLayer)        [(None, 1)]          0                                            
______________________________________________________________________________________________

In [15]:
# Checkpointing: write the weights to disk only when validation accuracy
# reaches a new maximum.
# Author's earlier note: weights.best=0.65854
filepath = "weights.best_2.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    callbacks=callbacks_list,
)

Epoch 1/100

Epoch 00001: val_accuracy improved from -inf to 0.64634, saving model to weights.best_2.hdf5
Epoch 2/100

Epoch 00002: val_accuracy did not improve from 0.64634
Epoch 3/100

Epoch 00003: val_accuracy did not improve from 0.64634
Epoch 4/100

Epoch 00004: val_accuracy did not improve from 0.64634
Epoch 5/100

Epoch 00005: val_accuracy did not improve from 0.64634
Epoch 6/100

Epoch 00006: val_accuracy did not improve from 0.64634
Epoch 7/100

Epoch 00007: val_accuracy did not improve from 0.64634
Epoch 8/100

Epoch 00008: val_accuracy did not improve from 0.64634
Epoch 9/100

Epoch 00009: val_accuracy did not improve from 0.64634
Epoch 10/100

Epoch 00010: val_accuracy did not improve from 0.64634
Epoch 11/100

Epoch 00011: val_accuracy did not improve from 0.64634
Epoch 12/100

Epoch 00012: val_accuracy did not improve from 0.64634
Epoch 13/100

Epoch 00013: val_accuracy did not improve from 0.64634
Epoch 14/100

Epoch 00014: val_accuracy did not improve from 0.64634
Epoch


Epoch 00042: val_accuracy did not improve from 0.64634
Epoch 43/100

Epoch 00043: val_accuracy did not improve from 0.64634
Epoch 44/100

Epoch 00044: val_accuracy did not improve from 0.64634
Epoch 45/100

Epoch 00045: val_accuracy did not improve from 0.64634
Epoch 46/100

Epoch 00046: val_accuracy did not improve from 0.64634
Epoch 47/100

Epoch 00047: val_accuracy did not improve from 0.64634
Epoch 48/100

Epoch 00048: val_accuracy did not improve from 0.64634
Epoch 49/100

Epoch 00049: val_accuracy did not improve from 0.64634
Epoch 50/100

Epoch 00050: val_accuracy did not improve from 0.64634
Epoch 51/100

Epoch 00051: val_accuracy did not improve from 0.64634
Epoch 52/100

Epoch 00052: val_accuracy did not improve from 0.64634
Epoch 53/100

Epoch 00053: val_accuracy did not improve from 0.64634
Epoch 54/100

Epoch 00054: val_accuracy did not improve from 0.64634
Epoch 55/100

Epoch 00055: val_accuracy did not improve from 0.64634
Epoch 56/100

Epoch 00056: val_accuracy did not

<tensorflow.python.keras.callbacks.History at 0x7fce18fced68>

In [16]:
# Model export is intentionally disabled; uncomment the line below to save the trained model.
# model.save('triage-v4-0.7119')
