In [None]:
# Using TensorFlow/Keras to build a small dense (fully connected) neural network

import tensorflow as tf
from tensorflow.keras import layers

In [None]:
# I'm going to re-import my data to turn it into a tensor
# Resolve the CSV relative to the current user's home directory instead of
# hard-coding one machine's absolute path, so the cell runs for other users too.
from pathlib import Path

STROKE_CSV_PATH = Path.home() / 'Downloads' / 'healthcare-dataset-stroke-data 2.csv'
tensor_data = pd.read_csv(STROKE_CSV_PATH)

In [None]:
# Pre-Cleaning data before creating tensors
# Dropping the id column as we do not need the unique identifier.
# Reassigning with errors='ignore' keeps the cell re-runnable: an in-place drop
# raises KeyError the second time because 'id' is already gone.
tensor_data = tensor_data.drop(columns='id', errors='ignore')

# Filling in the null values of BMI with the column average.
# Assign the result back to the column: calling fillna(inplace=True) on a
# selected column is chained assignment, which pandas warns about and which
# does not update the parent frame under Copy-on-Write.
tensor_data['bmi'] = tensor_data['bmi'].fillna(tensor_data['bmi'].mean())




In [None]:
# Splitting data into features and labels
# Work on a copy so popping the label column leaves tensor_data intact.
stroke_features = tensor_data.copy()
stroke_labels = stroke_features.pop('stroke')

# Splitting data sets in training and test.
# A fixed random_state makes the split (and every metric computed from it)
# reproducible across kernel restarts.
# NOTE(review): if the stroke label is imbalanced, consider stratify=stroke_labels.
stroke_features, stroke_features_test, stroke_labels, stroke_labels_test = train_test_split(
    stroke_features, stroke_labels, random_state=42
)




In [None]:
# Build one symbolic Keras Input per column of the training features.
# Object-dtype (string) columns are declared as tf.string; every other column
# is declared as tf.float32.
inputs = {}
for name, column in stroke_features.items():
    dtype = tf.string if column.dtype == object else tf.float32
    inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)

inputs

In [None]:
# Checking the symbolic inputs against the test data
# Keras Inputs describe the schema (column names and dtypes), not the rows, so
# the `inputs` dict built from the training features already serves the test
# split. Rebuilding it here would only swap those Input objects for identical
# copies, so this cell confirms instead that the test frame has the same schema.
for name, column in stroke_features_test.items():
    expected_dtype = tf.string if column.dtype == object else tf.float32
    assert name in inputs, f"test column {name!r} has no matching model input"
    assert inputs[name].dtype == expected_dtype, f"dtype mismatch for column {name!r}"
assert len(inputs) == stroke_features_test.shape[1], "model inputs and test columns differ"

inputs

                                                

In [None]:
# Concatenating Numeric Inputs and Normalizing
# Pick out the float32 symbolic inputs; string inputs are encoded separately.
numeric_inputs = {name: tensor for name, tensor in inputs.items()
                  if tensor.dtype == tf.float32}
x = layers.Concatenate()(list(numeric_inputs.values()))
norm = layers.Normalization()
# Fit the mean/variance on the training split only (stroke_features is the
# training portion after train_test_split). Adapting on the full tensor_data
# would leak test-set statistics into the preprocessing.
norm.adapt(np.array(stroke_features[list(numeric_inputs)]))
train_numeric_inputs = norm(x)

train_numeric_inputs

In [None]:
# Concatenating Numeric Inputs and Normalizing for test data
# Reuse the `norm` layer that was already adapted for the training branch
# instead of creating and adapting a second Normalization layer: test data must
# be scaled with exactly the statistics the model is trained with.
numeric_inputs = {name: tensor for name, tensor in inputs.items()
                  if tensor.dtype == tf.float32}
x = layers.Concatenate()(list(numeric_inputs.values()))
test_numeric_inputs = norm(x)

test_numeric_inputs

In [None]:
# Collecting all the pre-processed results for the training branch.
# The list starts with the normalized numeric tensor; the string-encoding loop
# further down appends one encoded tensor per string column.
# Re-run this cell before re-running that loop, otherwise the appended tensors
# end up in the list twice.
preprocessed_inputs_train = [train_numeric_inputs]

In [None]:
# Collecting all the pre-processed results for the test branch.
# The list starts with the normalized numeric tensor; the test string-encoding
# loop further down appends one encoded tensor per string column.
# Re-run this cell before re-running that loop, otherwise the appended tensors
# end up in the list twice.
preprocessed_inputs_test = [test_numeric_inputs]

In [None]:
# Using string-lookup and category encoding to convert string data to indexed float32
# The loop variable is named input_tensor (not `input`) so the builtin input()
# is not shadowed in the notebook namespace.
for name, input_tensor in inputs.items():
    if input_tensor.dtype == tf.float32:
        # Numeric inputs were already handled by the normalization step.
        continue

    # The vocabulary comes from the training features for this column.
    lookup = layers.StringLookup(vocabulary=np.unique(stroke_features[name]))
    # vocabulary_size() / num_tokens replace the deprecated vocab_size() /
    # max_tokens spellings, which newer Keras releases no longer accept.
    one_hot = layers.CategoryEncoding(num_tokens=lookup.vocabulary_size())

    x = lookup(input_tensor)
    x = one_hot(x)

    preprocessed_inputs_train.append(x)
    
    

In [None]:
# Using string-lookup and category encoding to convert string data to indexed float32 for test data
# The loop variable is named input_tensor (not `input`) so the builtin input()
# is not shadowed in the notebook namespace.
for name, input_tensor in inputs.items():
    if input_tensor.dtype == tf.float32:
        # Numeric inputs were already handled by the normalization step.
        continue

    # The vocabulary deliberately comes from the training features
    # (stroke_features), so train and test share one token-to-index mapping.
    lookup = layers.StringLookup(vocabulary=np.unique(stroke_features[name]))
    # vocabulary_size() / num_tokens replace the deprecated vocab_size() /
    # max_tokens spellings, which newer Keras releases no longer accept.
    one_hot = layers.CategoryEncoding(num_tokens=lookup.vocabulary_size())

    x = lookup(input_tensor)
    x = one_hot(x)

    preprocessed_inputs_test.append(x)
    

In [None]:
# Need to install pydot and graphviz
import pydot
import graphviz

In [None]:
# Merge every preprocessed branch (numeric + encoded strings) into a single
# feature tensor, once for the training list and once for the test list.
preprocessed_inputs_train_cat = layers.Concatenate()(preprocessed_inputs_train)
preprocessed_inputs_test_cat = layers.Concatenate()(preprocessed_inputs_test)

# Functional model mapping the dict of raw symbolic inputs to the concatenated
# training feature tensor.
stroke_preprocessing = tf.keras.Model(inputs=inputs, outputs=preprocessed_inputs_train_cat)

#tf.keras.utils.plot_model(model = stroke_preprocessing, rankdir="LR", dpi = 72, show_shapes= True)

In [None]:
# Convert the training features into a {column name: NumPy array} mapping,
# the dict-of-arrays form that is fed to the Keras model.
stroke_features_dict = {}
for column_name, column_values in stroke_features.items():
    stroke_features_dict[column_name] = np.array(column_values)


In [None]:
# Convert the test features into a {column name: NumPy array} mapping,
# mirroring the structure built for the training features.
stroke_test_features_dict = dict(
    (column_name, np.array(column_values))
    for column_name, column_values in stroke_features_test.items()
)


In [None]:
# Smoke test: take only the first training example of every feature and push
# it through the preprocessing model to inspect the output.
features_dict = {key: stroke_features_dict[key][:1] for key in stroke_features_dict}
stroke_preprocessing(features_dict)


In [None]:
# Now building model
def build_stroke_model(preprocessing_head, inputs):
    """Assemble and compile the stroke classifier.

    Args:
        preprocessing_head: Keras model that maps the dict of raw symbolic
            inputs to one concatenated, preprocessed feature tensor.
        inputs: dict of ``tf.keras.Input`` objects keyed by column name.

    Returns:
        A compiled ``tf.keras.Model`` that takes the raw feature dict and
        outputs one logit per row (no sigmoid is applied).
    """
    # NOTE(review): there is no activation between the two Dense layers, so the
    # body is still a linear function of the preprocessed features; consider
    # activation='relu' on the hidden layer if non-linearity is intended.
    body = tf.keras.Sequential([
        layers.Dense(64),
        layers.Dense(1),
    ])

    preprocessed_inputs = preprocessing_head(inputs)
    result = body(preprocessed_inputs)
    model = tf.keras.Model(inputs, result)

    # from_logits=True matches the linear Dense(1) output above.
    model.compile(loss=tf.losses.BinaryCrossentropy(from_logits=True),
                  optimizer=tf.optimizers.Adam())

    return model

# Keep the builder and the built model under different names: binding the model
# to the function's own name makes the builder unreachable after the first call.
stroke_model = build_stroke_model(stroke_preprocessing, inputs)

In [None]:
# Training model with dictionary of features x and labels y
# x maps each column name to its array of training values; the keys are the
# same column names that were used to name the model's Keras Inputs.
stroke_model.fit(x = stroke_features_dict, y = stroke_labels, epochs = 15)

In [None]:
# Evaluating loss function of test results
# The model is compiled with a loss but no extra metrics, so evaluate() reports
# the loss on the held-out test split.
test_results = stroke_model.evaluate(stroke_test_features_dict, stroke_labels_test)
print(test_results)

In [None]:
# Predicting based on test inputs and finding accuracy, F1 and the classification report
# The network ends in a linear Dense(1) and is trained with from_logits=True,
# so predict() returns raw logits with one column per row.
keras_predict = stroke_model.predict(stroke_test_features_dict)

# Turn logits into hard 0/1 labels: logit > 0 is the same as sigmoid(logit) > 0.5.
keras_predict_labels = (np.asarray(keras_predict).ravel() > 0).astype(int)

# Score the Keras predictions against the test labels from this split (not the
# random-forest variables from another model). scikit-learn metrics expect
# (y_true, y_pred) in that order.
print('Accuracy --> ',accuracy_score(stroke_labels_test, keras_predict_labels))
print('F1 Score --> ',f1_score(stroke_labels_test, keras_predict_labels))
print('Classification Report  --> \n',classification_report(stroke_labels_test, keras_predict_labels))
