# Basic Neural Network Approach

In [1]:
# Importing libraries
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras.optimizers import SGD
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# Importing the cleaned data
stroke_clean = pd.read_csv('stroke_clean.csv', index_col='id')

In [3]:
# Do a train-test split
X = np.array(stroke_clean.loc[:, stroke_clean.columns != 'stroke'])
y = np.array(stroke_clean['stroke'])
test_size = 0.3

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

In [4]:
# Scaling the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Making the layers
model = keras.models.Sequential([
    keras.layers.Input(shape=[17]),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dropout(rate=0.5),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dropout(rate=0.5),
    keras.layers.Dense(1, activation="sigmoid")
])

In [6]:
# Checking out the model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               1800      
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 101       
Total params: 12,001
Trainable params: 12,001
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Compiling the model
model.compile(loss="binary_crossentropy", optimizer='SGD', metrics=[keras.metrics.Recall(), keras.metrics.Precision(), 'accuracy'])

In [8]:
# Fitting the model to people 
class_weight = {0: 1., 1: 50.}

model.fit(X_train, y_train, class_weight=class_weight, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1fafce71b50>

In [9]:
# Testing the model
model.evaluate(X_test, y_test)

# Høy recall -- De fleste som vil få et slag vil få bekjed
# Middels accuracy -- Spår bare riktig 50% av gangene
# Lav precision -- Vil lage en del falske positive. Hvis testen sier "du får slag", så er du
# fortsatt bare 10% sannsynlig for å få slag. Er slik feks. mammografi fungerer (med sikkert bedre
# tall en jeg får her)



[0.710206151008606, 0.9305555820465088, 0.0899328887462616, 0.5363204479217529]

In [11]:
def build_model(hp):
    inputs = keras.layers.Input(shape=[17])
    x = layers.Dense(
        units=hp.Int('units', min_value=32, max_value=512, step=32),
        activation='relu')(inputs)
    outputs = keras.layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate',
                      values=[1e-2, 1e-3, 1e-4])),
        loss="binary_crossentropy",
        metrics=[keras.metrics.Recall(), keras.metrics.Precision(), 'accuracy'])
    return model

In [15]:
import kerastuner

tuner = kerastuner.tuners.Hyperband(
  build_model,
  objective=keras.metrics.Recall(),
  max_epochs=100,
  max_trials=200,
  executions_per_trial=2,
  directory='my_dir')

ModuleNotFoundError: No module named 'kerastuner'