In [87]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [89]:
# Load and preprocess data
df = pd.read_csv('healthcare-dataset-stroke-data.csv')
df = df.dropna()

In [91]:
# Prepare features and labels
X = df.iloc[:, 1:-1].values
y = df.iloc[:, -1].values

In [93]:
# One-hot encode categorical features
ct = ColumnTransformer(
    transformers=[
        ('encoder', OneHotEncoder(drop='first'), [-1])
    ], 
    remainder='passthrough'
)
X_transformed = ct.fit_transform(X)

In [95]:
# Identify numeric columns for scaling
# Assuming the columns after one-hot encoding are all numeric
numeric_columns_indices = [i for i in range(X_transformed.shape[1]) if np.issubdtype(X_transformed[:, i].dtype, np.number)]

In [97]:
# Apply StandardScaler to numeric columns if any
if numeric_columns_indices:
    scaler = StandardScaler()
    X_numeric = X_transformed[:, numeric_columns_indices]
    X_numeric = scaler.fit_transform(X_numeric)

    # Replace normalized numeric columns in transformed data
    X_transformed[:, numeric_columns_indices] = X_numeric
else:
    print("No numeric columns found for scaling.")

No numeric columns found for scaling.


In [99]:
# Convert to float32
X_transformed = np.asarray(X_transformed).astype('float32')
y = np.asarray(y).astype('float32')

In [101]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, random_state=42)

In [103]:
# Build the model
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [105]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [107]:
# Train the model
model.fit(X_train, y_train, epochs=100, validation_split=0.1)

Epoch 1/100
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 747us/step - accuracy: 0.8934 - loss: 1.6190 - val_accuracy: 0.9517 - val_loss: 0.6860
Epoch 2/100
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 338us/step - accuracy: 0.9158 - loss: 0.8097 - val_accuracy: 0.9517 - val_loss: 0.3120
Epoch 3/100
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 347us/step - accuracy: 0.9314 - loss: 0.4203 - val_accuracy: 0.9517 - val_loss: 0.2385
Epoch 4/100
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 348us/step - accuracy: 0.9351 - loss: 0.3534 - val_accuracy: 0.9517 - val_loss: 0.2204
Epoch 5/100
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 347us/step - accuracy: 0.9467 - loss: 0.2675 - val_accuracy: 0.9517 - val_loss: 0.2355
Epoch 6/100
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 353us/step - accuracy: 0.9592 - loss: 0.2005 - val_accuracy: 0.9517 - val_loss: 0.2273
Epoch 7/10

<keras.src.callbacks.history.History at 0x2a474d070>

In [109]:
# Evaluate the model
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype('float32')  # Convert probabilities to binary outcomes
print('Accuracy:', accuracy_score(y_test, y_pred))

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 612us/step
Accuracy: 0.9460285132382892


In [111]:
# Prepare input for prediction
def preprocess_input(input_features):
    # Convert input to a DataFrame
    input_df = pd.DataFrame([input_features], columns=df.columns[1:-1])
    
    # Apply the same preprocessing steps
    input_transformed = ct.transform(input_df)
    
    # Normalize if numeric columns exist in the transformed input
    input_numeric = input_transformed[:, numeric_columns_indices] if numeric_columns_indices else np.array([])
    if input_numeric.size > 0:
        input_numeric = scaler.transform(input_numeric)
        input_transformed[:, numeric_columns_indices] = input_numeric
    
    return input_transformed

In [127]:
# Example input
example_input = [0, 0, 1, 0, 40, 1, 1, 1, 0, 0, 170, 20]