In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay


In [None]:
# Load the dataset
# The path is relative, so it resolves against the kernel's working directory.
df = pd.read_csv('../../sqllite/heart_disease_uci.csv')

In [None]:
# Feature matrix: the 13 input columns used by the model
feature_cols = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalch', 'exang', 'oldpeak', 'slope', 'ca', 'thal',
]
x = df[feature_cols]

# Binary target: 1 when 'num' is greater than 0, otherwise 0
y = df['num'].apply(lambda severity: int(severity > 0))

# Hold out 15% of the rows as the test set; random_state fixes the shuffle
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.15,
    random_state=48,
)

# Preprocess categorical variables using one-hot encoding
categorical_cols = ['sex', 'cp', 'restecg', 'slope', 'ca', 'thal']
preprocessor = ColumnTransformer(
    transformers=[
        # handle_unknown='ignore': a category that appears only in the test
        # split is encoded as all zeros instead of raising during transform().
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ],
    # Columns not listed in categorical_cols are forwarded unchanged.
    remainder='passthrough',
    # Always return a dense array: StandardScaler below centres the data,
    # which it refuses to do on a sparse matrix.
    sparse_threshold=0,
)
# Fit the encoder on the training rows only, then reuse it for the test rows.
x_train_encoded = preprocessor.fit_transform(x_train)
x_test_encoded = preprocessor.transform(x_test)

# Standardize the features (statistics come from the training split only)
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_encoded)
x_test_scaled = scaler.transform(x_test_encoded)

# StandardScaler keeps NaNs in its output. Replace any that remain with 0,
# which is the training mean of a standardized column, so the network never
# sees NaN inputs. This is a no-op when the data has no missing values.
# NOTE(review): whether the CSV contains missing values is not visible here.
x_train_scaled = np.where(np.isnan(x_train_scaled), 0.0, x_train_scaled)
x_test_scaled = np.where(np.isnan(x_test_scaled), 0.0, x_test_scaled)

# Seed the NumPy and TensorFlow global generators (same value as the
# train/test split) so weight initialisation and batch shuffling repeat from
# run to run. Some GPU kernels may still be non-deterministic.
np.random.seed(48)
tf.random.set_seed(48)

# Define the neural network architecture:
# two ReLU hidden layers (64 and 32 units) and one sigmoid output unit
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(x_train_scaled.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; 10% of the training rows are held out for validation and
# verbose=0 silences the per-epoch log (metrics are kept in `history`)
history = model.fit(x_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=0)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(x_test_scaled, y_test)
print('Test Accuracy:', test_accuracy * 100, '%')

# Make predictions
y_pred_prob = model.predict(x_test_scaled)
# Fail loudly if training diverged; casting NaN to int below would otherwise
# produce meaningless labels without any error.
if np.isnan(y_pred_prob).any():
    raise ValueError('Model produced NaN predictions; check the inputs for missing values.')
# Round the sigmoid outputs to hard 0/1 labels and flatten (n, 1) -> (n,)
y_pred = np.round(y_pred_prob).flatten().astype(int)

# Create a confusion matrix and display
class_names = ['No Heart Disease', 'Heart Disease']
# labels=[0, 1] pins the matrix to 2x2 even if only one class occurs in
# y_test/y_pred, so it always matches the two display labels.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot()