In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

In [2]:
file_path = 'Data_Sheet_1_Association between socioeconomic status and arteriosclerotic cardiovascular disease risk and cause-specific and all-cause mortality.CSV' 
data = pd.read_csv(file_path)

In [3]:
features = data[['PIR', 'edu', 'poverty', "eth", "sex", "PHQ9"]]
target = data['ASCVD']

categorical_columns = features.select_dtypes(include=['object']).columns
features = pd.get_dummies(features, columns=categorical_columns, drop_first=True)
features = features.fillna(features.median())
target = target.fillna(target.median())

target = target.astype(int)

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=33)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

y_train = np.array(y_train).astype(int).reshape(-1)
y_test = np.array(y_test).astype(int).reshape(-1)

In [None]:
from tensorflow.keras.layers import LeakyReLU

model = tf.keras.Sequential([
    layers.InputLayer(input_shape=(X_train.shape[1],)),

    # First hidden layer
    layers.Dense(128, kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    LeakyReLU(alpha=0.1),
    layers.BatchNormalization(),
    layers.Dropout(0.3), 

    # Second hidden layer
    layers.Dense(64, kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    LeakyReLU(alpha=0.1),
    layers.BatchNormalization(),
    layers.Dropout(0.2),

    # Third hidden layer
    layers.Dense(32, kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    LeakyReLU(alpha=0.1),
    layers.BatchNormalization(),
    layers.Dropout(0.2),

    # Fourth hidden layer
    layers.Dense(32, kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    LeakyReLU(alpha=0.1),
    layers.BatchNormalization(),
    layers.Dropout(0.2),

    layers.Dense(1, activation='sigmoid')
])

# Compile the model with RMSprop optimizer
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
#     initial_learning_rate=0.0001,
#     decay_steps=1000,
#     decay_rate=0.96,
#     staircase=True
# )
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weight_dict = {0: class_weights[0], 1: class_weights[1]}
print(f"Class Weights: {class_weight_dict}")

# Train the model
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    class_weight=class_weight_dict
)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

# Make predictions
predictions = model.predict(X_test)



Class Weights: {0: 0.5545504892006645, 1: 5.082910321489002}
Epoch 1/100
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.4896 - loss: 1.1235 - val_accuracy: 0.4267 - val_loss: 1.0068
Epoch 2/100
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5179 - loss: 0.9854 - val_accuracy: 0.5059 - val_loss: 0.9429
Epoch 3/100
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5301 - loss: 0.9629 - val_accuracy: 0.5124 - val_loss: 0.9245
Epoch 4/100
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5262 - loss: 0.9369 - val_accuracy: 0.5259 - val_loss: 0.8888
Epoch 5/100
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5404 - loss: 0.9062 - val_accuracy: 0.5130 - val_loss: 0.8869
Epoch 6/100
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5415 - loss: 0.9058 - v

In [None]:
import seaborn as sns
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

binary_predictions = (predictions > 0.5).astype(int)

cm = confusion_matrix(y_test, binary_predictions)

cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

labels = ["Negative ASCVD", "Positive ASCVD"]

plt.figure(figsize=(8, 6))
sns.heatmap(cm_normalized, annot=True, fmt=".2%", cmap="Blues", xticklabels=labels, yticklabels=labels)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Normalized Confusion Matrix for MLP")
plt.show()


In [None]:
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
train_loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 6))
plt.plot(train_acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Model Accuracy Over Epochs')
plt.legend()
plt.grid()
plt.show()

plt.figure(figsize=(8, 6))
plt.plot(train_loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Model Loss Over Epochs')
plt.legend()
plt.grid()
plt.show()