In [2]:
import itertools

import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

# function to create the model
def build_model(input_shape, num_layers, num_units, dropout_rate, learning_rate):
    """Build and compile a fully connected binary classifier.

    The network consists of ``num_layers`` hidden blocks, each a ReLU
    ``Dense`` layer with ``num_units`` units followed by ``Dropout``, and a
    single sigmoid output unit. It is compiled with the Adam optimizer,
    binary cross-entropy loss and the accuracy metric.

    Args:
        input_shape: Number of input features per sample (a scalar; it is
            wrapped into a 1-tuple shape).
        num_layers: Number of hidden Dense/Dropout blocks; must be >= 1.
        num_units: Number of units in every hidden Dense layer.
        dropout_rate: Rate passed to every Dropout layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model.

    Raises:
        ValueError: If ``num_layers`` is less than 1.
    """
    # Fail fast: a request for fewer than one hidden block would otherwise
    # silently produce a model that does not match the requested setting.
    if num_layers < 1:
        raise ValueError(f"num_layers must be at least 1, got {num_layers!r}")

    model = Sequential()
    # Declare the input explicitly so that every hidden block is built the
    # same way instead of special-casing the first Dense layer.
    model.add(tf.keras.Input(shape=(input_shape,)))

    for _ in range(num_layers):
        model.add(Dense(num_units, activation='relu'))
        model.add(Dropout(dropout_rate))

    model.add(Dense(1, activation='sigmoid'))
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Read the semicolon-separated dataset, derive `age_years` as `age` / 365,
# then discard the `id` and original `age` columns.
data = pd.read_csv('cardio_train.csv', sep=';')
data['age_years'] = data['age'].div(365)
data.drop(columns=['id', 'age'], inplace=True)

# `cardio` is the target; every remaining column is a feature.
y = data['cardio']
X = data.drop(columns='cardio')

# Hold out 20% of the rows as the test set (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features with statistics learned from the training rows
# only, then apply the same transform to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# hyperparameter tuning
#
# Exhaustive grid search over depth, width, dropout rate and learning rate.
# The winning configuration is chosen by the accuracy on the validation split
# that `fit` holds out of the training data. The test set is scored only for
# reporting, so the test accuracy of the selected configuration is not
# inflated by the search itself.
num_layers_options = [2, 3, 4]
num_units_options = [64, 128, 256]
dropout_rate_options = [0.3, 0.5, 0.7]
learning_rate_options = [0.001, 0.01, 0.1]

best_val_accuracy = 0
best_accuracy = 0
best_params = {}

# `product` varies the last option list fastest, i.e. the same visiting order
# as four nested loops over these lists.
for num_layers, num_units, dropout_rate, learning_rate in itertools.product(
    num_layers_options,
    num_units_options,
    dropout_rate_options,
    learning_rate_options,
):
    # Release the Keras global state left behind by the previous candidate so
    # memory use does not grow with every model that is built.
    tf.keras.backend.clear_session()

    model = build_model(X_train_scaled.shape[1], num_layers, num_units, dropout_rate, learning_rate)
    history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, verbose=0, validation_split=0.2)
    # Validation accuracy after the final epoch drives model selection.
    val_accuracy = history.history['val_accuracy'][-1]
    loss, accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
    print(f"layers: {num_layers}, units: {num_units}, dropout: {dropout_rate}, learning rate: {learning_rate}, accuracy: {accuracy:.4f}")

    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        # Test accuracy of the configuration selected on validation data.
        best_accuracy = accuracy
        best_params = {'num_layers': num_layers, 'num_units': num_units, 'dropout_rate': dropout_rate, 'learning_rate': learning_rate}

# print best parameters, their test accuracy and the validation accuracy used
# to select them
print("Best Accuracy:", best_accuracy)
print("Best Parameters:", best_params)
print("Best Validation Accuracy:", best_val_accuracy)


n_layers: 2, n_units: 64, dropout_rate: 0.3, learning_rate: 0.001, accuracy: 0.739642858505249
n_layers: 2, n_units: 64, dropout_rate: 0.3, learning_rate: 0.01, accuracy: 0.7384285926818848
n_layers: 2, n_units: 64, dropout_rate: 0.3, learning_rate: 0.1, accuracy: 0.7386428713798523
n_layers: 2, n_units: 64, dropout_rate: 0.5, learning_rate: 0.001, accuracy: 0.7390714287757874
n_layers: 2, n_units: 64, dropout_rate: 0.5, learning_rate: 0.01, accuracy: 0.7384999990463257
n_layers: 2, n_units: 64, dropout_rate: 0.5, learning_rate: 0.1, accuracy: 0.7389285564422607
n_layers: 2, n_units: 64, dropout_rate: 0.7, learning_rate: 0.001, accuracy: 0.7367143034934998
n_layers: 2, n_units: 64, dropout_rate: 0.7, learning_rate: 0.01, accuracy: 0.7348571419715881
n_layers: 2, n_units: 64, dropout_rate: 0.7, learning_rate: 0.1, accuracy: 0.737928569316864
n_layers: 2, n_units: 128, dropout_rate: 0.3, learning_rate: 0.001, accuracy: 0.7405714392662048
n_layers: 2, n_units: 128, dropout_rate: 0.3, lear