# Random Forest+Modified GA

In [2]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score

# Set random seed for reproducibility
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Load dataset
df = pd.read_csv('heart.csv')

# Encode categorical features
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'target']
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Scale numerical features
numerical_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Separate features and target
X = df.drop(columns=['target']).values
y = df['target'].values

# Fitness function using Stratified K-Fold Cross-Validation
def fitness_function(chromosome, X, y):
    selected_features = np.where(chromosome == 1)[0]
    if len(selected_features) == 0:
        return 0  # Avoid empty feature sets

    X_selected = X[:, selected_features]
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
    accuracy_scores = []

    for train_idx, val_idx in skf.split(X_selected, y):
        X_train, X_val = X_selected[train_idx], X_selected[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED)
        model.fit(X_train, y_train)
        accuracy_scores.append(accuracy_score(y_val, model.predict(X_val)))

    return np.mean(accuracy_scores)

# Modified Genetic Algorithm for Feature Selection
def modified_genetic_algorithm(X, y, num_generations=50, population_size=20, mutation_rate=0.1):
    num_features = X.shape[1]
    population = np.random.randint(2, size=(population_size, num_features))

    for generation in range(num_generations):
        fitness_scores = np.array([fitness_function(chrom, X, y) for chrom in population])
        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = population[sorted_indices]

        if fitness_scores[0] - fitness_scores[-1] <= 0.01:  # Convergence check
            break

        parents = population[:4]  # Select top 4 parents
        offspring = np.mean(parents, axis=0) > 0.5  # Crossover strategy
        offspring = np.array(offspring, dtype=int)  # Convert boolean to int

        # Mutation
        mutation_mask = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutation_mask] = 1 - offspring[mutation_mask]

        if fitness_function(offspring, X, y) > fitness_scores[-1]:
            population[-1] = offspring  # Replace worst individual

    return population[0]

# Run Modified Genetic Algorithm
best_chromosome = modified_genetic_algorithm(X, y)
selected_features = np.where(best_chromosome == 1)[0]

# Perform K-Fold Cross-Validation on the selected features
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
accuracy_scores_rf = []

for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    rf_model = RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED)
    rf_model.fit(X_train, y_train)

    y_pred = rf_model.predict(X_test)
    accuracy_scores_rf.append(accuracy_score(y_test, y_pred))

# Output Results
print("Best Feature Selection:", best_chromosome)
print("Mean Test Accuracy (Random Forest with K-Fold CV):", np.mean(accuracy_scores_rf))
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# Perform K-Fold Cross-Validation on the selected features
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
precision_scores = []
recall_scores = []
roc_auc_scores = []

for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    rf_model = RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED)
    rf_model.fit(X_train, y_train)

    y_pred = rf_model.predict(X_test)
    
    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    roc_auc_scores.append(roc_auc_score(y_test, rf_model.predict_proba(X_test)[:, 1]))  # AUC-ROC for probability

# Output Results
print("Mean Precision:", np.mean(precision_scores))
print("Mean Recall:", np.mean(recall_scores))
print("Mean AUC-ROC:", np.mean(roc_auc_scores))


Best Feature Selection: [1 1 0 1 0 1 0 0 1 1 0 1 1]
Mean Test Accuracy (Random Forest with K-Fold CV): 1.0
Mean Precision: 1.0
Mean Recall: 1.0
Mean AUC-ROC: 1.0


# XGBoost+Modified GA

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Set a fixed random seed for reproducibility
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Load dataset
df = pd.read_csv('heart.csv')

# Encode categorical features
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'target']
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Scale numerical features
numerical_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Define features and target variable
X = df.drop(columns=['target']).values
y = df['target'].values

# Fitness function using Stratified K-Fold Cross-Validation
def fitness_function(chromosome, X, y):
    selected_features = np.where(chromosome == 1)[0]
    if len(selected_features) == 0:
        return 0  # Avoid empty feature sets

    X_selected = X[:, selected_features]
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
    accuracy_scores = []

    for train_idx, val_idx in skf.split(X_selected, y):
        X_train, X_val = X_selected[train_idx], X_selected[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = XGBClassifier(n_estimators=100, max_depth=3, min_child_weight=3, random_state=RANDOM_SEED)
        model.fit(X_train, y_train)
        accuracy_scores.append(accuracy_score(y_val, model.predict(X_val)))

    return np.mean(accuracy_scores)  # Return average accuracy

# Modified Genetic Algorithm for Feature Selection
def modified_genetic_algorithm(X, y, num_generations=50, population_size=20, mutation_rate=0.1):
    num_features = X.shape[1]
    population = np.random.randint(2, size=(population_size, num_features))

    for generation in range(num_generations):
        fitness_scores = np.array([fitness_function(chrom, X, y) for chrom in population])
        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = population[sorted_indices]

        if fitness_scores[0] - fitness_scores[-1] <= 0.01:  # Convergence check
            break

        parents = population[:4]  # Select top 4 parents
        offspring = np.mean(parents, axis=0) > 0.5  # Crossover strategy
        offspring = np.array(offspring, dtype=int)

        # Mutation
        mutation_mask = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutation_mask] = 1 - offspring[mutation_mask]

        if fitness_function(offspring, X, y) > fitness_scores[-1]:
            population[-1] = offspring  # Replace worst individual

    return population[0]  # Return best chromosome

# Run Modified Genetic Algorithm
best_chromosome = modified_genetic_algorithm(X, y)
selected_features = np.where(best_chromosome == 1)[0]

# Perform K-Fold Cross-Validation on the selected features
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
accuracy_scores_xgb = []

for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    xgb_model = XGBClassifier(n_estimators=100, max_depth=3, min_child_weight=3, random_state=RANDOM_SEED)
    xgb_model.fit(X_train, y_train)

    y_pred = xgb_model.predict(X_test)
    accuracy_scores_xgb.append(accuracy_score(y_test, y_pred))

# Output Results
print("Best Feature Selection:", best_chromosome)
print("Mean Test Accuracy (XGBoost with K-Fold CV):", np.mean(accuracy_scores_xgb))
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# Perform K-Fold Cross-Validation on the selected features for XGBoost
precision_scores = []
recall_scores = []
roc_auc_scores = []

for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    xgb_model = XGBClassifier(n_estimators=100, max_depth=3, min_child_weight=3, random_state=RANDOM_SEED)
    xgb_model.fit(X_train, y_train)

    y_pred = xgb_model.predict(X_test)
    y_pred_prob = xgb_model.predict_proba(X_test)[:, 1]  # Probabilities for AUC-ROC

    # Calculate metrics
    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    roc_auc_scores.append(roc_auc_score(y_test, y_pred_prob))  # AUC-ROC based on predicted probabilities

# Output Results
print("Mean Precision:", np.mean(precision_scores))
print("Mean Recall:", np.mean(recall_scores))
print("Mean AUC-ROC:", np.mean(roc_auc_scores))


Best Feature Selection: [1 1 1 1 1 1 1 1 0 0 1 1 1]
Mean Test Accuracy (XGBoost with K-Fold CV): 0.9902439024390244
Mean Precision: 0.996116504854369
Mean Recall: 0.9847619047619048
Mean AUC-ROC: 0.9996571428571428


# Logistic Regression+Modified GA

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Set a fixed random seed for reproducibility
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Load dataset
df = pd.read_csv('heart.csv')

# Encode categorical features
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'target']
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Scale numerical features
numerical_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Define features and target variable
X = df.drop(columns=['target']).values
y = df['target'].values

# Fitness function using Stratified K-Fold Cross-Validation
def fitness_function(chromosome, X, y):
    selected_features = np.where(chromosome == 1)[0]
    if len(selected_features) == 0:
        return 0  # Avoid empty feature sets

    X_selected = X[:, selected_features]
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
    accuracy_scores = []

    for train_idx, val_idx in skf.split(X_selected, y):
        X_train, X_val = X_selected[train_idx], X_selected[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = LogisticRegression(max_iter=500, random_state=RANDOM_SEED)
        model.fit(X_train, y_train)
        accuracy_scores.append(accuracy_score(y_val, model.predict(X_val)))

    return np.mean(accuracy_scores)  # Return average accuracy

# Modified Genetic Algorithm for Feature Selection
def modified_genetic_algorithm(X, y, num_generations=50, population_size=20, mutation_rate=0.1):
    num_features = X.shape[1]
    population = np.random.randint(2, size=(population_size, num_features))

    for generation in range(num_generations):
        fitness_scores = np.array([fitness_function(chrom, X, y) for chrom in population])
        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = population[sorted_indices]

        if fitness_scores[0] - fitness_scores[-1] <= 0.01:  # Convergence check
            break

        parents = population[:4]  # Select top 4 parents
        offspring = np.mean(parents, axis=0) > 0.5  # Crossover strategy
        offspring = np.array(offspring, dtype=int)

        # Mutation
        mutation_mask = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutation_mask] = 1 - offspring[mutation_mask]

        if fitness_function(offspring, X, y) > fitness_scores[-1]:
            population[-1] = offspring  # Replace worst individual

    return population[0]  # Return best chromosome

# Run Modified Genetic Algorithm
best_chromosome = modified_genetic_algorithm(X, y)
selected_features = np.where(best_chromosome == 1)[0]

# Perform K-Fold Cross-Validation on the selected features
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
accuracy_scores_lr = []

for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    lr_model = LogisticRegression(max_iter=500, random_state=RANDOM_SEED)
    lr_model.fit(X_train, y_train)

    y_pred = lr_model.predict(X_test)
    accuracy_scores_lr.append(accuracy_score(y_test, y_pred))

# Output Results
print("Best Feature Selection:", best_chromosome)
print("Mean Test Accuracy (Logistic Regression with K-Fold CV):", np.mean(accuracy_scores_lr))
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# Initialize lists for the new metrics
precision_scores = []
recall_scores = []
roc_auc_scores = []

# Perform K-Fold Cross-Validation on the selected features for Logistic Regression
for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    lr_model = LogisticRegression(max_iter=500, random_state=RANDOM_SEED)
    lr_model.fit(X_train, y_train)

    y_pred = lr_model.predict(X_test)
    y_pred_prob = lr_model.predict_proba(X_test)[:, 1]  # Probabilities for AUC-ROC

    # Calculate metrics
    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    roc_auc_scores.append(roc_auc_score(y_test, y_pred_prob))  # AUC-ROC based on predicted probabilities

# Output Results
print("Mean Precision:", np.mean(precision_scores))
print("Mean Recall:", np.mean(recall_scores))
print("Mean AUC-ROC:", np.mean(roc_auc_scores))


Best Feature Selection: [0 1 1 1 1 0 1 0 0 1 1 1 1]
Mean Test Accuracy (Logistic Regression with K-Fold CV): 0.8458536585365855
Mean Precision: 0.8354640591476686
Mean Recall: 0.872524707996406
Mean AUC-ROC: 0.9069237605162133


# KNN+Modified GA

In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Set a fixed random seed for reproducibility
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Load dataset
df = pd.read_csv('heart.csv')

# Encode categorical features
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'target']
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Scale numerical features
numerical_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Define features and target variable
X = df.drop(columns=['target']).values
y = df['target'].values

# Fitness function using Stratified K-Fold Cross-Validation
def fitness_function(chromosome, X, y):
    selected_features = np.where(chromosome == 1)[0]
    if len(selected_features) == 0:
        return 0  # Avoid empty feature sets

    X_selected = X[:, selected_features]
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
    accuracy_scores = []

    for train_idx, val_idx in skf.split(X_selected, y):
        X_train, X_val = X_selected[train_idx], X_selected[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = KNeighborsClassifier(n_neighbors=5)  # KNN with k=5
        model.fit(X_train, y_train)
        accuracy_scores.append(accuracy_score(y_val, model.predict(X_val)))

    return np.mean(accuracy_scores)  # Return average accuracy

# Modified Genetic Algorithm for Feature Selection
def modified_genetic_algorithm(X, y, num_generations=50, population_size=20, mutation_rate=0.1):
    num_features = X.shape[1]
    population = np.random.randint(2, size=(population_size, num_features))

    for generation in range(num_generations):
        fitness_scores = np.array([fitness_function(chrom, X, y) for chrom in population])
        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = population[sorted_indices]

        if fitness_scores[0] - fitness_scores[-1] <= 0.01:  # Convergence check
            break

        parents = population[:4]  # Select top 4 parents
        offspring = np.mean(parents, axis=0) > 0.5  # Crossover strategy
        offspring = np.array(offspring, dtype=int)

        # Mutation
        mutation_mask = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutation_mask] = 1 - offspring[mutation_mask]

        if fitness_function(offspring, X, y) > fitness_scores[-1]:
            population[-1] = offspring  # Replace worst individual

    return population[0]  # Return best chromosome

# Run Modified Genetic Algorithm
best_chromosome = modified_genetic_algorithm(X, y)
selected_features = np.where(best_chromosome == 1)[0]

# Perform K-Fold Cross-Validation on the selected features
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
accuracy_scores_knn = []

for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    knn_model = KNeighborsClassifier(n_neighbors=5)  # KNN with k=5
    knn_model.fit(X_train, y_train)

    y_pred = knn_model.predict(X_test)
    accuracy_scores_knn.append(accuracy_score(y_test, y_pred))

# Output Results
print("Best Feature Selection:", best_chromosome)
print("Mean Test Accuracy (KNN with K-Fold CV):", np.mean(accuracy_scores_knn))
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# Initialize lists for the new metrics
precision_scores = []
recall_scores = []
roc_auc_scores = []

# Perform K-Fold Cross-Validation on the selected features for KNN
for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    knn_model = KNeighborsClassifier(n_neighbors=5)  # KNN with k=5
    knn_model.fit(X_train, y_train)

    y_pred = knn_model.predict(X_test)
    y_pred_prob = knn_model.predict_proba(X_test)[:, 1]  # Probabilities for AUC-ROC

    # Calculate metrics
    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    roc_auc_scores.append(roc_auc_score(y_test, y_pred_prob))  # AUC-ROC based on predicted probabilities

# Output Results
print("Mean Precision:", np.mean(precision_scores))
print("Mean Recall:", np.mean(recall_scores))
print("Mean AUC-ROC:", np.mean(roc_auc_scores))


Best Feature Selection: [1 1 0 1 0 1 0 0 1 1 0 1 1]
Mean Test Accuracy (KNN with K-Fold CV): 0.862439024390244
Mean Precision: 0.8998890105878058
Mean Recall: 0.8231805929919138
Mean AUC-ROC: 0.964367034223638


# SVM+Modified GA

In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Set a fixed random seed for reproducibility
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Load dataset
df = pd.read_csv('heart.csv')

# Encode categorical features
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'target']
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Scale numerical features
numerical_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Define features and target variable
X = df.drop(columns=['target']).values
y = df['target'].values

# Fitness function using Stratified K-Fold Cross-Validation
def fitness_function(chromosome, X, y):
    selected_features = np.where(chromosome == 1)[0]
    if len(selected_features) == 0:
        return 0  # Avoid empty feature sets

    X_selected = X[:, selected_features]
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
    accuracy_scores = []

    for train_idx, val_idx in skf.split(X_selected, y):
        X_train, X_val = X_selected[train_idx], X_selected[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = SVC(kernel='rbf', C=1.0, random_state=RANDOM_SEED)  # SVM with RBF kernel
        model.fit(X_train, y_train)
        accuracy_scores.append(accuracy_score(y_val, model.predict(X_val)))

    return np.mean(accuracy_scores)  # Return average accuracy

# Modified Genetic Algorithm for Feature Selection
def modified_genetic_algorithm(X, y, num_generations=50, population_size=20, mutation_rate=0.1):
    num_features = X.shape[1]
    population = np.random.randint(2, size=(population_size, num_features))

    for generation in range(num_generations):
        fitness_scores = np.array([fitness_function(chrom, X, y) for chrom in population])
        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = population[sorted_indices]

        if fitness_scores[0] - fitness_scores[-1] <= 0.01:  # Convergence check
            break

        parents = population[:4]  # Select top 4 parents
        offspring = np.mean(parents, axis=0) > 0.5  # Crossover strategy
        offspring = np.array(offspring, dtype=int)

        # Mutation
        mutation_mask = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutation_mask] = 1 - offspring[mutation_mask]

        if fitness_function(offspring, X, y) > fitness_scores[-1]:
            population[-1] = offspring  # Replace worst individual

    return population[0]  # Return best chromosome

# Run Modified Genetic Algorithm
best_chromosome = modified_genetic_algorithm(X, y)
selected_features = np.where(best_chromosome == 1)[0]

# Perform K-Fold Cross-Validation on the selected features
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
accuracy_scores_svm = []

for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    svm_model = SVC(kernel='rbf', C=1.0, random_state=RANDOM_SEED)  # SVM with RBF kernel
    svm_model.fit(X_train, y_train)

    y_pred = svm_model.predict(X_test)
    accuracy_scores_svm.append(accuracy_score(y_test, y_pred))

# Output Results
print("Best Feature Selection:", best_chromosome)
print("Mean Test Accuracy (SVM with K-Fold CV):", np.mean(accuracy_scores_svm))
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# Initialize lists for the new metrics
precision_scores = []
recall_scores = []
roc_auc_scores = []

# Perform K-Fold Cross-Validation on the selected features for SVM
for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    svm_model = SVC(kernel='rbf', C=1.0, random_state=RANDOM_SEED, probability=True)  # Enable probability estimates
    svm_model.fit(X_train, y_train)

    y_pred = svm_model.predict(X_test)
    y_pred_prob = svm_model.predict_proba(X_test)[:, 1]  # Probabilities for AUC-ROC

    # Calculate metrics
    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    roc_auc_scores.append(roc_auc_score(y_test, y_pred_prob))  # AUC-ROC based on predicted probabilities

# Output Results
print("Mean Precision:", np.mean(precision_scores))
print("Mean Recall:", np.mean(recall_scores))
print("Mean AUC-ROC:", np.mean(roc_auc_scores))


Best Feature Selection: [1 1 1 1 1 1 1 1 0 0 1 1 1]
Mean Test Accuracy (SVM with K-Fold CV): 0.8917073170731709
Mean Precision: 0.8755508010519263
Mean Recall: 0.9201437556154538
Mean AUC-ROC: 0.9474428163031936


# Decision Tree+Modified GA

In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Set a fixed random seed for reproducibility
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Load dataset
df = pd.read_csv('heart.csv')

# Encode categorical features
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'target']
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Scale numerical features
numerical_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Define features and target variable
X = df.drop(columns=['target']).values
y = df['target'].values

# Fitness function using Stratified K-Fold Cross-Validation
def fitness_function(chromosome, X, y):
    selected_features = np.where(chromosome == 1)[0]
    if len(selected_features) == 0:
        return 0  # Avoid empty feature sets

    X_selected = X[:, selected_features]
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
    accuracy_scores = []

    for train_idx, val_idx in skf.split(X_selected, y):
        X_train, X_val = X_selected[train_idx], X_selected[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = DecisionTreeClassifier(random_state=RANDOM_SEED)
        model.fit(X_train, y_train)
        accuracy_scores.append(accuracy_score(y_val, model.predict(X_val)))

    return np.mean(accuracy_scores)  # Return average accuracy

# Modified Genetic Algorithm for Feature Selection
def modified_genetic_algorithm(X, y, num_generations=50, population_size=20, mutation_rate=0.1):
    num_features = X.shape[1]
    population = np.random.randint(2, size=(population_size, num_features))

    for generation in range(num_generations):
        fitness_scores = np.array([fitness_function(chrom, X, y) for chrom in population])
        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = population[sorted_indices]

        if fitness_scores[0] - fitness_scores[-1] <= 0.01:  # Convergence check
            break

        parents = population[:4]  # Select top 4 parents
        offspring = np.mean(parents, axis=0) > 0.5  # Crossover strategy
        offspring = np.array(offspring, dtype=int)

        # Mutation
        mutation_mask = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutation_mask] = 1 - offspring[mutation_mask]

        if fitness_function(offspring, X, y) > fitness_scores[-1]:
            population[-1] = offspring  # Replace worst individual

    return population[0]  # Return best chromosome

# Run Modified Genetic Algorithm
best_chromosome = modified_genetic_algorithm(X, y)
selected_features = np.where(best_chromosome == 1)[0]

# Perform K-Fold Cross-Validation on the selected features
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
accuracy_scores_dt = []

for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    dt_model = DecisionTreeClassifier(random_state=RANDOM_SEED)
    dt_model.fit(X_train, y_train)

    y_pred = dt_model.predict(X_test)
    accuracy_scores_dt.append(accuracy_score(y_test, y_pred))

# Output Results
print("Best Feature Selection:", best_chromosome)
print("Mean Test Accuracy (Decision Tree with K-Fold CV):", np.mean(accuracy_scores_dt))
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# Initialize lists for the new metrics
precision_scores = []
recall_scores = []
roc_auc_scores = []

# Perform K-Fold Cross-Validation on the selected features for Decision Tree
for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    dt_model = DecisionTreeClassifier(random_state=RANDOM_SEED)
    dt_model.fit(X_train, y_train)

    y_pred = dt_model.predict(X_test)
    y_pred_prob = dt_model.predict_proba(X_test)[:, 1]  # Probabilities for AUC-ROC

    # Calculate metrics
    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    roc_auc_scores.append(roc_auc_score(y_test, y_pred_prob))  # AUC-ROC based on predicted probabilities

# Output Results
print("Mean Precision:", np.mean(precision_scores))
print("Mean Recall:", np.mean(recall_scores))
print("Mean AUC-ROC:", np.mean(roc_auc_scores))


Best Feature Selection: [1 1 1 1 0 1 1 0 1 0 0 1 0]
Mean Test Accuracy (Decision Tree with K-Fold CV): 1.0
Mean Precision: 1.0
Mean Recall: 1.0
Mean AUC-ROC: 1.0


# Naive Bayes+Modified GA

In [8]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Set a fixed random seed for reproducibility
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Load dataset
df = pd.read_csv('heart.csv')

# Encode categorical features
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'target']
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Scale numerical features
numerical_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Define features and target variable
X = df.drop(columns=['target']).values
y = df['target'].values

# Fitness function using Stratified K-Fold Cross-Validation
def fitness_function(chromosome, X, y):
    selected_features = np.where(chromosome == 1)[0]
    if len(selected_features) == 0:
        return 0  # Avoid empty feature sets

    X_selected = X[:, selected_features]
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
    accuracy_scores = []

    for train_idx, val_idx in skf.split(X_selected, y):
        X_train, X_val = X_selected[train_idx], X_selected[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = GaussianNB()  # Naive Bayes Classifier
        model.fit(X_train, y_train)
        accuracy_scores.append(accuracy_score(y_val, model.predict(X_val)))

    return np.mean(accuracy_scores)  # Return average accuracy

# Modified Genetic Algorithm for Feature Selection
def modified_genetic_algorithm(X, y, num_generations=50, population_size=20, mutation_rate=0.1):
    num_features = X.shape[1]
    population = np.random.randint(2, size=(population_size, num_features))

    for generation in range(num_generations):
        fitness_scores = np.array([fitness_function(chrom, X, y) for chrom in population])
        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = population[sorted_indices]

        if fitness_scores[0] - fitness_scores[-1] <= 0.01:  # Convergence check
            break

        parents = population[:4]  # Select top 4 parents
        offspring = np.mean(parents, axis=0) > 0.5  # Crossover strategy
        offspring = np.array(offspring, dtype=int)

        # Mutation
        mutation_mask = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutation_mask] = 1 - offspring[mutation_mask]

        if fitness_function(offspring, X, y) > fitness_scores[-1]:
            population[-1] = offspring  # Replace worst individual

    return population[0]  # Return best chromosome

# Run Modified Genetic Algorithm
best_chromosome = modified_genetic_algorithm(X, y)
selected_features = np.where(best_chromosome == 1)[0]

# Perform K-Fold Cross-Validation on the selected features
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
accuracy_scores_nb = []

for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    nb_model = GaussianNB()
    nb_model.fit(X_train, y_train)

    y_pred = nb_model.predict(X_test)
    accuracy_scores_nb.append(accuracy_score(y_test, y_pred))

# Output Results
print("Best Feature Selection:", best_chromosome)
print("Mean Test Accuracy (Naive Bayes with K-Fold CV):", np.mean(accuracy_scores_nb))
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# Initialize lists for the new metrics
precision_scores = []
recall_scores = []
roc_auc_scores = []

# Perform K-Fold Cross-Validation on the selected features for Naive Bayes
for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    nb_model = GaussianNB()
    nb_model.fit(X_train, y_train)

    y_pred = nb_model.predict(X_test)
    y_pred_prob = nb_model.predict_proba(X_test)[:, 1]  # Probabilities for AUC-ROC

    # Calculate metrics
    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    roc_auc_scores.append(roc_auc_score(y_test, y_pred_prob))  # AUC-ROC based on predicted probabilities

# Output Results
print("Mean Precision:", np.mean(precision_scores))
print("Mean Recall:", np.mean(recall_scores))
print("Mean AUC-ROC:", np.mean(roc_auc_scores))


Best Feature Selection: [0 1 1 1 1 0 1 0 0 1 1 1 1]
Mean Test Accuracy (Naive Bayes with K-Fold CV): 0.8487804878048781
Mean Precision: 0.8337847081393388
Mean Recall: 0.8821024258760108
Mean AUC-ROC: 0.9039907430096109


# LightGBM+Modified GA

In [9]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")
# Set a fixed random seed for reproducibility
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Load dataset
df = pd.read_csv('heart.csv')

# Encode categorical features
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'target']
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Scale numerical features
numerical_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Define features and target variable
X = df.drop(columns=['target']).values
y = df['target'].values

# Fitness function using Stratified K-Fold Cross-Validation
def fitness_function(chromosome, X, y):
    selected_features = np.where(chromosome == 1)[0]
    if len(selected_features) == 0:
        return 0  # Avoid empty feature sets

    X_selected = X[:, selected_features]
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
    accuracy_scores = []

    for train_idx, val_idx in skf.split(X_selected, y):
        X_train, X_val = X_selected[train_idx], X_selected[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = lgb.LGBMClassifier(n_estimators=100, learning_rate=0.05, random_state=RANDOM_SEED, verbose=-1)
        model.fit(X_train, y_train)
        accuracy_scores.append(accuracy_score(y_val, model.predict(X_val)))

    return np.mean(accuracy_scores)  # Return average accuracy

# Modified Genetic Algorithm for Feature Selection
def modified_genetic_algorithm(X, y, num_generations=50, population_size=20, mutation_rate=0.1):
    num_features = X.shape[1]
    population = np.random.randint(2, size=(population_size, num_features))

    for generation in range(num_generations):
        fitness_scores = np.array([fitness_function(chrom, X, y) for chrom in population])
        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = population[sorted_indices]

        if fitness_scores[0] - fitness_scores[-1] <= 0.01:  # Convergence check
            break

        parents = population[:4]  # Select top 4 parents
        offspring = np.mean(parents, axis=0) > 0.5  # Crossover strategy
        offspring = np.array(offspring, dtype=int)

        # Mutation
        mutation_mask = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutation_mask] = 1 - offspring[mutation_mask]

        if fitness_function(offspring, X, y) > fitness_scores[-1]:
            population[-1] = offspring  # Replace worst individual

    return population[0]  # Return best chromosome

# Run Modified Genetic Algorithm
best_chromosome = modified_genetic_algorithm(X, y)
selected_features = np.where(best_chromosome == 1)[0]

# Perform K-Fold Cross-Validation on the selected features
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
accuracy_scores_lgb = []

for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    lgb_model = lgb.LGBMClassifier(n_estimators=100, learning_rate=0.1, random_state=RANDOM_SEED,verbose=-1)
    lgb_model.fit(X_train, y_train)

    y_pred = lgb_model.predict(X_test)
    accuracy_scores_lgb.append(accuracy_score(y_test, y_pred))

# Output Results
print("Best Feature Selection:", best_chromosome)
print("Mean Test Accuracy (LightGBM with K-Fold CV):", np.mean(accuracy_scores_lgb))
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# Initialize lists for the new metrics
precision_scores = []
recall_scores = []
roc_auc_scores = []

# Perform K-Fold Cross-Validation on the selected features for LightGBM
for train_idx, test_idx in skf.split(X[:, selected_features], y):
    X_train, X_test = X[train_idx][:, selected_features], X[test_idx][:, selected_features]
    y_train, y_test = y[train_idx], y[test_idx]

    lgb_model = lgb.LGBMClassifier(n_estimators=100, learning_rate=0.1, random_state=RANDOM_SEED, verbose=-1)
    lgb_model.fit(X_train, y_train)

    y_pred = lgb_model.predict(X_test)
    y_pred_prob = lgb_model.predict_proba(X_test)[:, 1]  # Probabilities for AUC-ROC

    # Calculate metrics
    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    roc_auc_scores.append(roc_auc_score(y_test, y_pred_prob))  # AUC-ROC based on predicted probabilities

# Output Results
print("Mean Precision:", np.mean(precision_scores))
print("Mean Recall:", np.mean(recall_scores))
print("Mean AUC-ROC:", np.mean(roc_auc_scores))


Best Feature Selection: [1 1 1 1 1 1 1 1 0 0 1 1 1]
Mean Test Accuracy (LightGBM with K-Fold CV): 1.0
Mean Precision: 1.0
Mean Recall: 1.0
Mean AUC-ROC: 1.0


# CNN+Modified GA

In [14]:
import numpy as np
import pandas as pd
import tensorflow as tf
import random
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
import warnings

# Enable warnings
warnings.filterwarnings("default")

# Set fixed random seed for full reproducibility
RANDOM_SEED = 42
os.environ['PYTHONHASHSEED'] = str(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Ensure TensorFlow operates deterministically
tf.config.experimental.enable_op_determinism()

# Load dataset
df = pd.read_csv('heart.csv')

# Encode categorical features
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'target']
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Scale numerical features
numerical_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Define features and target variable
X = df.drop(columns=['target']).values.astype(np.float32)
y = df['target'].values.astype(np.int32)

# Fitness function using Stratified K-Fold CV with Logistic Regression
def fitness_function(chromosome, X, y):
    selected_features = np.where(chromosome == 1)[0]
    if len(selected_features) == 0:
        return 0  # Avoid empty feature sets

    X_selected = X[:, selected_features]
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)  # Using K-Fold CV
    accuracy_scores = []

    for train_idx, val_idx in skf.split(X_selected, y):
        X_train, X_val = X_selected[train_idx], X_selected[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        # Logistic Regression for fast evaluation
        model = LogisticRegression(max_iter=100, solver='liblinear', random_state=RANDOM_SEED)
        model.fit(X_train, y_train)
        accuracy_scores.append(accuracy_score(y_val, model.predict(X_val)))

    return np.mean(accuracy_scores)

# Optimized Genetic Algorithm for Faster Feature Selection
def fast_genetic_algorithm(X, y, num_generations=20, population_size=10, mutation_rate=0.05):
    num_features = X.shape[1]
    population = np.random.randint(2, size=(population_size, num_features))

    for generation in range(num_generations):
        fitness_scores = np.array([fitness_function(chrom, X, y) for chrom in population])
        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = population[sorted_indices]

        # Early stopping if fitness doesn't improve significantly
        if fitness_scores[0] - fitness_scores[-1] <= 0.005:
            break

        # Select top 2 parents
        parents = population[:2]

        # Create new offspring using uniform crossover
        offspring = (parents[0] + parents[1]) // 2
        offspring = np.array(offspring, dtype=int)

        # Apply mutation
        mutation_mask = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutation_mask] = 1 - offspring[mutation_mask]

        if fitness_function(offspring, X, y) > fitness_scores[-1]:
            population[-1] = offspring  # Replace worst individual

    return population[0]  # Return best chromosome

# Run Optimized GA for Feature Selection
best_chromosome = fast_genetic_algorithm(X, y)
selected_features = np.where(best_chromosome == 1)[0]

# Initialize lists for metrics
accuracy_scores = []
precision_scores = []
recall_scores = []
roc_auc_scores = []

# Stratified K-Fold Cross-Validation for CNN
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)

for train_idx, val_idx in skf.split(X[:, selected_features], y):
    X_train, X_val = X[train_idx][:, selected_features], X[val_idx][:, selected_features]
    y_train, y_val = y[train_idx], y[val_idx]

    # Build Final CNN Model
    cnn_model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train CNN Model (Reduced Epochs for Speed)
    cnn_model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=1)

    # Test the Model
    y_pred = (cnn_model.predict(X_val) > 0.5).astype(int)

    # Calculate metrics
    accuracy_scores.append(accuracy_score(y_val, y_pred))
    precision_scores.append(precision_score(y_val, y_pred))
    recall_scores.append(recall_score(y_val, y_pred))

    # For AUC-ROC, use the probabilities predicted by the sigmoid activation
    y_pred_prob = cnn_model.predict(X_val)[:, 0]  # Use the first column for probability
    roc_auc_scores.append(roc_auc_score(y_val, y_pred_prob))

# Output Results
print("Best Feature Selection:", best_chromosome)
print("Selected Feature Count:", len(selected_features))
print("Mean Accuracy:", np.mean(accuracy_scores))
print("Mean Precision:", np.mean(precision_scores))
print("Mean Recall:", np.mean(recall_scores))
print("Mean AUC-ROC:", np.mean(roc_auc_scores))


Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5425 - loss: 0.7075   
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7423 - loss: 0.5894 
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7871 - loss: 0.5048 
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7901 - loss: 0.4580 
Epoch 5/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8074 - loss: 0.4229 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5628 - loss: 0.6868   
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7228 - loss: 0.5779 
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7619 - loss: 0.5086 
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8090 - loss: 0.4419 
Epoch 5/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8129 - loss: 0.4157 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.6175 - loss: 0.6374   
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7912 - loss: 0.5249 
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7986 - loss: 0.4726 
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8136 - loss: 0.4292 
Epoch 5/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8041 - loss: 0.4158 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5192 - loss: 0.6944   
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7394 - loss: 0.5712 
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8106 - loss: 0.4865 
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7972 - loss: 0.4296 
Epoch 5/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8108 - loss: 0.4063 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5443 - loss: 0.6782 
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7518 - loss: 0.5598 
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7982 - loss: 0.4695 
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8236 - loss: 0.4177 
Epoch 5/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8288 - loss: 0.4037 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Best Feature Selection: [1 1 1 1 1 0 1 0 1 1 0 1 0]
Selected Feature Count: 9
Mean Accuracy: 0.8195121951219513
Mean Precision: 0.798703382216334
Mean Recall: 0.8706558849955076
Mean AUC-ROC: 0.9044831277736938


# LSTM+Modified GA

In [15]:
import numpy as np
import pandas as pd
import tensorflow as tf
import random
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
import warnings

# Enable warnings
warnings.filterwarnings("default")

# Set fixed random seed for full reproducibility
RANDOM_SEED = 42
os.environ['PYTHONHASHSEED'] = str(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Ensure TensorFlow operates deterministically
tf.config.experimental.enable_op_determinism()

# Load dataset
df = pd.read_csv('heart.csv')

# Encode categorical features
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'target']
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Scale numerical features
numerical_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Define features and target variable
X = df.drop(columns=['target']).values.astype(np.float32)
y = df['target'].values.astype(np.int32)

# Fitness function using Stratified K-Fold CV with Logistic Regression
def fitness_function(chromosome, X, y):
    selected_features = np.where(chromosome == 1)[0]
    if len(selected_features) == 0:
        return 0  # Avoid empty feature sets

    X_selected = X[:, selected_features]
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
    accuracy_scores = []

    for train_idx, val_idx in skf.split(X_selected, y):
        X_train, X_val = X_selected[train_idx], X_selected[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        # Logistic Regression for fast evaluation
        model = LogisticRegression(max_iter=100, solver='liblinear', random_state=RANDOM_SEED)
        model.fit(X_train, y_train)
        accuracy_scores.append(accuracy_score(y_val, model.predict(X_val)))

    return np.mean(accuracy_scores)

# Optimized Genetic Algorithm for Faster Feature Selection
def fast_genetic_algorithm(X, y, num_generations=20, population_size=10, mutation_rate=0.05):
    num_features = X.shape[1]
    population = np.random.randint(2, size=(population_size, num_features))

    for generation in range(num_generations):
        fitness_scores = np.array([fitness_function(chrom, X, y) for chrom in population])
        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = population[sorted_indices]

        # Early stopping if fitness doesn't improve significantly
        if fitness_scores[0] - fitness_scores[-1] <= 0.005:
            break

        # Select top 2 parents
        parents = population[:2]

        # Create new offspring using uniform crossover
        offspring = (parents[0] + parents[1]) // 2
        offspring = np.array(offspring, dtype=int)

        # Apply mutation
        mutation_mask = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutation_mask] = 1 - offspring[mutation_mask]

        if fitness_function(offspring, X, y) > fitness_scores[-1]:
            population[-1] = offspring  # Replace worst individual

    return population[0]  # Return best chromosome

# Run Optimized GA for Feature Selection
best_chromosome = fast_genetic_algorithm(X, y)
selected_features = np.where(best_chromosome == 1)[0]
X_selected = X[:, selected_features]

# Initialize lists for metrics
accuracy_scores = []
precision_scores = []
recall_scores = []
roc_auc_scores = []

# Stratified K-Fold Cross-Validation for LSTM
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
for train_idx, test_idx in skf.split(X_selected, y):
    X_train, X_test = X_selected[train_idx], X_selected[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Reshape input for LSTM
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    # Build LSTM Model
    lstm_model = Sequential([
        LSTM(64, activation='relu', input_shape=(X_train.shape[1], 1)),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train LSTM Model (Reduced Epochs for Speed)
    lstm_model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=1)

    # Test the Model
    y_pred = (lstm_model.predict(X_test) > 0.5).astype(int)

    # Calculate metrics
    accuracy_scores.append(accuracy_score(y_test, y_pred))
    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))

    # For AUC-ROC, use the probabilities predicted by the sigmoid activation
    y_pred_prob = lstm_model.predict(X_test)[:, 0]  # Use the first column for probability
    roc_auc_scores.append(roc_auc_score(y_test, y_pred_prob))

# Output Results
print("Best Feature Selection:", best_chromosome)
print("Selected Feature Count:", len(selected_features))
print("Mean Accuracy:", np.mean(accuracy_scores))
print("Mean Precision:", np.mean(precision_scores))
print("Mean Recall:", np.mean(recall_scores))
print("Mean AUC-ROC:", np.mean(roc_auc_scores))


Epoch 1/5


  super().__init__(**kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5564 - loss: 0.6915
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6947 - loss: 0.6359
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7308 - loss: 0.5786
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7650 - loss: 0.5491
Epoch 5/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7812 - loss: 0.5256
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Epoch 1/5


  super().__init__(**kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5556 - loss: 0.6775
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7395 - loss: 0.6300
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7475 - loss: 0.5446
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7667 - loss: 0.5251
Epoch 5/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7691 - loss: 0.5196
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Epoch 1/5


  super().__init__(**kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6822 - loss: 0.6687
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7601 - loss: 0.5966
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7521 - loss: 0.5329
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7637 - loss: 0.5230
Epoch 5/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7551 - loss: 0.5144
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


  super().__init__(**kwargs)


Epoch 1/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6166 - loss: 0.6855
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7310 - loss: 0.6429
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7162 - loss: 0.5627
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7502 - loss: 0.5430
Epoch 5/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7633 - loss: 0.5274
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Epoch 1/5


  super().__init__(**kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6411 - loss: 0.6840
Epoch 2/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7167 - loss: 0.6279
Epoch 3/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7461 - loss: 0.5417
Epoch 4/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7560 - loss: 0.5356
Epoch 5/5
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7664 - loss: 0.5006
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Best Feature Selection: [1 1 1 1 1 0 1 0 1 1 0 1 0]
Selected Feature Count: 9
Mean Accuracy: 0.7892682926829269
Mean Precision: 0.7575591397849462
Mean Recall: 0.8688409703504043
Mean AUC-ROC: 0.8377486890468022


# MLP+Modified GA

In [16]:
import numpy as np
import pandas as pd
import tensorflow as tf
import random
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
import warnings

# Enable warnings
warnings.filterwarnings("default")

# Set fixed random seed for full reproducibility
RANDOM_SEED = 42
os.environ['PYTHONHASHSEED'] = str(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Ensure TensorFlow operates deterministically
tf.config.experimental.enable_op_determinism()

# Load dataset
df = pd.read_csv('heart.csv')

# Encode categorical features
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'target']
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Scale numerical features
numerical_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Define features and target variable
X = df.drop(columns=['target']).values.astype(np.float32)
y = df['target'].values.astype(np.int32)

# Fitness function using Stratified K-Fold CV with Logistic Regression
def fitness_function(chromosome, X, y):
    selected_features = np.where(chromosome == 1)[0]
    if len(selected_features) == 0:
        return 0  # Avoid empty feature sets

    X_selected = X[:, selected_features]
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
    accuracy_scores = []

    for train_idx, val_idx in skf.split(X_selected, y):
        X_train, X_val = X_selected[train_idx], X_selected[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        # Logistic Regression for fast evaluation
        model = LogisticRegression(max_iter=100, solver='liblinear', random_state=RANDOM_SEED)
        model.fit(X_train, y_train)
        accuracy_scores.append(accuracy_score(y_val, model.predict(X_val)))

    return np.mean(accuracy_scores)

# Optimized Genetic Algorithm for Faster Feature Selection
def fast_genetic_algorithm(X, y, num_generations=20, population_size=10, mutation_rate=0.05):
    num_features = X.shape[1]
    population = np.random.randint(2, size=(population_size, num_features))

    for generation in range(num_generations):
        fitness_scores = np.array([fitness_function(chrom, X, y) for chrom in population])
        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = population[sorted_indices]

        # Early stopping if fitness doesn't improve significantly
        if fitness_scores[0] - fitness_scores[-1] <= 0.005:
            break

        # Select top 2 parents
        parents = population[:2]

        # Create new offspring using uniform crossover
        offspring = (parents[0] + parents[1]) // 2
        offspring = np.array(offspring, dtype=int)

        # Apply mutation
        mutation_mask = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutation_mask] = 1 - offspring[mutation_mask]

        if fitness_function(offspring, X, y) > fitness_scores[-1]:
            population[-1] = offspring  # Replace worst individual

    return population[0]  # Return best chromosome

# Run Optimized GA for Feature Selection
best_chromosome = fast_genetic_algorithm(X, y)
selected_features = np.where(best_chromosome == 1)[0]
X_selected = X[:, selected_features]

# Initialize lists for metrics
accuracy_scores = []
precision_scores = []
recall_scores = []
roc_auc_scores = []

# Perform Stratified K-Fold Cross-Validation with MLP
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
for train_idx, val_idx in skf.split(X_selected, y):
    X_train, X_val = X_selected[train_idx], X_selected[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    # Build MLP Model
    mlp_model = Sequential([
        Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    mlp_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train MLP Model (Reduced Epochs for Speed)
    mlp_model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)

    # Validate the Model
    y_pred = (mlp_model.predict(X_val) > 0.5).astype(int)
    
    # Calculate metrics
    accuracy_scores.append(accuracy_score(y_val, y_pred))
    precision_scores.append(precision_score(y_val, y_pred))
    recall_scores.append(recall_score(y_val, y_pred))

    # For AUC-ROC, use the probabilities predicted by the sigmoid activation
    y_pred_prob = mlp_model.predict(X_val)[:, 0]  # Use the first column for probability
    roc_auc_scores.append(roc_auc_score(y_val, y_pred_prob))

# Output Results
print("Best Feature Selection:", best_chromosome)
print("Selected Feature Count:", len(selected_features))
print("Mean Test Accuracy (MLP with Stratified K-Fold CV):", np.mean(accuracy_scores))
print("Mean Precision (MLP with Stratified K-Fold CV):", np.mean(precision_scores))
print("Mean Recall (MLP with Stratified K-Fold CV):", np.mean(recall_scores))
print("Mean AUC-ROC (MLP with Stratified K-Fold CV):", np.mean(roc_auc_scores))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Best Feature Selection: [1 1 1 1 1 0 1 0 1 1 0 1 0]
Selected Feature Count: 9
Mean Test Accuracy (MLP with Stratified K-Fold CV): 0.8556097560975611
Mean Precision (MLP with Stratified K-Fold CV): 0.8273047820395586
Mean Recall (MLP with Stratified K-Fold CV): 0.9125067385444743
Mean AUC-ROC (MLP with Stratified K-Fold CV): 0.9213609845081543
