In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Load dataset
df = pd.read_csv("Dataset1.csv")

Training and Saving Models on each disease(3)

In [3]:
from sklearn.feature_selection import mutual_info_classif, mutual_info_regression


In [4]:
# Define disease labels (targets)
diseases = ["Diabetes", "Stroke", "HeartDiseaseorAttack"]

# Define feature columns (all except target labels)
features = [col for col in df.columns if col not in diseases]

# Initialize an empty dictionary to store selected features
selected_features_dict = {}

# Feature Selection Function
def select_important_features(features, target, num_features=10):
    # Determine if the target is for classification or regression
    if pd.api.types.is_numeric_dtype(target) and target.nunique() > 10:  # Assuming regression if more than 10 unique values
        mutual_info = mutual_info_regression(features, target)
    else:
        mutual_info = mutual_info_classif(features, target)

    feature_scores = pd.Series(mutual_info, index=features.columns)
    selected_features = feature_scores.nlargest(num_features).index.tolist()
    print(f"Selected features for target '{target.name}': {selected_features}")
    return selected_features


# Preprocess data with feature selection
def preprocess_data_with_selection(features, target, num_features):
    # Feature Selection
    selected_features = select_important_features(df[features], df[target], num_features)

    print(f"Features for target '{target}': {selected_features}")

    # Store selected features in the dictionary
    selected_features_dict[target] = selected_features

    # Prepare data
    X = df[selected_features].values
    y = df[target].values
    X = np.expand_dims(X, axis=-1)  # Reshape for CNN (samples, features, 1)
    return train_test_split(X, y, test_size=0.2, random_state=42)

# CNN model
def build_cnn(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv1D(32, kernel_size=3, activation='relu', input_shape=input_shape),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Conv1D(64, kernel_size=3, activation='relu'),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')  # Binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Target diseases
diseases = ["Diabetes", "Stroke", "HeartDiseaseorAttack"]

# Train and evaluate models with feature selection
results = {}
num_features = 10  # Select top 10 features for each disease
for disease in diseases:
    print(f"\n--- Training model for {disease} with Feature Selection ---")
    features = [col for col in df.columns if col != disease]
    X_train, X_test, y_train, y_test = preprocess_data_with_selection(features, disease, num_features)

    input_shape = (X_train.shape[1], 1)
    model = build_cnn(input_shape)

    # Early stopping
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor="val_accuracy", patience=3, restore_best_weights=True
    )

    # Train model
    history = model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=1
    )

    # Evaluate model
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    print(f"\nClassification Report for {disease}:")
    report = classification_report(y_test, y_pred, output_dict=True)
    print(classification_report(y_test, y_pred))

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    print(f"Confusion Matrix for {disease}:\n{cm}")

    # Save model with max validation accuracy
    model_file = f"/content/models/{disease}_best_model_with_selection.keras"
    model.save(model_file)
    print(f"Saved best model for {disease} to {model_file}")

    # Store results
    results[disease] = {
        "model": model,
        "history": history,
        "classification_report": report,
        "confusion_matrix": cm,
    }


--- Training model for Diabetes with Feature Selection ---
Selected features for target 'Diabetes': ['GenHlth', 'HighBP', 'AnyHealthcare', 'CholCheck', 'PhysActivity', 'Veggies', 'HighChol', 'Fruits', 'BMI', 'Age']
Features for target 'Diabetes': ['GenHlth', 'HighBP', 'AnyHealthcare', 'CholCheck', 'PhysActivity', 'Veggies', 'HighChol', 'Fruits', 'BMI', 'Age']


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 4ms/step - accuracy: 0.7053 - loss: 0.4211 - val_accuracy: 0.6245 - val_loss: 0.3746
Epoch 2/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - accuracy: 0.7071 - loss: 0.3320 - val_accuracy: 0.7357 - val_loss: 0.1939
Epoch 3/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - accuracy: 0.7106 - loss: -0.1473 - val_accuracy: 0.7203 - val_loss: -1.9357
Epoch 4/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - accuracy: 0.7323 - loss: -3.6534 - val_accuracy: 0.7693 - val_loss: -9.1809
Epoch 5/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 4ms/step - accuracy: 0.7525 - loss: -12.7772 - val_accuracy: 0.7689 - val_loss: -26.9623
Epoch 6/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step - accuracy: 0.7213 - loss: -34.5608 - val_accuracy: 0.6953 - val_loss: -45.087

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Selected features for target 'Stroke': ['AnyHealthcare', 'CholCheck', 'PhysActivity', 'Veggies', 'Fruits', 'HighBP', 'HighChol', 'Smoker', 'Sex', 'GenHlth']
Features for target 'Stroke': ['AnyHealthcare', 'CholCheck', 'PhysActivity', 'Veggies', 'Fruits', 'HighBP', 'HighChol', 'Smoker', 'Sex', 'GenHlth']
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step - accuracy: 0.9575 - loss: 0.1662 - val_accuracy: 0.9602 - val_loss: 0.1488
Epoch 2/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.9585 - loss: 0.1549 - val_accuracy: 0.9602 - val_loss: 0.1498
Epoch 3/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - accuracy: 0.9592 - loss: 0.1509 - val_accuracy: 0.9602 - val_loss: 0.1484
Epoch 4/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - accuracy: 0.9593 - loss: 0.1511 - val_accuracy: 0.9602 - val_loss: 0.1495
[1m1586/1586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step

Classification Report for Stroke:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98     48679
           1       0.00      0.00      0.00      2057

    accuracy                           0.96     50736
   macro avg       0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Selected features for target 'HeartDiseaseorAttack': ['AnyHealthcare', 'CholCheck', 'PhysActivity', 'Veggies', 'HighBP', 'Fruits', 'GenHlth', 'HighChol', 'Age', 'Smoker']
Features for target 'HeartDiseaseorAttack': ['AnyHealthcare', 'CholCheck', 'PhysActivity', 'Veggies', 'HighBP', 'Fruits', 'GenHlth', 'HighChol', 'Age', 'Smoker']
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 3ms/step - accuracy: 0.9024 - loss: 0.2673 - val_accuracy: 0.9051 - val_loss: 0.2492
Epoch 2/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - accuracy: 0.9067 - loss: 0.2503 - val_accuracy: 0.9052 - val_loss: 0.2499
Epoch 3/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 3ms/step - accuracy: 0.9060 - loss: 0.2502 - val_accuracy: 0.9051 - val_loss: 0.2530
Epoch 4/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.9061 - loss: 0.2468 - val_accuracy: 0.9051 - val_loss: 0.2504
Epoch 5/50
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - accuracy: 0.9062 - loss: 0.2483 - val_accuracy: 0.9052 - val_loss: 0.2488
[1m1586/1586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step

Classification Report for HeartDiseaseorAttack:
              precision    recall  f1-score   support

     

Training MultiClassification Model

In [5]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np

# Build CNN model for multi-class classification
def build_cnn_multiclass(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv1D(32, kernel_size=3, activation='relu', input_shape=input_shape),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Conv1D(64, kernel_size=3, activation='relu'),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(3, activation='softmax')  # 3 output units for multi-class
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Preprocess data for multi-class classification
def preprocess_data_multiclass(features, diseases):
    # Prepare features and target
    X = df[features].values
    y = df[diseases].values  # Multi-target columns for diseases
    X = np.expand_dims(X, axis=-1)  # Reshape for CNN (samples, features, 1)
    return train_test_split(X, y, test_size=0.2, random_state=42)

# Target diseases
diseases = ["Diabetes", "Stroke", "HeartDiseaseorAttack"]

# Define feature columns (all except target labels)
features = [col for col in df.columns if col not in diseases]

# Preprocess data
X_train, X_test, y_train, y_test = preprocess_data_multiclass(features, diseases)

# Build CNN model for multi-class classification
input_shape = (X_train.shape[1], 1)
model = build_cnn_multiclass(input_shape)

# Early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=3, restore_best_weights=True
)

# Train model
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate model
y_pred = model.predict(X_test)
y_pred_class = np.argmax(y_pred, axis=1)
y_test_class = np.argmax(y_test, axis=1)

# Classification Report and Metrics
print("\nClassification Report:")
report = classification_report(y_test_class, y_pred_class, target_names=diseases)
print(report)

# Accuracy, Precision, Recall, F1 Score
accuracy = accuracy_score(y_test_class, y_pred_class)
precision = precision_score(y_test_class, y_pred_class, average='weighted')
recall = recall_score(y_test_class, y_pred_class, average='weighted')
f1 = f1_score(y_test_class, y_pred_class, average='weighted')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

# Confusion Matrix
cm = confusion_matrix(y_test_class, y_pred_class)
print(f"Confusion Matrix:\n{cm}")

# Save model with max validation accuracy
model_file = "/content/models/multiclass_best_model.keras"
model.save(model_file)
print(f"Saved best model to {model_file}")


Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 5ms/step - accuracy: 0.5061 - loss: 36181.4023 - val_accuracy: 0.0560 - val_loss: 590345.5000
Epoch 2/20
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 4ms/step - accuracy: 0.4853 - loss: 686008.7500 - val_accuracy: 0.2078 - val_loss: 1007243.4375
Epoch 3/20
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 4ms/step - accuracy: 0.4915 - loss: 2550766.0000 - val_accuracy: 0.0530 - val_loss: 4842902.0000
Epoch 4/20
[1m5074/5074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 4ms/step - accuracy: 0.4991 - loss: 5876947.0000 - val_accuracy: 0.9204 - val_loss: 6311167.5000
[1m1586/1586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step

Classification Report:
                      precision    recall  f1-score   support

            Diabetes       0.75      0.00      0.01     46698
              Stroke       0.00      0.00      0.00      1373
HeartDiseaseorAttac

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
