Connected to Python 3.12.5

 # Diabetes Prediction Model with
 # Neural Network and Fuzzy Logic

 # 1-Data Preparation and Neural Network Setup

In [2]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.metrics import (classification_report, roc_auc_score, confusion_matrix, 
                             roc_curve, fbeta_score)
from sklearn.model_selection import KFold
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns
import skfuzzy as fuzz
from skfuzzy import control as ctrl
from sklearn.preprocessing import StandardScaler


In [3]:
# Load preprocessed data
try:
    X_train = pd.read_csv('X_train_preprocessed.csv')
    X_test = pd.read_csv('X_test_preprocessed.csv')
    y_train = pd.read_csv('y_train_preprocessed.csv')
    y_test = pd.read_csv('y_test_preprocessed.csv')
except FileNotFoundError as e:
    print(f"Error: {e}. Ensure preprocessed CSV files are in the project directory.")
    exit(1)

In [4]:
# Load original dataset for fuzzy features (fallback)
try:
    original_data = pd.read_csv('pima-indians-diabetes.data.csv')
except FileNotFoundError as e:
    print(f"Error: {e}. Ensure pima-indians-diabetes.data.csv is in the project directory.")
    exit(1)

In [5]:
# Convert to numeric and check dtypes
X_train_df = X_train.apply(pd.to_numeric, errors='coerce')  # Keep DataFrame for unscaling
X_test_df = X_test.apply(pd.to_numeric, errors='coerce')  # Keep as DataFrame for feature extraction
y_train = pd.to_numeric(y_train.squeeze(), errors='coerce')
y_test = pd.to_numeric(y_test.squeeze(), errors='coerce')

In [6]:
# Check for NaN values after conversion
if X_train_df.isna().any().any() or X_test_df.isna().any().any() or y_train.isna().any() or y_test.isna().any():
    print("Error: Non-numeric values found in data. Check CSV files for invalid entries.")
    exit(1)

In [7]:
# Convert X_train and X_test to numpy arrays for neural network
X_train = X_train_df.values.astype(np.float32)
X_test = X_test_df.values.astype(np.float32)
y_train = y_train.values.astype(np.int32)
y_test = y_test.values.astype(np.int32)

In [8]:
# Save test set indices for fuzzy alignment
test_indices = X_test_df.index if hasattr(X_test_df, 'index') else np.arange(len(X_test_df))

In [9]:
# Verify shapes and dtypes
print("X_train shape:", X_train.shape, "dtype:", X_train.dtype)
print("X_test shape:", X_test.shape, "dtype:", X_test.dtype)
print("y_train shape:", y_train.shape, "dtype:", y_train.dtype)
print("y_test shape:", y_test.shape, "dtype:", y_test.dtype)

X_train shape: (696, 10) dtype: float32
X_test shape: (175, 10) dtype: float32
y_train shape: (696,) dtype: int32
y_test shape: (175,) dtype: int32


In [10]:
# Apply SMOTE with adjusted sampling strategy
smote = SMOTE(random_state=42, sampling_strategy=0.9)
try:
    X_train, y_train = smote.fit_resample(X_train, y_train)
except ValueError as e:
    print(f"Error in SMOTE: {e}. Ensure X_train contains only numeric data.")
    exit(1)
print("X_train shape after SMOTE:", X_train.shape)
print("y_train class distribution after SMOTE:", pd.Series(y_train).value_counts(normalize=True))

X_train shape after SMOTE: (735, 10)
y_train class distribution after SMOTE: 0    0.526531
1    0.473469
Name: proportion, dtype: float64




In [11]:
# Compute class weights and adjust for diabetic class
classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=classes, y=y_train)
class_weight_dict = dict(zip(classes, class_weights))
class_weight_dict[1] *= 1.5  # Further increase weight for diabetic class
print("Adjusted class weights:", class_weight_dict)

Adjusted class weights: {np.int32(0): np.float64(0.9496124031007752), np.int32(1): np.float64(1.584051724137931)}


In [12]:
# Define the enhanced neural network model
def create_model():
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(128, activation='relu', kernel_regularizer=l2(0.0001)),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu', kernel_regularizer=l2(0.0001)),
        BatchNormalization(),
        Dropout(0.2),
        Dense(32, activation='relu', kernel_regularizer=l2(0.0001)),
        BatchNormalization(),
        Dropout(0.1),
        Dense(1, activation='sigmoid')
    ])
    optimizer = Adam(learning_rate=0.0005)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

 # 2-Fuzzy Logic System Setup

In [13]:
# Define fuzzy variables
risk = ctrl.Antecedent(np.arange(0, 1.01, 0.01), 'risk')
glucose = ctrl.Antecedent(np.arange(0, 401, 1), 'glucose')
bmi = ctrl.Antecedent(np.arange(0, 81, 0.5), 'bmi')
blood_pressure = ctrl.Antecedent(np.arange(50, 151, 1), 'blood_pressure')
age = ctrl.Antecedent(np.arange(20, 101, 1), 'age')
output = ctrl.Consequent(np.arange(0, 1.01, 0.01), 'output')

In [14]:
# Define Membership Functions (refined for Pima dataset distribution)
# Glucose: ~5th=80, 50th=120, 95th=180 mg/dL
glucose['low'] = fuzz.trapmf(glucose.universe, [0, 0, 60, 80])
glucose['normal'] = fuzz.trapmf(glucose.universe, [70, 90, 150, 170])
glucose['high'] = fuzz.trapmf(glucose.universe, [160, 190, 400, 400])

# BMI: ~5th=20, 50th=32, 95th=45
bmi['low'] = fuzz.trapmf(bmi.universe, [0, 0, 18, 22])
bmi['normal'] = fuzz.trapmf(bmi.universe, [18, 23, 33, 38])
bmi['high'] = fuzz.trapmf(bmi.universe, [35, 40, 80, 80])

# BloodPressure: ~5th=60, 50th=80, 95th=110 mmHg
blood_pressure['low'] = fuzz.trapmf(blood_pressure.universe, [50, 50, 55, 65])
blood_pressure['normal'] = fuzz.trapmf(blood_pressure.universe, [60, 70, 90, 100])
blood_pressure['high'] = fuzz.trapmf(blood_pressure.universe, [95, 110, 150, 150])

# Age: ~5th=21, 50th=40, 95th=65
age['young'] = fuzz.trapmf(age.universe, [20, 20, 25, 35])
age['middle'] = fuzz.trapmf(age.universe, [30, 35, 50, 60])
age['old'] = fuzz.trapmf(age.universe, [55, 65, 100, 100])

risk['low'] = fuzz.trapmf(risk.universe, [0, 0, 0.3, 0.4])
risk['medium'] = fuzz.trapmf(risk.universe, [0.35, 0.5, 0.65, 0.75])
risk['high'] = fuzz.trapmf(risk.universe, [0.7, 0.8, 1, 1])

# Adjusted output['low'] to increase low-risk scores
output['low'] = fuzz.trapmf(output.universe, [0, 0, 0.3, 0.4])
output['medium'] = fuzz.trapmf(output.universe, [0.3, 0.45, 0.65, 0.8])
output['high'] = fuzz.trapmf(output.universe, [0.75, 0.85, 1, 1])

In [15]:
# Define fuzzy rules (expanded and balanced, no weight parameter)
rule1 = ctrl.Rule(risk['high'] & glucose['high'] & bmi['high'], output['high'])
rule2 = ctrl.Rule(risk['high'] & glucose['high'] & bmi['normal'], output['high'])
rule3 = ctrl.Rule(risk['medium'] & glucose['normal'] & bmi['normal'], output['low'])
rule4 = ctrl.Rule(risk['medium'] & glucose['high'] & bmi['high'], output['medium'])
rule5 = ctrl.Rule(risk['low'] & glucose['normal'] & bmi['normal'], output['low'])
rule6 = ctrl.Rule(risk['low'] & glucose['low'] & bmi['low'], output['low'])
rule7 = ctrl.Rule(risk['high'] & glucose['normal'] & bmi['normal'], output['medium'])
rule8 = ctrl.Rule(risk['medium'] & glucose['low'] & bmi['normal'], output['low'])
rule9 = ctrl.Rule(risk['low'] & glucose['high'] & bmi['high'], output['medium'])
rule10 = ctrl.Rule(risk['medium'] & glucose['high'] & bmi['normal'], output['high'])
rule11 = ctrl.Rule(risk['low'] & glucose['high'] & bmi['normal'], output['medium'])
rule12 = ctrl.Rule(risk['high'] & glucose['low'] & bmi['low'], output['medium'])
rule13 = ctrl.Rule(risk['low'] & glucose['low'] & bmi['high'], output['low'])
rule14 = ctrl.Rule(risk['medium'] & glucose['low'] & bmi['high'], output['medium'])
rule15 = ctrl.Rule(risk['high'] & glucose['normal'] & bmi['high'], output['high'])
rule16 = ctrl.Rule(risk['medium'] & glucose['normal'] & bmi['high'], output['medium'])
rule17 = ctrl.Rule(risk['low'] & glucose['normal'] & bmi['high'], output['low'])
rule18 = ctrl.Rule(risk['high'] & glucose['high'] & bmi['low'], output['high'])
rule19 = ctrl.Rule(risk['medium'] & glucose['normal'] & bmi['low'], output['low'])
rule20 = ctrl.Rule(risk['high'] & glucose['low'] & bmi['high'], output['high'])
rule21 = ctrl.Rule(risk['medium'] & glucose['high'] & bmi['low'], output['medium'])
rule22 = ctrl.Rule(risk['low'] & glucose['low'] & bmi['normal'], output['low'])
rule23 = ctrl.Rule(risk['high'] & glucose['normal'] & bmi['low'], output['medium'])
rule24 = ctrl.Rule(risk['medium'] & glucose['low'] & bmi['low'], output['low'])
rule25 = ctrl.Rule(risk['high'] & glucose['high'] & bmi['high'] & blood_pressure['high'], output['high'])
rule26 = ctrl.Rule(risk['medium'] & glucose['normal'] & bmi['normal'] & blood_pressure['low'], output['low'])
rule27 = ctrl.Rule(risk['low'] & glucose['low'] & bmi['low'] & blood_pressure['low'], output['low'])
rule28 = ctrl.Rule(risk['high'] & glucose['normal'] & bmi['normal'] & blood_pressure['low'], output['medium'])
rule29 = ctrl.Rule(risk['medium'] & glucose['low'] & bmi['high'] & blood_pressure['normal'], output['low'])
rule30 = ctrl.Rule(risk['low'] & glucose['high'] & bmi['low'] & blood_pressure['high'], output['medium'])
rule31 = ctrl.Rule(risk['high'] & glucose['high'] & bmi['high'] & age['old'], output['high'])
rule32 = ctrl.Rule(risk['medium'] & glucose['normal'] & bmi['normal'] & age['young'], output['low'])
rule33 = ctrl.Rule(risk['high'] & glucose['high'] & bmi['high'] & blood_pressure['normal'] & age['middle'], output['high'])
rule34 = ctrl.Rule(risk['medium'] & glucose['high'] & bmi['normal'] & blood_pressure['high'], output['high'])
rule35 = ctrl.Rule(risk['medium'] & glucose['high'] & bmi['high'] & age['old'], output['high'])
rule36 = ctrl.Rule(risk['high'] & glucose['normal'] & bmi['high'] & blood_pressure['high'], output['high'])
rule37 = ctrl.Rule(risk['low'] & glucose['low'] & bmi['normal'] & blood_pressure['normal'] & age['young'], output['low'])
rule38 = ctrl.Rule(risk['medium'] & glucose['normal'] & bmi['low'] & blood_pressure['low'], output['low'])
rule39 = ctrl.Rule(risk['high'] & glucose['high'] & bmi['high'] & blood_pressure['high'] & age['middle'], output['high'])
# Non-diabetic rules (duplicated to mimic weight=1.5)
rule40 = ctrl.Rule(glucose['low'] & bmi['low'] & blood_pressure['low'] & age['young'], output['low'])
rule41 = ctrl.Rule(glucose['normal'] & bmi['normal'] & blood_pressure['normal'] & age['young'], output['low'])
rule42 = ctrl.Rule(glucose['normal'] & bmi['low'] & blood_pressure['low'] & age['middle'], output['low'])
rule43 = ctrl.Rule(glucose['high'] & bmi['high'] & blood_pressure['high'] & age['old'], output['high'])
rule44 = ctrl.Rule(glucose['normal'] & bmi['normal'] & blood_pressure['high'] & age['middle'], output['medium'])
rule45 = ctrl.Rule(glucose['low'] & bmi['normal'] & blood_pressure['normal'] & age['middle'], output['low'])
# Duplicate non-diabetic rules to increase influence
rule46 = ctrl.Rule(glucose['low'] & bmi['low'] & blood_pressure['low'] & age['young'], output['low'])
rule47 = ctrl.Rule(glucose['normal'] & bmi['normal'] & blood_pressure['normal'] & age['young'], output['low'])
rule48 = ctrl.Rule(glucose['normal'] & bmi['low'] & blood_pressure['low'] & age['middle'], output['low'])

In [16]:
# Create fuzzy system
fuzzy_system = ctrl.ControlSystem([rule1, rule2, rule3, rule4, rule5, rule6, rule7, rule8, rule9, rule10, 
                                   rule11, rule12, rule13, rule14, rule15, rule16, rule17, rule18, rule19, 
                                   rule20, rule21, rule22, rule23, rule24, rule25, rule26, rule27, rule28, 
                                   rule29, rule30, rule31, rule32, rule33, rule34, rule35, rule36, rule37, 
                                   rule38, rule39, rule40, rule41, rule42, rule43, rule44, rule45, rule46, 
                                   rule47, rule48])
fuzzy_simulation = ctrl.ControlSystemSimulation(fuzzy_system)

In [17]:
# Fuzzy prediction function
def predict_risk(nn_risk, glucose_val, bmi_val, blood_pressure_val, age_val):
    try:
        fuzzy_simulation.input['risk'] = nn_risk
        fuzzy_simulation.input['glucose'] = glucose_val
        fuzzy_simulation.input['bmi'] = bmi_val
        fuzzy_simulation.input['blood_pressure'] = blood_pressure_val
        fuzzy_simulation.input['age'] = age_val
        fuzzy_simulation.compute()
        risk_score = fuzzy_simulation.output.get('output', 0.5)
        if risk_score <= 0.35:
            risk_label = "Low Risk"
        elif risk_score <= 0.8:
            risk_label = "Medium Risk"
        else:
            risk_label = "High Risk"
        return risk_score, risk_label
    except Exception as e:
        print(f"Error in fuzzy computation: {e}. Using default values.")
        return 0.5, "Medium Risk"

 # 3-Model Training and Evaluation

In [18]:
# Perform k-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = []

for fold, (train_idx, val_idx) in enumerate(kfold.split(X_train, y_train)):
    print(f"\nFold {fold + 1}")
    X_train_fold = X_train[train_idx]
    y_train_fold = y_train[train_idx]
    X_val_fold = X_train[val_idx]
    y_val_fold = y_train[val_idx]

    # Verify fold dtypes
    print(f"Fold {fold + 1} X_train_fold dtype:", X_train_fold.dtype)
    print(f"Fold {fold + 1} y_train_fold dtype:", y_train_fold.dtype)

    # Create and train model
    try:
        model = create_model()
    except Exception as e:
        print(f"Error creating model in fold {fold + 1}: {e}")
        exit(1)

    early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)
    try:
        history = model.fit(
            X_train_fold, y_train_fold,
            epochs=300,
            batch_size=16,
            validation_data=(X_val_fold, y_val_fold),
            class_weight=class_weight_dict,
            callbacks=[early_stopping, lr_scheduler],
            verbose=1
        )
    except Exception as e:
        print(f"Error during training in fold {fold + 1}: {e}")
        exit(1)

    # Evaluate on validation fold
    try:
        val_loss, val_accuracy = model.evaluate(X_val_fold, y_val_fold, verbose=0)
        cv_scores.append(val_accuracy)
        print(f"Fold {fold + 1} Validation Accuracy: {val_accuracy:.4f}")
    except Exception as e:
        print(f"Error evaluating model in fold {fold + 1}: {e}")
        exit(1)

print(f"\nCross-Validation Accuracy: {np.mean(cv_scores):.4f} ± {np.std(cv_scores):.4f}")


Fold 1
Fold 1 X_train_fold dtype: float32
Fold 1 y_train_fold dtype: int32
Epoch 1/300
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.5928 - loss: 0.9046 - val_accuracy: 0.6803 - val_loss: 0.6574 - learning_rate: 5.0000e-04
Epoch 2/300
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7321 - loss: 0.6468 - val_accuracy: 0.5986 - val_loss: 0.6609 - learning_rate: 5.0000e-04
Epoch 3/300
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7071 - loss: 0.6533 - val_accuracy: 0.5918 - val_loss: 0.6699 - learning_rate: 5.0000e-04
Epoch 4/300
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7218 - loss: 0.6523 - val_accuracy: 0.6122 - val_loss: 0.6660 - learning_rate: 5.0000e-04
Epoch 5/300
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7703 - loss: 0.5759 - val_accuracy: 0.6327 - val_loss: 0.6605 - lear

In [19]:
# Train final model on full training data
try:
    model = create_model()
    print("Final model created successfully.")
except Exception as e:
    print(f"Error creating final model: {e}")
    exit(1)

early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)
try:
    history = model.fit(
        X_train, y_train,
        epochs=300,
        batch_size=16,
        validation_data=(X_test, y_test),
        class_weight=class_weight_dict,
        callbacks=[early_stopping, lr_scheduler],
        verbose=1
    )
    print("Final model training completed.")
except Exception as e:
    print(f"Error training final model: {e}")
    exit(1)

Final model created successfully.
Epoch 1/300
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.5527 - loss: 1.0684 - val_accuracy: 0.7371 - val_loss: 0.6291 - learning_rate: 5.0000e-04
Epoch 2/300
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7061 - loss: 0.7323 - val_accuracy: 0.6971 - val_loss: 0.6069 - learning_rate: 5.0000e-04
Epoch 3/300
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7137 - loss: 0.6349 - val_accuracy: 0.6914 - val_loss: 0.5922 - learning_rate: 5.0000e-04
Epoch 4/300
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7208 - loss: 0.6464 - val_accuracy: 0.6743 - val_loss: 0.5954 - learning_rate: 5.0000e-04
Epoch 5/300
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7289 - loss: 0.6623 - val_accuracy: 0.6914 - val_loss: 0.5766 - learning_rate: 5.0000e-04
Epoch 6/300
[1m46/4

In [20]:
# Evaluate on test set
try:
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print(f"\nFinal Test Accuracy (Neural Network): {test_accuracy:.4f}")
except Exception as e:
    print(f"Error evaluating final model: {e}")
    exit(1)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7797 - loss: 0.4806 

Final Test Accuracy (Neural Network): 0.7829


In [21]:
# Get neural network predictions
try:
    y_pred_prob = model.predict(X_test)
    print("Neural network predictions generated successfully.")
except Exception as e:
    print(f"Error generating predictions: {e}")
    exit(1)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Neural network predictions generated successfully.


In [22]:
# Extract fuzzy features from preprocessed X_test_df
required_columns = ['Glucose', 'BMI', 'BloodPressure', 'Age']
# Debug: Print column names to diagnose unscaling issue
print("X_train_df columns:", X_train_df.columns.tolist())
print("X_test_df columns:", X_test_df.columns.tolist())

# Map possible column names (Pima dataset may use different names)
column_mapping = {
    'Glucose': ['Glucose', 'glucose', '2'],
    'BMI': ['BMI', 'bmi', '6'],
    'BloodPressure': ['BloodPressure', 'bloodpressure', '3'],
    'Age': ['Age', 'age', '7']
}
selected_columns = []
for req_col in required_columns:
    for possible_col in column_mapping[req_col]:
        if possible_col in X_test_df.columns:
            selected_columns.append(possible_col)
            break
    else:
        print(f"Error: No matching column for {req_col} in X_test_df")
        selected_columns.append(None)

if all(col is not None for col in selected_columns) and all(col in X_train_df.columns for col in selected_columns):
    # Unscale fuzzy features
    scaler = StandardScaler()
    try:
        scaler.fit(X_train_df[selected_columns])
        fuzzy_features = scaler.inverse_transform(X_test_df[selected_columns])
        print("Fuzzy features unscaled successfully.")
    except Exception as e:
        print(f"Error unscaling fuzzy features: {e}")
        print("Falling back to original dataset.")
        try:
            # Map original dataset columns (Pima uses numeric indices)
            orig_columns = {
                'Glucose': '2',
                'BMI': '6',
                'BloodPressure': '3',
                'Age': '7'
            }
            orig_selected = [orig_columns[col] for col in required_columns]
            if not all(idx in original_data.index for idx in test_indices):
                print("Error: test_indices do not align with original_data indices")
                exit(1)
            fuzzy_features = original_data.loc[test_indices, orig_selected].values
        except KeyError as e:
            print(f"Error accessing original dataset: {e}")
            exit(1)
else:
    print("Warning: Required columns not found in X_test_df. Falling back to original dataset.")
    try:
        orig_columns = {
            'Glucose': '2',
            'BMI': '6',
            'BloodPressure': '3',
            'Age': '7'
        }
        orig_selected = [orig_columns[col] for col in required_columns]
        if not all(idx in original_data.index for idx in test_indices):
            print("Error: test_indices do not align with original_data indices")
            exit(1)
        fuzzy_features = original_data.loc[test_indices, orig_selected].values
    except KeyError as e:
        print(f"Error accessing original dataset: {e}")
        exit(1)

glucose_vals = fuzzy_features[:, 0]
bmi_vals = fuzzy_features[:, 1]
blood_pressure_vals = fuzzy_features[:, 2]
age_vals = fuzzy_features[:, 3]

# Debug: Print feature ranges
print("Glucose range:", glucose_vals.min(), glucose_vals.max())
print("BMI range:", bmi_vals.min(), bmi_vals.max())
print("BloodPressure range:", blood_pressure_vals.min(), blood_pressure_vals.max())
print("Age range:", age_vals.min(), age_vals.max())

X_train_df columns: ['Pregnancies', 'Age', 'BloodPressure', 'Insulin', 'Insulin_Pedigree', 'SkinThickness', 'Log_Insulin', 'Glucose', 'Glucose_BMI', 'BMI']
X_test_df columns: ['Pregnancies', 'Age', 'BloodPressure', 'Insulin', 'Insulin_Pedigree', 'SkinThickness', 'Log_Insulin', 'Glucose', 'Glucose_BMI', 'BMI']
Fuzzy features unscaled successfully.
Glucose range: -1.760733821388778 2.5636728369864685
BMI range: -2.2789505594820985 2.674540628071887
BloodPressure range: -2.2202433109776893 2.1236281955278953
Age range: -1.1193287153346856 4.255286296458328


In [23]:
# Clip and handle NaN values (adjusted for unscaled ranges)
glucose_vals = np.clip(glucose_vals, 0, 400)
bmi_vals = np.clip(bmi_vals, 0, 80)
blood_pressure_vals = np.clip(blood_pressure_vals, 50, 150)
age_vals = np.clip(age_vals, 20, 100)
glucose_vals = np.nan_to_num(glucose_vals, nan=120)
bmi_vals = np.nan_to_num(bmi_vals, nan=25)
blood_pressure_vals = np.nan_to_num(blood_pressure_vals, nan=90)
age_vals = np.nan_to_num(age_vals, nan=40)

In [24]:
# Apply fuzzy system
fuzzy_results = []
for i in range(len(y_pred_prob)):
    nn_risk = y_pred_prob[i][0]
    glucose_val = glucose_vals[i]
    bmi_val = bmi_vals[i]
    blood_pressure_val = blood_pressure_vals[i]
    age_val = age_vals[i]
    risk_score, risk_label = predict_risk(nn_risk, glucose_val, bmi_val, blood_pressure_val, age_val)
    fuzzy_results.append({
        'Index': test_indices[i] if isinstance(test_indices, np.ndarray) else test_indices[i],
        'True_Label': y_test[i],
        'NN_Probability': nn_risk,
        'Fuzzy_Risk_Score': risk_score,
        'Fuzzy_Risk_Label': risk_label,
        'Glucose': glucose_val,
        'BMI': bmi_val,
        'BloodPressure': blood_pressure_val,
        'Age': age_val
    })

In [25]:
# Convert fuzzy results to DataFrame
results_df = pd.DataFrame(fuzzy_results)

In [26]:
# Debug: Print fuzzy risk score distribution
print("Fuzzy Risk Score Distribution:")
print(results_df['Fuzzy_Risk_Score'].describe())

Fuzzy Risk Score Distribution:
count    175.000000
mean       0.246425
std        0.088521
min        0.176190
25%        0.176190
50%        0.176190
75%        0.363382
max        0.363382
Name: Fuzzy_Risk_Score, dtype: float64


In [27]:
# Optimize threshold for fuzzy predictions with F1-score
thresholds = np.arange(0.2, 0.81, 0.01)
best_threshold = 0.5
best_f1_score = 0
for t in thresholds:
    fuzzy_pred = (results_df['Fuzzy_Risk_Score'] > t).astype(int)
    f1 = fbeta_score(y_test, fuzzy_pred, beta=1)
    if f1 > best_f1_score:
        best_f1_score = f1
        best_threshold = t
print(f"Best Fuzzy Threshold (F1-score): {best_threshold:.4f}, F1-score: {best_f1_score:.4f}")

Best Fuzzy Threshold (F1-score): 0.2700, F1-score: 0.7448


In [28]:
# Generate fuzzy predictions with best threshold
fuzzy_pred = (results_df['Fuzzy_Risk_Score'] > best_threshold).astype(int)

In [29]:
# Ensemble predictions (adjusted weights)
combined_score = 0.8 * results_df['NN_Probability'] + 0.2 * results_df['Fuzzy_Risk_Score']
combined_pred = (combined_score > 0.6).astype(int)

In [30]:
# Evaluate neural network predictions
nn_threshold = 0.6
y_pred_nn = (y_pred_prob > nn_threshold).astype(int)
print("\nNeural Network Classification Report:")
print(classification_report(y_test, y_pred_nn, target_names=['Non-Diabetic', 'Diabetic']))
nn_roc_auc = roc_auc_score(y_test, y_pred_prob)
print(f"Neural Network ROC-AUC: {nn_roc_auc:.4f}")


Neural Network Classification Report:
              precision    recall  f1-score   support

Non-Diabetic       0.83      0.79      0.81        97
    Diabetic       0.76      0.79      0.78        78

    accuracy                           0.79       175
   macro avg       0.79      0.79      0.79       175
weighted avg       0.80      0.79      0.79       175

Neural Network ROC-AUC: 0.8759


In [31]:
# Evaluate fuzzy predictions
print("\nFuzzy System Classification Report:")
print(classification_report(y_test, fuzzy_pred, target_names=['Non-Diabetic', 'Diabetic']))
fuzzy_roc_auc = roc_auc_score(y_test, results_df['Fuzzy_Risk_Score'])
print(f"Fuzzy System ROC-AUC: {fuzzy_roc_auc:.4f}")


Fuzzy System Classification Report:
              precision    recall  f1-score   support

Non-Diabetic       0.78      0.87      0.82        97
    Diabetic       0.81      0.69      0.74        78

    accuracy                           0.79       175
   macro avg       0.79      0.78      0.78       175
weighted avg       0.79      0.79      0.79       175

Fuzzy System ROC-AUC: 0.7858


In [32]:
# Evaluate ensemble predictions
print("\nEnsemble (Neural + Fuzzy) Classification Report:")
print(classification_report(y_test, combined_pred, target_names=['Non-Diabetic', 'Diabetic']))
combined_roc_auc = roc_auc_score(y_test, combined_score)
print(f"Ensemble ROC-AUC: {combined_roc_auc:.4f}")


Ensemble (Neural + Fuzzy) Classification Report:
              precision    recall  f1-score   support

Non-Diabetic       0.78      0.85      0.81        97
    Diabetic       0.79      0.71      0.74        78

    accuracy                           0.78       175
   macro avg       0.78      0.78      0.78       175
weighted avg       0.78      0.78      0.78       175

Ensemble ROC-AUC: 0.8759


In [33]:
# Check correlation and fuzzy score stats
correlation = results_df['NN_Probability'].corr(results_df['Fuzzy_Risk_Score'])
print(f"Correlation between NN_Probability and Fuzzy_Risk_Score: {correlation:.4f}")
print("Fuzzy_Risk_Score Stats: Min =", results_df['Fuzzy_Risk_Score'].min(), 
      "Max =", results_df['Fuzzy_Risk_Score'].max(), 
      "Mean =", results_df['Fuzzy_Risk_Score'].mean())

Correlation between NN_Probability and Fuzzy_Risk_Score: 0.8415
Fuzzy_Risk_Score Stats: Min = 0.17619047619047615 Max = 0.36338235294117655 Mean = 0.24642480802064853


In [34]:
# Save predictions
results_df['NN_Predicted'] = y_pred_nn.flatten()
results_df['Fuzzy_Predicted'] = fuzzy_pred
results_df['Combined_Predicted'] = combined_pred
results_df.to_csv('combined_predictions.csv', index=False)
print("Combined predictions saved to combined_predictions.csv")

Combined predictions saved to combined_predictions.csv


In [35]:
# Visualize neural vs fuzzy
plt.figure(figsize=(10, 6))
sns.scatterplot(data=results_df, x='NN_Probability', y='Fuzzy_Risk_Score', hue='Fuzzy_Risk_Label', style='True_Label')
plt.axhline(y=0.35, color='gray', linestyle='--', alpha=0.5)
plt.axhline(y=0.8, color='gray', linestyle='--', alpha=0.5)
plt.title('Neural Network Probability vs Fuzzy Risk Score')
plt.xlabel('Neural Network Probability')
plt.ylabel('Fuzzy Risk Score')
plt.grid(True)
plt.savefig('neural_vs_fuzzy.png')
plt.close()
print("Neural vs Fuzzy comparison plot saved to neural_vs_fuzzy.png")

Neural vs Fuzzy comparison plot saved to neural_vs_fuzzy.png


In [36]:
# Fuzzy score distribution
plt.figure(figsize=(8, 6))
sns.histplot(data=results_df, x='Fuzzy_Risk_Score', hue='True_Label', bins=30)
plt.title('Fuzzy Risk Score Distribution by True Label')
plt.savefig('fuzzy_score_distribution.png')
plt.close()
print("Fuzzy score distribution saved to fuzzy_score_distribution.png")

Fuzzy score distribution saved to fuzzy_score_distribution.png


In [37]:
# Confusion matrix for fuzzy predictions
cm_fuzzy = confusion_matrix(y_test, fuzzy_pred)
plt.figure(figsize=(6, 4))
sns.heatmap(cm_fuzzy, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['Non-Diabetic', 'Diabetic'], yticklabels=['Non-Diabetic', 'Diabetic'])
plt.title('Fuzzy System Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.savefig('confusion_matrix_fuzzy.png')
plt.close()
print("Fuzzy confusion matrix saved to confusion_matrix_fuzzy.png")

Fuzzy confusion matrix saved to confusion_matrix_fuzzy.png


In [38]:
# Confusion matrix for neural network
cm_nn = confusion_matrix(y_test, y_pred_nn)
plt.figure(figsize=(6, 4))
sns.heatmap(cm_nn, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['Non-Diabetic', 'Diabetic'], yticklabels=['Non-Diabetic', 'Diabetic'])
plt.title('Neural Network Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.savefig('confusion_matrix_nn.png')
plt.close()
print("Neural network confusion matrix saved to confusion_matrix_nn.png")

Neural network confusion matrix saved to confusion_matrix_nn.png


In [39]:
# Confusion matrix for ensemble
cm_combined = confusion_matrix(y_test, combined_pred)
plt.figure(figsize=(6, 4))
sns.heatmap(cm_combined, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['Non-Diabetic', 'Diabetic'], yticklabels=['Non-Diabetic', 'Diabetic'])
plt.title('Ensemble (Neural + Fuzzy) Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.savefig('confusion_matrix_combined.png')
plt.close()
print("Ensemble confusion matrix saved to confusion_matrix_combined.png")

Ensemble confusion matrix saved to confusion_matrix_combined.png


In [40]:
# Plot training history
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig('training_history.png')
plt.close()
print("Training history plot saved to training_history.png")

Training history plot saved to training_history.png


In [None]:
# Save the model
model.save('diabetes_nn_model_enhanced.keras')
print("Model saved to diabetes_nn_model_enhanced.keras")