In [54]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Load the dataset and one-hot encode the two categorical columns
df = pd.read_csv('diabetes_prediction_dataset.csv')
df_encoded = pd.get_dummies(df, columns=['gender', 'smoking_history'], drop_first=True)
# Shuffle with a fixed seed: an unseeded shuffle changes which rows land in the
# test set on every kernel restart, so the metrics printed further down could
# not be reproduced.
testDF = df_encoded.sample(frac=1, random_state=42).reset_index(drop=True)
x_unscaled = testDF.drop(['diabetes'], axis=1)
y = testDF['diabetes']

# Normalize the data: standardise numeric columns, pass boolean columns through untouched
numerical_columns = x_unscaled.select_dtypes(include=np.number).columns
boolean_columns = x_unscaled.select_dtypes(include=bool).columns
scaler = StandardScaler()
# NOTE(review): the scaler is fit on every row before the split, so test-set
# statistics influence the scaling. Left unchanged because the pretrained
# models loaded later presumably expect exactly this scaling -- confirm.
temp = pd.DataFrame(scaler.fit_transform(x_unscaled[numerical_columns]), columns=numerical_columns)
# Both frames carry the fresh RangeIndex from reset_index above, so the
# column-wise concat lines rows up one to one.
x_scaled = pd.concat([temp, x_unscaled[boolean_columns]], axis=1)
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2, random_state=42)

In [55]:
# Encoder half of the autoencoder: input_dim -> 128 -> 64 -> encoding_dim
class Encoder(nn.Module):
    """Feed-forward encoder with two hidden layers (128 and 64 units).

    Each hidden layer is followed by ReLU and Dropout(0.2); the final linear
    layer projects to ``encoding_dim`` with no activation.
    """

    def __init__(self, input_dim, encoding_dim):
        super().__init__()
        wide, narrow, drop_p = 128, 64, 0.2
        stack = (
            nn.Linear(input_dim, wide),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(wide, narrow),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(narrow, encoding_dim),
        )
        # The attribute name and the layer positions determine the state_dict
        # keys (encoder.0.*, encoder.3.*, encoder.6.*), so both are kept as is.
        self.encoder = nn.Sequential(*stack)

    def forward(self, x):
        """Return the latent code for the batch ``x``."""
        latent = self.encoder(x)
        return latent

# Decoder half of the autoencoder: encoding_dim -> 64 -> 128 -> input_dim
class Decoder(nn.Module):
    """Feed-forward decoder that mirrors the encoder's layer widths.

    Two hidden layers (64 then 128 units), each followed by ReLU and
    Dropout(0.2), and a final linear layer back to ``input_dim``.
    """

    def __init__(self, encoding_dim, input_dim):
        super().__init__()
        dropout_rate = 0.2
        ordered_layers = [
            nn.Linear(encoding_dim, 64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, input_dim),
        ]
        # Register each layer under its position so the parameter names stay
        # decoder.0.*, decoder.3.*, decoder.6.* as in saved checkpoints.
        self.decoder = nn.Sequential()
        for position, layer in enumerate(ordered_layers):
            self.decoder.add_module(str(position), layer)

    def forward(self, x):
        """Return the reconstruction for the latent batch ``x``."""
        reconstruction = self.decoder(x)
        return reconstruction

In [56]:
# Define input dimensions
input_dim = x_train.shape[1]
encoding_dim = 8

# Paths of the trained autoencoder weights
encoder_model_file = './models/encoder2.pth'
decoder_model_file = './models/decoder2.pth'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

encoder = Encoder(input_dim, encoding_dim).to(device)
decoder = Decoder(encoding_dim, input_dim).to(device)
# map_location remaps the stored tensors onto whatever device was selected
# above, so a checkpoint saved on a GPU still loads on a CPU-only machine.
# weights_only=True restricts unpickling to tensors/plain containers, which is
# all a state_dict needs, and avoids executing arbitrary pickled code.
encoder.load_state_dict(torch.load(encoder_model_file, map_location=device, weights_only=True))
decoder.load_state_dict(torch.load(decoder_model_file, map_location=device, weights_only=True))
# Inference mode: disables the Dropout layers inside both halves
encoder.eval()
decoder.eval()

# Autoencoder wrapper: runs the encoder, then feeds its output to the decoder
class Autoencoder(nn.Module):
    """Compose an existing encoder and decoder into a single module."""

    def __init__(self, encoder, decoder):
        super().__init__()
        # Stored as sub-modules so both halves stay individually reachable
        # as `.encoder` and `.decoder`.
        self.encoder, self.decoder = encoder, decoder

    def forward(self, x):
        """Return the decoder's reconstruction of ``x``."""
        return self.decoder(self.encoder(x))

# Wrap the two pretrained halves in one module, then place it on the compute device
autoencoder = Autoencoder(encoder, decoder)
autoencoder = autoencoder.to(device)

  encoder.load_state_dict(torch.load(encoder_model_file))
  decoder.load_state_dict(torch.load(decoder_model_file))


In [57]:
# MLP classifier body: input_dim -> 128 -> 64 -> 32 -> 16 -> 1
class MLP(nn.Module):
    """Fully connected network with four ReLU hidden layers and one output unit.

    The last layer is a plain ``nn.Linear`` with no activation, so ``forward``
    returns the raw output of that layer.
    """

    def __init__(self, input_dim):
        super().__init__()
        widths = [input_dim, 128, 64, 32, 16]
        blocks = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            blocks += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        blocks.append(nn.Linear(widths[-1], 1))
        # Layer positions (0, 2, 4, 6, 8) fix the state_dict key names.
        self.model = nn.Sequential(*blocks)

    def forward(self, x):
        """Return the single-unit output for each row of ``x``."""
        output = self.model(x)
        return output

## Error correction model

In [58]:
import numpy as np
import torch
from sklearn.metrics import accuracy_score, confusion_matrix

def error_correction_function(mlp, autoencoder, x, threshold=5.0, bias_factor=0.1):
    """Predict labels with ``mlp``, nudging rows the autoencoder reconstructs poorly.

    Steps:
      1. Reconstruct ``x`` with ``autoencoder`` and take the per-row mean
         squared reconstruction error.
      2. Append that error to ``x`` as one extra feature column and ask
         ``mlp`` for the positive-class probability.
      3. For rows whose error exceeds ``threshold / 100``, add ``bias_factor``
         to the probability (clipped to [0, 1]).
      4. Label a row 1 when its adjusted probability is above 0.5, else 0.

    Args:
        mlp: Object exposing ``predict_proba(features)`` that returns an array
            whose column 1 is the positive-class probability. It receives
            ``x`` with one additional trailing column.
        autoencoder: ``nn.Module`` with ``encoder`` and ``decoder`` attributes.
            This function does not change its train/eval mode; the caller is
            responsible for putting it in eval mode.
        x: 2-D array-like of features, one row per sample. Any dtype that
            converts to float is accepted (including object arrays mixing
            floats and bools).
        threshold: Error cut-off expressed in hundredths; the default 5.0
            means rows with error > 0.05 are adjusted.
        bias_factor: Amount added to the probability of high-error rows.

    Returns:
        1-D integer numpy array of 0/1 predictions, one per row of ``x``.
    """
    # `DataFrame.values` on a frame mixing float and bool columns yields an
    # object array, which torch.tensor rejects; normalise to float64 up front.
    # float64 keeps the error arithmetic identical for already-float input.
    x = np.asarray(x, dtype=np.float64)

    # Run on whichever device the autoencoder's weights live on rather than
    # relying on a notebook-level `device` global.
    first_param = next(autoencoder.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # Calculate reconstruction error
    with torch.no_grad():
        encoded = autoencoder.encoder(torch.tensor(x, dtype=torch.float32).to(model_device))
        reconstructed = autoencoder.decoder(encoded).cpu().numpy()
    reconstruction_error = np.mean(np.square(x - reconstructed), axis=1)

    # Add reconstruction error as an additional feature
    x_augmented = np.hstack((x, reconstruction_error.reshape(-1, 1)))

    # Probability of the positive class from the downstream classifier
    y_pred_proba = mlp.predict_proba(x_augmented)[:, 1]

    # Push poorly reconstructed rows towards the positive class
    high_error = reconstruction_error > (threshold / 100)
    y_pred_proba_corrected = y_pred_proba.copy()
    y_pred_proba_corrected[high_error] += bias_factor
    y_pred_proba_corrected = np.clip(y_pred_proba_corrected, 0, 1)  # keep valid probabilities

    # Convert probabilities to binary predictions
    y_pred_corrected = (y_pred_proba_corrected > 0.5).astype(int)

    return y_pred_corrected

# Use the error correction function to make predictions
# NOTE(review): `mlp` is not created in any cell visible here; it has to expose
# `predict_proba` and accept the features plus one reconstruction-error column
# -- confirm where it is trained/loaded so the notebook survives Restart & Run All.
y_pred_corrected = error_correction_function(mlp, autoencoder, x_test.values)

# Calculate corrected accuracy
corrected_accuracy = accuracy_score(y_test, y_pred_corrected)
print(f"Corrected Accuracy: {corrected_accuracy * 100:.2f}%")

# Calculate confusion matrix. labels=[0, 1] pins the matrix to 2x2 even when a
# class is missing from both y_test and the predictions; without it the
# four-way unpack below raises ValueError in that case.
conf_matrix = confusion_matrix(y_test, y_pred_corrected, labels=[0, 1])

# Extract values from confusion matrix (row = true class, column = predicted)
tn, fp, fn, tp = conf_matrix.ravel()

print(f"Total correct predictions: {tn + tp}")
print(f"Total wrong predictions: {fp + fn}")
print(f"Number of 0s predicted as 1s (False Positives): {fp}")
print(f"Number of 1s predicted as 0s (False Negatives): {fn}")

Corrected Accuracy: 97.20%
Total correct predictions: 19441
Total wrong predictions: 559
Number of 0s predicted as 1s (False Positives): 33
Number of 1s predicted as 0s (False Negatives): 526


## Original MLP model

In [59]:
import torch
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix

# Define the original MLP model
# (re-declares the MLP class from an earlier cell with the same architecture;
# this later definition is the one in effect from here on)
class MLP(nn.Module):
    """Five-layer perceptron: input_dim -> 128 -> 64 -> 32 -> 16 -> 1.

    ReLU follows every layer except the last, whose raw output is returned.
    """

    def __init__(self, input_dim):
        super().__init__()
        layer_sizes = (input_dim, 128, 64, 32, 16, 1)
        modules = []
        for n_in, n_out in zip(layer_sizes, layer_sizes[1:]):
            modules.append(nn.Linear(n_in, n_out))
            modules.append(nn.ReLU())
        # The output layer has no activation, so drop the trailing ReLU.
        modules.pop()
        self.model = nn.Sequential(*modules)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return its output."""
        result = self.model(x)
        return result

# Load the original MLP model
mlp_model_file = './models/mlp_model_resampled_0.1.pth'
input_dim = x_train.shape[1]
original_mlp = MLP(input_dim).to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine;
# weights_only=True limits unpickling to tensors/plain containers, which is all
# a state_dict contains, instead of executing arbitrary pickled objects.
original_mlp.load_state_dict(torch.load(mlp_model_file, map_location=device, weights_only=True))
original_mlp.eval()

# Evaluate the original MLP model
with torch.no_grad():
    x_test_tensor = torch.tensor(x_test.values, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).to(device)
    y_pred_original = original_mlp(x_test_tensor).cpu().numpy().flatten()
    # NOTE(review): MLP ends in a bare Linear layer, so these are raw outputs,
    # not probabilities. If the model was trained with a logit-based loss
    # (e.g. BCEWithLogitsLoss) the matching cut would be sigmoid(out) > 0.5,
    # i.e. out > 0 -- confirm against the training notebook before comparing
    # this baseline with the corrected model.
    y_pred_original = (y_pred_original > 0.5).astype(int)  # Convert to binary predictions

# Calculate accuracy
original_accuracy = accuracy_score(y_test, y_pred_original)
print(f"Original MLP Accuracy: {original_accuracy * 100:.2f}%")

# Calculate confusion matrix. labels=[0, 1] guarantees a 2x2 result so the
# four-way unpack below cannot fail when one class is absent.
conf_matrix_original = confusion_matrix(y_test, y_pred_original, labels=[0, 1])

# Extract values from confusion matrix (row = true class, column = predicted)
tn, fp, fn, tp = conf_matrix_original.ravel()

print(f"Total correct predictions: {tn + tp}")
print(f"Total wrong predictions: {fp + fn}")
print(f"Number of 0s predicted as 1s (False Positives): {fp}")
print(f"Number of 1s predicted as 0s (False Negatives): {fn}")

  original_mlp.load_state_dict(torch.load(mlp_model_file))


Original MLP Accuracy: 96.84%
Total correct predictions: 19367
Total wrong predictions: 633
Number of 0s predicted as 1s (False Positives): 25
Number of 1s predicted as 0s (False Negatives): 608
