In [445]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
df = pd.read_csv('diabetes_prediction_dataset.csv')
df_encoded = pd.get_dummies(df, columns=['gender', 'smoking_history'], drop_first=True)
testDF = df_encoded.sample(frac=1).reset_index(drop=True)
x_unscaled = testDF.drop(['diabetes'], axis=1)
y = testDF['diabetes']

# Normalize the data
numerical_columns = x_unscaled.select_dtypes(include=np.number).columns
boolean_columns = x_unscaled.select_dtypes(include=bool).columns
scaler = StandardScaler()
temp = pd.DataFrame(scaler.fit_transform(x_unscaled[numerical_columns]), columns=numerical_columns)
x_scaled = pd.concat([temp, x_unscaled[boolean_columns]], axis=1)
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2, random_state=42)

In [446]:
# Define the Encoder model with reduced complexity and dropout
class Encoder(nn.Module):
    def __init__(self, input_dim, encoding_dim):
        super(Encoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, encoding_dim)
        )

    def forward(self, x):
        return self.encoder(x)

# Define the Decoder model with reduced complexity and dropout
class Decoder(nn.Module):
    def __init__(self, encoding_dim, input_dim):
        super(Decoder, self).__init__()
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, input_dim)
        )

    def forward(self, x):
        return self.decoder(x)

In [447]:
# Define input dimensions
input_dim = x_train.shape[1]
encoding_dim = 8

# Load the trained autoencoder models
encoder_model_file = './models/encoder2.pth'
decoder_model_file = './models/decoder2.pth'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

encoder = Encoder(input_dim, encoding_dim).to(device)
decoder = Decoder(encoding_dim, input_dim).to(device)
encoder.load_state_dict(torch.load(encoder_model_file))
decoder.load_state_dict(torch.load(decoder_model_file))
encoder.eval()
decoder.eval()

# Define the Autoencoder model
class Autoencoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(Autoencoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

autoencoder = Autoencoder(encoder, decoder).to(device)

  encoder.load_state_dict(torch.load(encoder_model_file))
  decoder.load_state_dict(torch.load(decoder_model_file))


## Original MLP model

In [448]:
import torch
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix

# Define the original MLP model
class MLP(nn.Module):
    def __init__(self, input_dim):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()  # Sigmoid for binary classification
        )
        
    def forward(self, x):
        return self.model(x)

# Load the original MLP model
mlp_model_file = './models/mlp_model.pth'
input_dim = x_train.shape[1]
original_mlp = MLP(input_dim).to(device)
original_mlp.load_state_dict(torch.load(mlp_model_file))
original_mlp.eval()

# Evaluate the original MLP model
with torch.no_grad():
    x_train_tensor = torch.tensor(x_train.values, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device)
    y_pred_original = original_mlp(x_train_tensor).cpu().numpy().flatten()
    below_0_5 = np.sum(y_pred_original < 0.5)
    above_0_5 = np.sum(y_pred_original >= 0.5)
    print(f"Values below 0.5: {below_0_5}")
    print(f"Values above 0.5: {above_0_5}")
    y_pred_original = (y_pred_original > 0.5).astype(int)  # Convert to binary predictions

# Calculate accuracy
original_accuracy = accuracy_score(y_train, y_pred_original)
print(f"Original MLP Accuracy: {original_accuracy * 100:.2f}%")

# Calculate confusion matrix
conf_matrix_original = confusion_matrix(y_train, y_pred_original)

# Extract values from confusion matrix
original_tn, original_fp, original_fn, original_tp = conf_matrix_original.ravel()

print(f"Total correct predictions: {original_tn + original_tp}")
print(f"Total wrong predictions: {original_fp + original_fn}\n")
print(f"True Negatives: {original_tn}")
print(f"False Positives: {original_fp}")
print(f"False Negatives: {original_fn}")
print(f"True Positives: {original_tp}")

  original_mlp.load_state_dict(torch.load(mlp_model_file))


Values below 0.5: 74723
Values above 0.5: 5277
Original MLP Accuracy: 96.87%
Total correct predictions: 77494
Total wrong predictions: 2506

True Negatives: 72692
False Positives: 475
False Negatives: 2031
True Positives: 4802


### Simple error correction function by adding a bias (no learning)

In [449]:
# import numpy as np
# import torch
# import torch.nn.functional as F  # For activation functions

# def error_correction_function(mlp, autoencoder, x, bias_factor):
#     # Ensure all computations happen on the correct device
#     with torch.no_grad():
#         # Calculate reconstruction error using the autoencoder
#         x_tensor = torch.tensor(x, dtype=torch.float32).to(device)
#         reconstructed = autoencoder(x_tensor).cpu().numpy()
#     reconstruction_error = np.mean(np.square(x - reconstructed), axis=1)
#     threshold = np.mean(reconstruction_error)
    
#     # Make predictions with the MLP model
#     with torch.no_grad():
#         x_test_tensor = torch.tensor(x, dtype=torch.float32).to(device)
#         y_pred_proba = mlp(x_test_tensor).cpu().numpy().flatten()

#     # Adjust predictions based on reconstruction error with adaptive bias factor
#     y_pred_proba_corrected = y_pred_proba.copy()
#     high_error_indices = np.where(reconstruction_error > threshold)[0]
#     print("Number of high error indices: ", len(high_error_indices))
#     for idx in high_error_indices:
#         adaptive_bias = bias_factor * (reconstruction_error[idx] / threshold)
#         if y_pred_proba[idx] < 0.5:
#             y_pred_proba_corrected[idx] += adaptive_bias
#         else:
#             y_pred_proba_corrected[idx] -= adaptive_bias
#     # y_pred_proba_corrected[high_error_indices] += bias_factor  # Apply bias factor
    
#     initial_zeros = np.sum(y_pred_proba[high_error_indices] < 0.5)
#     initial_ones = np.sum(y_pred_proba[high_error_indices] >= 0.5)
#     print(f"Initial predictions of 0: {initial_zeros}")
#     print(f"Initial predictions of 1: {initial_ones}")
#     changed_predictions = np.sum((y_pred_proba[high_error_indices] > 0.5).astype(int) != (y_pred_proba_corrected[high_error_indices] > 0.5).astype(int))
#     print(f"Number of changed predictions: {changed_predictions}")
    
#     y_pred_proba_corrected = np.clip(y_pred_proba_corrected, 0, 1)  # Ensure probabilities are in [0, 1]

#     # Convert corrected probabilities to binary predictions
#     y_pred_corrected = (y_pred_proba_corrected > 0.5).astype(int)
    
#     return y_pred_corrected, reconstruction_error


In [450]:
# y_pred_corrected, reconstruction_error = error_correction_function(original_mlp, autoencoder, x_test.values, bias_factor=0.05)

In [451]:
# from sklearn.metrics import accuracy_score, confusion_matrix

# # Calculate accuracy
# corrected_accuracy = accuracy_score(y_test, y_pred_corrected)
# print(f"Corrected Accuracy: {corrected_accuracy * 100:.2f}%")

# # Calculate confusion matrix
# conf_matrix_corrected = confusion_matrix(y_test, y_pred_corrected)

# # Print confusion matrix results
# tn, fp, fn, tp = conf_matrix_corrected.ravel()
# print(f"Corrected - True Negatives: {tn}")
# print(f"Corrected - False Positives: {fp}")
# print(f"Corrected - False Negatives: {fn}")
# print(f"Corrected - True Positives: {tp}")


### TRYING OUT ERROR-CORRECTING MODEL

In [452]:
# Define the Bias Predictor Model
class BiasPredictor(nn.Module):
    def __init__(self):
        super(BiasPredictor, self).__init__()
        self.fc1 = nn.Linear(14, 128)
        self.dropout1 = nn.Dropout(0.2)
        
        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(0.2)
        
        self.fc3 = nn.Linear(64, 32)
        self.dropout3 = nn.Dropout(0.2)
        
        self.fc4 = nn.Linear(32, 16)
        self.fc5 = nn.Linear(16, 8)
        self.fc6 = nn.Linear(8, 1)  # Output layer for bias correction

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        
        x = F.relu(self.fc3(x))
        x = self.dropout3(x)
        
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc5(x))
        return self.fc6(x)

# Prepare the training data for the bias predictor
def prepare_bias_data(reconstruction_errors, x_test, original_probs, ground_truth):
    # Combine reconstruction errors with the existing normalized features
    features = np.hstack((reconstruction_errors.reshape(-1, 1), x_test.values))
    
    # Compute target biases
    target_bias = ground_truth - original_probs
    print("Minimum bias: ", np.min(target_bias))
    print("Maximum bias: ", np.max(target_bias))
    
    return torch.tensor(features, dtype=torch.float32), torch.tensor(target_bias, dtype=torch.float32)

def prepare_bias_data_scaled(reconstruction_errors, x_data, original_probs, ground_truth, reconstruction_scale):
    """
    Prepares data for the bias predictor by combining features and scaling reconstruction errors.
    """
    # Scale reconstruction errors
    scaled_reconstruction_errors = reconstruction_errors * reconstruction_scale
    
    # Combine scaled reconstruction errors with the existing features
    features = np.hstack((scaled_reconstruction_errors.reshape(-1, 1), x_data.values))
    
    #######################################################################################################
    # use this for only reconstruction errors as the input feature
    
    # features = scaled_reconstruction_errors.reshape(-1, 1)
    # print("Minimum scaled reconstruction error: ", np.min(scaled_reconstruction_errors))
    # print("Maximum scaled reconstruction error: ", np.max(scaled_reconstruction_errors))
    
    #######################################################################################################
    
    # Compute target biases
    target_bias = ground_truth - original_probs
    print("Minimum bias: ", np.min(target_bias))
    print("Maximum bias: ", np.max(target_bias))
    
    return torch.tensor(features, dtype=torch.float32), torch.tensor(target_bias, dtype=torch.float32)



# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train_bias_predictor(features, target_bias, val_features, val_target_bias, epochs=1000, learning_rate=0.0001, patience=10):
    model = BiasPredictor().to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    best_val_loss = float('inf')  # Initialize the best validation loss
    patience_counter = 0  # Counter for early stopping
    best_model_state = None

    for epoch in range(epochs):
        # Training phase
        model.train()
        optimizer.zero_grad()
        predictions = model(features).squeeze()
        loss = criterion(predictions, target_bias)
        loss.backward()
        optimizer.step()

        # Validation phase
        model.eval()
        with torch.no_grad():
            val_predictions = model(val_features).squeeze()
            val_loss = criterion(val_predictions, val_target_bias)

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            best_model_state = model.state_dict()  # Save the best model state
        else:
            patience_counter += 1

        # Print progress
        if (epoch + 1) % 50 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}")

        # Stop training if patience is exceeded
        if patience_counter >= patience:
            print(f"Early stopping triggered at epoch {epoch+1}. Best Val Loss: {best_val_loss:.4f}")
            break

    # Load the best model state before returning
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model


#####################################################################################################

# Train the Bias Predictor with Class Weights
# def train_bias_predictor_with_weights(features, target_bias, ground_truth, epochs=100, learning_rate=0.001):
# def train_bias_predictor_with_weights(features, target_bias, ground_truth, epochs=100, learning_rate=0.001, weight_positive_scale=1.0, weight_negative_scale=1.0):
#     model = BiasPredictor().to(device)
#     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
#     # Define class weights
#     num_positive = np.sum(ground_truth == 1)
#     num_negative = np.sum(ground_truth == 0)
#     total = len(ground_truth)

#     weight_positive = (total / (2 * num_positive)) * weight_positive_scale
#     weight_negative = (total / (2 * num_negative)) * weight_negative_scale 

#     # Convert weights to tensors
#     class_weights = torch.tensor([weight_negative, weight_positive], dtype=torch.float32).to(device)
#     criterion = nn.MSELoss(reduction='none')  # Use 'none' to apply weights manually

#     for epoch in range(epochs):
#         model.train()
#         optimizer.zero_grad()
#         predictions = model(features).squeeze()
        
#         # Calculate weighted loss
#         loss = criterion(predictions, target_bias)
#         weights = torch.where(
#             target_bias > 0,  # Assign weights based on ground truth class
#             class_weights[1],  # Positive class weight
#             class_weights[0]   # Negative class weight
#         )
#         weighted_loss = torch.mean(loss * weights)
        
#         weighted_loss.backward()
#         optimizer.step()
#     return model

#####################################################################################################

def apply_bias_correction(bias_predictor, reconstruction_errors, x_test, original_probs):
    # Combine normalized reconstruction errors with other features
    combined_features = np.hstack((reconstruction_errors.reshape(-1, 1), x_test.values))
    features = torch.tensor(combined_features, dtype=torch.float32).to(next(bias_predictor.parameters()).device)
    
    #######################################################################################################
    # use this for only reconstruction errors as the input feature
    
    # combined_features = reconstruction_errors.reshape(-1, 1)
    # features = torch.tensor(combined_features, dtype=torch.float32).to(next(bias_predictor.parameters()).device)
    
    #######################################################################################################
    
    with torch.no_grad():
        bias_correction = bias_predictor(features).squeeze().cpu().numpy()
    
    # Multiply with a scaling factor
    bias_correction *= 1.2
    
    # Apply the correction and clip probabilities to [0, 1]
    corrected_probs = np.clip(original_probs + bias_correction, 0, 1)
    
    # Number of predictions that changed
    changed_predictions = np.sum((original_probs > 0.5).astype(int) != (corrected_probs > 0.5).astype(int))
    print(f"Number of changed predictions: {changed_predictions}")
    
    return corrected_probs

In [453]:
x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

In [454]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torch.nn.functional as F

x_train_tensor = torch.tensor(x_train_split.values, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train_split.values, dtype=torch.float32).to(device)
x_val_tensor = torch.tensor(x_val_split.values, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val_split.values, dtype=torch.float32).to(device)

with torch.no_grad():
    reconstructed_train = autoencoder(x_train_tensor).cpu().numpy()
reconstructed_errors_train = np.mean(np.square(x_train_split.values - reconstructed_train), axis=1)
mean_error_train = np.mean(reconstructed_errors_train)
std_error_train = np.std(reconstructed_errors_train)
reconstruction_errors_train_normalized = (reconstructed_errors_train - mean_error_train) / std_error_train
print("Max reconstruction error: ", np.max(reconstructed_errors_train))

with torch.no_grad():
    reconstructed_val = autoencoder(x_val_tensor).cpu().numpy()
reconstructed_errors_val = np.mean(np.square(x_val_split.values - reconstructed_val), axis=1)
mean_error_val = np.mean(reconstructed_errors_val)
std_error_val = np.std(reconstructed_errors_val)
reconstruction_errors_val_normalized = (reconstructed_errors_val - mean_error_val) / std_error_val

with torch.no_grad():
    x_train_tensor = torch.tensor(x_train_split.values, dtype=torch.float32).to(device)
    # logits = original_mlp(x_test_tensor)
    # y_pred_original = torch.sigmoid(logits).cpu().numpy().flatten()
    y_pred_train = original_mlp(x_train_tensor).cpu().numpy().flatten()
    
with torch.no_grad():
    x_val_tensor = torch.tensor(x_val_split.values, dtype=torch.float32).to(device)
    y_pred_val = original_mlp(x_val_tensor).cpu().numpy().flatten()
    
    
original_probs_train = y_pred_train
ground_truth_train = y_train_split.values

original_probs_val = y_pred_val
ground_truth_val = y_val_split.values


# Prepare the data for training
# features, target_bias = prepare_bias_data(reconstruction_errors_normalized, x_train, original_probs, ground_truth)

# if model does not exist
if not os.path.exists('./models/bias_predictor.pth'):
    features_train, target_bias_train = prepare_bias_data_scaled(reconstruction_errors_train_normalized, x_train_split, original_probs_train, ground_truth_train, reconstruction_scale=20)
    features_val, target_bias_val = prepare_bias_data_scaled(reconstruction_errors_val_normalized, x_val_split, original_probs_val, ground_truth_val, reconstruction_scale=20)

    # Move tensors to the same device as the model
    features_train, target_bias_train = features_train.to(device), target_bias_train.to(device)
    features_val, target_bias_val = features_val.to(device), target_bias_val.to(device)

    # Train the bias predictor
    bias_predictor = train_bias_predictor(features_train, target_bias_train, features_val, target_bias_val, epochs=5000, learning_rate=0.0001, patience=10)
                        
    # Apply bias correction during inference
    corrected_probs = apply_bias_correction(bias_predictor, reconstruction_errors_train_normalized, x_train_split, original_probs_train)

    # Convert probabilities to binary predictions
    corrected_predictions = (corrected_probs > 0.5).astype(int)
    # Calculate accuracy
    corrected_accuracy = accuracy_score(y_train_split, corrected_predictions)
    print(f"\nCorrected Accuracy: {corrected_accuracy * 100:.2f}%")

    # Calculate confusion matrix
    conf_matrix_corrected = confusion_matrix(y_train_split, corrected_predictions)

    tn, fp, fn, tp = conf_matrix_corrected.ravel()
    print("True Positives: ", tp)
    print("True Negatives: ", tn)
    print("False Positives: ", fp)
    print("False Negatives: ", fn)

    # Evaluate the original MLP model
    with torch.no_grad():
        x_train_tensor = torch.tensor(x_train_split.values, dtype=torch.float32).to(device)
        y_train_tensor = torch.tensor(y_train_split.values, dtype=torch.float32).to(device)
        y_pred_original = original_mlp(x_train_tensor).cpu().numpy().flatten()
        y_pred_original = (y_pred_original > 0.5).astype(int)  # Convert to binary

    original_accuracy = accuracy_score(y_train_split, y_pred_original)
    print(f"\nOriginal MLP Accuracy: {original_accuracy * 100:.2f}%")

    # Calculate confusion matrix
    conf_matrix_original = confusion_matrix(y_train_split, y_pred_original)

    original_tn, original_fp, original_fn, original_tp = conf_matrix_original.ravel()
    print("True Positives: ", original_tp)
    print("True Negatives: ", original_tn)
    print("False Positives: ", original_fp)
    print("False Negatives: ", original_fn)

else:
    # Load the trained bias predictor model
    bias_predictor = BiasPredictor().to(device)
    bias_predictor.load_state_dict(torch.load('./models/bias_predictor.pth'))

Max reconstruction error:  7.528515562734883


  bias_predictor.load_state_dict(torch.load('./models/bias_predictor.pth'))


In [455]:
x_test_tensor = torch.tensor(x_test.values, dtype=torch.float32).to(device)

with torch.no_grad():
    reconstructed_test = autoencoder(x_test_tensor).cpu().numpy()
reconstructed_errors_test = np.mean(np.square(x_test.values - reconstructed_test), axis=1)
mean_error_test = np.mean(reconstructed_errors_test)
std_error_test = np.std(reconstructed_errors_test)
reconstruction_errors_test_normalized = (reconstructed_errors_test - mean_error_test) / std_error_test

with torch.no_grad():
    x_test_tensor = torch.tensor(x_test.values, dtype=torch.float32).to(device)
    y_pred_test = original_mlp(x_test_tensor).cpu().numpy().flatten()
    
original_probs_test = y_pred_test

original_accuracy_test = accuracy_score(y_test, (original_probs_test > 0.5).astype(int))


corrected_probs_test = apply_bias_correction(bias_predictor, reconstruction_errors_test_normalized, x_test, original_probs_test)

corrected_predictions_test = (corrected_probs_test > 0.5).astype(int)

corrected_accuracy_test = accuracy_score(y_test, corrected_predictions_test)
print(f"\nCorrected Accuracy on Test Set: {corrected_accuracy_test * 100:.2f}%")

conf_matrix_corrected_test = confusion_matrix(y_test, corrected_predictions_test)

tn, fp, fn, tp = conf_matrix_corrected_test.ravel()
print("True Positives: ", tp)
print("True Negatives: ", tn)
print("False Positives: ", fp)
print("False Negatives: ", fn)

print(f"\nOriginal Accuracy on Test Set: {original_accuracy_test * 100:.2f}%")

conf_matrix_original_test = confusion_matrix(y_test, (original_probs_test > 0.5).astype(int))

tn, fp, fn, tp = conf_matrix_original_test.ravel()
print("True Positives: ", tp)
print("True Negatives: ", tn)
print("False Positives: ", fp)
print("False Negatives: ", fn)

difference = corrected_accuracy_test - original_accuracy_test
difference_percent = difference/(1-original_accuracy_test)
print("Improvement: ", difference_percent*100)


Number of changed predictions: 106

Corrected Accuracy on Test Set: 97.03%
True Positives:  1127
True Negatives:  18279
False Positives:  54
False Negatives:  540

Original Accuracy on Test Set: 96.82%
True Positives:  1156
True Negatives:  18208
False Positives:  125
False Negatives:  511
Improvement:  6.603773584905969


In [456]:
if (corrected_accuracy_test > original_accuracy_test + 0.0010) and not os.path.exists('./models/bias_predictor.pth'):
    # save the model
    torch.save(bias_predictor.state_dict(), './models/bias_predictor.pth')
    print("Model saved")
    
    
