In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import roc_curve, auc, classification_report
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

#ucimlrepo is for the data
from ucimlrepo import fetch_ucirepo 


 # Fetch the Breast Cancer Wisconsin dataset from UCI repository
breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17)

# Get the features (X) and targets (y) as pandas DataFrames
X = breast_cancer_wisconsin_diagnostic.data.features
y = breast_cancer_wisconsin_diagnostic.data.targets

# Print columns to verify the structure
#print("Feature columns:\n", X.columns)
#print("Target columns:\n", y.columns)

y = y.iloc[:, 0]  # extract the Series

# Map the diagnosis labels: 1 for malignant ('M'), 0 for benign ('B')
y = y.map({'M': 1, 'B': 0})  # Adjust this mapping as per the dataset's structure

# Scale the features using StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# split the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define the input dimension
input_dim = X_train.shape[1]

# take a subset of the training data that only has benign samples
X_train_benign = X_train[y_train == 0]

# input dimension for the benign model
input_dim_benign = X_train_benign.shape[1]

#take a subset of the training data that only has malignant samples
X_train_malignant = X_train[y_train == 1]

# input dimension for the malignant model
input_dim_malignant = X_train_malignant.shape[1]


In [2]:
print("testing")

testing


In [5]:
# function to define the shared model arhitecture 
def create_autoencoder(input_dim):
    model = Sequential([
        Dense(14, activation='relu', input_shape=(input_dim,)),
        Dense(7, activation='relu'),
        Dense(14, activation='relu'),
        Dense(input_dim, activation='linear')
    ])
    return model


# Create the autoencoder models
autoencoder_normal = create_autoencoder(input_dim)
autoencoder_benign = create_autoencoder(input_dim_benign)
autoencoder_malignant = create_autoencoder(input_dim_malignant)
autoencoder_pos = create_autoencoder(input_dim)




# --------- Create the custom loss fuction----------

# Combine X_train and y_train into a single dataset
dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train.values.reshape(-1, 1))) 

# Shuffle and batch the dataset
batch_size = 32
dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)




# Define the custom loss function 
def custom_loss_pos(batch_data, X_pred):
    
    # Extract the features and labels from batch_data
    X_batch, labels_batch = batch_data

    # Compute the MSE between the true input and the reconstructed input
    mse = tf.reduce_mean(tf.square(X_batch - X_pred), axis=1)

    # compute the standard batch MSE and standard batch MSE using reduce_sum
    standard_batch_mse = tf.reduce_mean(mse)
    # standard_batch_mse_reduce_sum = tf.reduce_sum(mse) / len(labels_batch)

    # # Create a mask to include only those points where labels == 0
    # mask = tf.cast(labels_batch == 0, dtype=tf.float32)
    
    # # Create the inverse mask
    # inverse_mask = 1 - mask

    # # Apply the mask to the MSE
    # masked_mse = mse * mask

    # # Apply the inverse mask to the MSE
    # inverse_masked_mse = mse * inverse_mask

    # # Calculate the masked batch-level MSE
    # masked_batch_level_mse = tf.reduce_sum(masked_mse) / (tf.reduce_sum(mask) + tf.keras.backend.epsilon())

    # # Calculate the inverse masked batch-level MSE
    # inverse_masked_batch_level_mse = tf.reduce_sum(inverse_masked_mse) / (tf.reduce_sum(inverse_mask) + tf.keras.backend.epsilon())


    # # now print each of the variables above
    # tf.print("mask lenth:", tf.reduce_sum(mask))
    # tf.print("inverse mask length:", tf.reduce_sum(inverse_mask))
    # tf.print("label length:", len(labels))
    # tf.print("standard batch level mse:", standard_batch_mse)
    # tf.print("standard batch level mse using reduce_sum:", standard_batch_mse_reduce_sum)
    # tf.print("masked batch level mse:", masked_batch_level_mse)
    # tf.print("inverse masked batch level mse:", inverse_masked_batch_level_mse)


    # Return the masked MSE
    return mse


#---------------------------------------------------


# def custom_loss_pos(X_true, X_pred, labels):
#     # Compute the MSE between the true input and the reconstructed input
#     # this is at the level of the point
#     mse = tf.reduce_mean(tf.square(X_true - X_pred), axis=1)
    
#     #standard batch level mse
#     standard_batch_mse = tf.reduce_mean(mse)

#     #standard batch level mse using reduce_sum
#     standard_batch_mse_reduce_sum = tf.reduce_sum(mse) / 32
    
#     # Create a mask to include only those points meeting the below condition
#     mask = tf.cast(labels == 0, dtype=tf.float32)
    
#     # create the inverse mask
#     inverse_mask = 1 - mask

#     # Apply the mask to the MSE
#     masked_mse = mse * mask

#     #apply the inverse mask to the mse
#     inverse_masked_mse = mse * inverse_mask

#     #calculate the masked batch level mse
#     masked_batch_level_mse = tf.reduce_sum(masked_mse) / (tf.reduce_sum(mask) + tf.keras.backend.epsilon())

#     #calculate the inverse masked batch level mse
#     inverse_masked_batch_level_mse = tf.reduce_sum(inverse_masked_mse) / (tf.reduce_sum(inverse_mask) + tf.keras.backend.epsilon())

#     #return a random number
#     return masked_mse


# # Custom loss function that takes into account the labels
# def loss_wrapper(labels):
#     def custom_loss(y_true, y_pred):
#         return custom_loss_pos(y_true, y_pred, labels)
#     return custom_loss



# Compile the models witht the adam optimizer
autoencoder_normal.compile(optimizer='adam', loss='mean_squared_error')
autoencoder_benign.compile(optimizer='adam', loss='mean_squared_error')
autoencoder_malignant.compile(optimizer='adam', loss='mean_squared_error')
labels_for_loss = y_train.values.reshape(-1, 1) #needed for the custom loss function call below
autoencoder_pos.compile(optimizer='adam', loss=custom_loss_pos)


# train the models
autoencoder_normal.fit(X_train, X_train, epochs=10, batch_size=32,verbose=1)
autoencoder_benign.fit(X_train_benign, X_train_benign, epochs=10, batch_size=32,verbose=1)
autoencoder_malignant.fit(X_train_malignant, X_train_malignant, epochs=10, batch_size=32,verbose=1)
#autoencoder_pos.fit(X_train, X_train, epochs=10, batch_size=32,verbose=1)
autoencoder_pos.fit(dataset, epochs=10, verbose=1)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


2024-08-29 12:05:31.912895: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [455,1]
	 [[{{node Placeholder/_1}}]]
2024-08-29 12:05:31.913094: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [455,1]
	 [[{{node Placeholder/_1}}]]


OperatorNotAllowedInGraphError: in user code:

    File "/Users/jason.bono/anaconda3/envs/minenv/lib/python3.8/site-packages/keras/engine/training.py", line 1284, in train_function  *
        return step_function(self, iterator)
    File "/var/folders/vc/6hv2dgxj41x99fs_ywt93gxw0000gp/T/ipykernel_10626/36791877.py", line 37, in custom_loss_pos  *
        X_batch, labels_batch = batch_data

    OperatorNotAllowedInGraphError: Iterating over a symbolic `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.


In [None]:
#get feature-level reconstruction errors for positive and negative autoencoder
X_test_normal = autoencoder_normal.predict(X_test)
X_test_benign = autoencoder_benign.predict(X_test)
X_test_malignant = autoencoder_malignant.predict(X_test)
X_test_pos = autoencoder_pos.predict(X_test)

# get the patient-level (ie point-level) reconstruction errors (ie the scores)
scores_normal = np.mean(np.power(X_test - X_test_normal, 2), axis=1)
scores_benign = np.mean(np.power(X_test - X_test_benign, 2), axis=1)
scores_malignant = np.mean(np.power(X_test - X_test_malignant, 2), axis=1)
scores_pos = np.mean(np.power(X_test - X_test_pos, 2), axis=1)

# Calculate the rocs and aucs
fpr_normal, tpr_normal, _ = roc_curve(y_test, scores_normal)
roc_auc_normal = auc(fpr_normal, tpr_normal)
fpr_benign, tpr_benign, _ = roc_curve(y_test, scores_benign)
roc_auc_benign = auc(fpr_benign, tpr_benign)
fpr_malignant, tpr_malignant, _ = roc_curve(y_test, scores_malignant)
roc_auc_malignant = auc(fpr_malignant, tpr_malignant)
fpr_pos, tpr_pos, _ = roc_curve(y_test, scores_pos)
roc_auc_pos = auc(fpr_pos, tpr_pos)

In [None]:
# plot the roc curves
plt.figure()
lw = 2
plt.plot(fpr_normal, tpr_normal, color='darkorange', lw=lw, label='Normal Autoencoder (area = %0.2f)' % roc_auc_normal)
plt.plot(fpr_benign, tpr_benign, color='green', lw=lw, label='Benign Autoencoder (area = %0.2f)' % roc_auc_benign)
plt.plot(fpr_malignant, tpr_malignant, color='red', lw=lw, label='Malignant Autoencoder (area = %0.2f)' % roc_auc_malignant)
plt.plot(fpr_pos, tpr_pos, color='blue', lw=lw, label='Positive Autoencoder (area = %0.2f)' % roc_auc_pos)
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()
