In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from imblearn.over_sampling import SMOTE

In [None]:
# Read the pre-cleaned breast cancer dataset used for training
cleaned_data_path = '../datasets/cleaned_breast_cancer_data.csv'
cancer_data = pd.read_csv(cleaned_data_path)

In [None]:
# Separate the label column from the feature columns and convert both
# to float32 NumPy arrays
target_column = "breast_cancer_history"
feature_frame = cancer_data.drop(columns=[target_column])
X_initial = feature_frame.to_numpy().astype("float32")
y_initial = cancer_data[target_column].to_numpy().astype("float32")
 

In [None]:
# Apply SMOTE (Synthetic Minority Over-sampling Technique) to balance the dataset
# SMOTE increases the samples of the minority class to achieve a balanced distribution

smote = SMOTE(random_state=12)  
# Generate a balanced dataset with SMOTE
X, y = smote.fit_resample(X_initial, y_initial)  

In [None]:
#We split the data into training and test sets
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=12)


In [None]:
# Standardise the features, since the network trains better on scaled inputs.
# The scaler is fitted on the training split only and the same fitted scaler is
# then applied to the test split. (MinMaxScaler, also imported, is an alternative.)
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Build the MLP classifier.
# Seed Python, NumPy and TensorFlow up front so weight initialisation, dropout
# and batch shuffling are repeatable on a fresh kernel (as far as the backend
# allows). 12 matches the random_state used for SMOTE and the data split.
tf.keras.utils.set_random_seed(12)

model = tf.keras.Sequential([
    # Input specification: one value per feature column.
    # `Input(shape=...)` is used instead of `InputLayer(input_shape=...)`,
    # whose `input_shape` argument is deprecated in newer Keras releases.
    layers.Input(shape=(X_train.shape[1],)),

    # Hidden layer 1: 128 units, followed by batch normalisation and dropout
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),  # Apply dropout to prevent overfitting

    # Hidden layer 2: 64 units, followed by batch normalisation and dropout
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),  # Apply dropout to prevent overfitting

    # Hidden layer 3: 32 units, followed by batch normalisation and dropout
    layers.Dense(32, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),  # Apply dropout to prevent overfitting

    # Output layer: a single sigmoid unit for binary classification
    layers.Dense(1, activation='sigmoid')
])


In [None]:
# Configure training: AdamW optimizer, binary cross-entropy loss (binary
# classification) and accuracy as the reported metric
adamw = tf.keras.optimizers.AdamW(learning_rate=1e-3)
model.compile(optimizer=adamw, loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Training-control callbacks, both driven by the validation loss:
# - stop once val_loss has not improved for 5 epochs and roll back to the best weights
# - multiply the learning rate by 0.2 after 3 stagnant epochs, never going below 1e-6
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-6,
)

In [None]:
# Print a layer-by-layer summary of the model architecture
model.summary()


In [None]:
# Train the model.
# EarlyStopping (with restore_best_weights) and ReduceLROnPlateau both select on
# `val_loss`, so the validation data must not be the test set; otherwise the test
# score reported afterwards is optimistically biased. Hold out a stratified
# validation subset of the training data and leave X_test for the final evaluation.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, stratify=y_train, random_state=12
)

history = model.fit(
    X_fit, y_fit,
    epochs=50,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, reduce_lr]
)

In [None]:
# Score the trained model on the test split and report its accuracy
evaluation = model.evaluate(X_test, y_test)
test_loss, test_accuracy = evaluation
print(f'Test accuracy: {test_accuracy}')

In [None]:
# 8. Persist the trained model to disk as an .h5 file
keras_model_path = './saved_models/breast_cancer_model.h5'
model.save(keras_model_path)

In [None]:
# Reload the saved Keras model from disk
h5_path = './saved_models/breast_cancer_model.h5'
model = tf.keras.models.load_model(h5_path)

# Convert the reloaded model to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model (binary) alongside the .h5 file
tflite_path = './saved_models/breast_cancer_model.tflite'
with open(tflite_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)
