In [1]:
# Data handling
import os
import json
import pandas as pd
import numpy as np

# Model building (TensorFlow / Keras)
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint

# Machine learning models
from sklearn.svm import SVC

# Data splitting and evaluation metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix, classification_report
)


In [2]:
# Load the Coimbra dataset from the specified CSV file path
filePath = '/content/dataR2.csv'

df = pd.read_csv(filePath)


In [3]:
# Display basic statistical summary
df.describe()


Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
count,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0
mean,57.301724,27.582111,97.793103,10.012086,2.694988,26.61508,10.180874,14.725966,534.647,1.551724
std,16.112766,5.020136,22.525162,10.067768,3.642043,19.183294,6.843341,12.390646,345.912663,0.499475
min,24.0,18.37,60.0,2.432,0.467409,4.311,1.65602,3.21,45.843,1.0
25%,45.0,22.973205,85.75,4.35925,0.917966,12.313675,5.474283,6.881763,269.97825,1.0
50%,56.0,27.662416,92.0,5.9245,1.380939,20.271,8.352692,10.82774,471.3225,2.0
75%,71.0,31.241442,102.0,11.18925,2.857787,37.3783,11.81597,17.755207,700.085,2.0
max,89.0,38.578759,201.0,58.46,25.050342,90.28,38.04,82.1,1698.44,2.0


In [4]:
df.head()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
0,48,23.5,70,2.707,0.467409,8.8071,9.7024,7.99585,417.114,1
1,83,20.690495,92,3.115,0.706897,8.8438,5.429285,4.06405,468.786,1
2,82,23.12467,91,4.498,1.009651,17.9393,22.43204,9.27715,554.697,1
3,68,21.367521,77,3.226,0.612725,9.8827,7.16956,12.766,928.22,1
4,86,21.111111,92,3.549,0.805386,6.6994,4.81924,10.57635,773.92,1


In [5]:
'''
initial notes:

  * i will need to scale the AGE , BMI , Glucose ,  Insulin , Leptin , Adiponectin , Resistin , MCP.1
  * the dataset useability is very high based on the dec there are no missing values

'''

'\ninitial notes:\n\n  * i will need to scale the AGE , BMI , Glucose ,  Insulin , Leptin , Adiponectin , Resistin , MCP.1\n  * the dataset useability is very high based on the dec there are no missing values\n\n'

In [6]:
df.isnull().sum()

Unnamed: 0,0
Age,0
BMI,0
Glucose,0
Insulin,0
HOMA,0
Leptin,0
Adiponectin,0
Resistin,0
MCP.1,0
Classification,0


In [7]:
df.duplicated()


Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
111,False
112,False
113,False
114,False


In [8]:
df=df.drop_duplicates()

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116 entries, 0 to 115
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             116 non-null    int64  
 1   BMI             116 non-null    float64
 2   Glucose         116 non-null    int64  
 3   Insulin         116 non-null    float64
 4   HOMA            116 non-null    float64
 5   Leptin          116 non-null    float64
 6   Adiponectin     116 non-null    float64
 7   Resistin        116 non-null    float64
 8   MCP.1           116 non-null    float64
 9   Classification  116 non-null    int64  
dtypes: float64(7), int64(3)
memory usage: 9.2 KB


In [9]:
# Apply min-max normalization to all columns in the dataset
# This scales each feature to a 0–1 range based on its own minimum and maximum value
for column in df.columns:
    df[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())

# Display the first 5 rows of the normalized dataset
df.head()


Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
0,0.369231,0.25385,0.070922,0.004908,0.0,0.052299,0.221152,0.060665,0.224659,0.0
1,0.907692,0.114826,0.22695,0.01219,0.009742,0.052726,0.103707,0.010826,0.255926,0.0
2,0.892308,0.235278,0.219858,0.036874,0.022058,0.158526,0.571021,0.076906,0.307912,0.0
3,0.676923,0.148328,0.120567,0.014171,0.005911,0.064811,0.151538,0.121131,0.533934,0.0
4,0.953846,0.13564,0.22695,0.019936,0.013748,0.027782,0.08694,0.093375,0.440565,0.0


In [10]:
# Display the unique values in the 'Classification' column to check the distinct classes (e.g., 0 = No Cancer, 1 = Cancer)
df['Classification'].unique()


array([0., 1.])

In [11]:
# Separate the target variable 'Classification' into y
y = df['Classification']

# Remove the target column from the features and store the result in x
x = df.drop(['Classification'], axis=1)


In [12]:
# Count the number of instances for each class label (0 = No Cancer, 1 = Cancer) in the target variable
y.value_counts()


Unnamed: 0_level_0,count
Classification,Unnamed: 1_level_1
1.0,64
0.0,52


In [13]:
# Split the dataset into training and testing sets
# 90% of the data will be used for training and 10% for testing
# 'stratify=y' ensures the class distribution remains balanced in both sets
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.1, stratify=y)

# Print the shapes of the resulting training and testing sets
print(xtrain.shape)  # Shape of training features
print(xtest.shape)   # Shape of testing features
print(ytrain.shape)  # Shape of training labels
print(ytest.shape)   # Shape of testing labels


(104, 9)
(12, 9)
(104,)
(12,)


In [14]:
# Initialize an SVM (Support Vector Machine) model with a polynomial kernel and gamma set to 8
svm_model = SVC(kernel='poly', gamma=8)

# Train the SVM model on the training data
svm_model.fit(xtrain, ytrain)


In [15]:
# Predict labels on both training and test data
y_train_pred = svm_model.predict(xtrain)
y_test_pred = svm_model.predict(xtest)

# Get decision function scores for test data (used for ROC-AUC calculation)
# SVM with poly kernel doesn't support probability estimates by default
y_test_score = svm_model.decision_function(xtest)

# Compute the confusion matrix for the training data
train_conf_mat = confusion_matrix(ytrain, y_train_pred)
print("Training Confusion Matrix:")
print(train_conf_mat)

# Compute the confusion matrix for the test data
test_conf_mat = confusion_matrix(ytest, y_test_pred)
tn, fp, fn, tp = test_conf_mat.ravel()

# Calculate evaluation metrics for medical analysis
accuracy = accuracy_score(ytest, y_test_pred)
precision = precision_score(ytest, y_test_pred)     # Positive Predictive Value
recall = recall_score(ytest, y_test_pred)           # Sensitivity
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0  # True Negative Rate
f1 = f1_score(ytest, y_test_pred)
roc_auc = roc_auc_score(ytest, y_test_score)

# Print evaluation results
print("\nMedical Evaluation Metrics:")
print(f"Accuracy     : {accuracy:.3f}")
print(f"Precision    : {precision:.3f} (Positive Predictive Value)")
print(f"Recall       : {recall:.3f} (Sensitivity)")
print(f"Specificity  : {specificity:.3f}")
print(f"F1 Score     : {f1:.3f}")
print(f"ROC-AUC      : {roc_auc:.3f}")

# Display the confusion matrix for the test set
print("\nValidation Confusion Matrix:")
print(test_conf_mat)

# Display a detailed classification report
print("\nClassification Report:")
print(classification_report(ytest, y_test_pred, target_names=["No Cancer", "Cancer"]))

# Print training and testing accuracy separately
print(f"\nTraining Accuracy: {svm_model.score(xtrain, ytrain) * 100:.2f}%")
print(f"Testing Accuracy : {svm_model.score(xtest, ytest) * 100:.2f}%")


Training Confusion Matrix:
[[45  2]
 [ 2 55]]

Medical Evaluation Metrics:
Accuracy     : 0.667
Precision    : 0.800 (Positive Predictive Value)
Recall       : 0.571 (Sensitivity)
Specificity  : 0.800
F1 Score     : 0.667
ROC-AUC      : 0.771

Validation Confusion Matrix:
[[4 1]
 [3 4]]

Classification Report:
              precision    recall  f1-score   support

   No Cancer       0.57      0.80      0.67         5
      Cancer       0.80      0.57      0.67         7

    accuracy                           0.67        12
   macro avg       0.69      0.69      0.67        12
weighted avg       0.70      0.67      0.67        12


Training Accuracy: 96.15%
Testing Accuracy : 66.67%


In [16]:
# Print the number of samples in each class (0 = No Cancer, 1 = Cancer)
print(df['Classification'].value_counts())

# Filter the dataset into two separate DataFrames based on the class
df_class_0 = df[df['Classification'] == 0]  # All samples labeled as No Cancer
df_class_1 = df[df['Classification'] == 1]  # All samples labeled as Cancer


Classification
1.0    64
0.0    52
Name: count, dtype: int64


In [17]:
# Perform random oversampling to balance the dataset

# Sample 250 cancer cases with replacement (duplicate entries allowed)
df_class_1_over = df_class_1.sample(250, replace=True)

# Sample 250 no-cancer cases with replacement
df_class_0_over = df_class_0.sample(250, replace=True)

# Combine the oversampled cancer and no-cancer samples into a new balanced dataset
df_test_over = pd.concat([df_class_0_over, df_class_1_over], axis=0)

# Display summary information about the new balanced dataset
df_test_over.info()


<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, 2 to 65
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             500 non-null    float64
 1   BMI             500 non-null    float64
 2   Glucose         500 non-null    float64
 3   Insulin         500 non-null    float64
 4   HOMA            500 non-null    float64
 5   Leptin          500 non-null    float64
 6   Adiponectin     500 non-null    float64
 7   Resistin        500 non-null    float64
 8   MCP.1           500 non-null    float64
 9   Classification  500 non-null    float64
dtypes: float64(10)
memory usage: 43.0 KB


In [18]:
# Separate the target variable 'Classification' from the oversampled dataset
y1 = df_test_over['Classification']

# Remove the target column from the features dataframe
df_test_over = df_test_over.drop(['Classification'], axis=1)

# Assign the remaining feature columns to X1
X1 = df_test_over


In [19]:
# Split the balanced dataset into training and testing subsets
# 75% training data, 25% testing data
# 'random_state=0' ensures reproducible splits
# 'shuffle=True' randomizes the order before splitting
# 'stratify=y1' maintains class balance in both sets
X1_s_train, X1_s_test, y1_s_train, y1_s_test = train_test_split(
    X1, y1,
    test_size=0.25,
    random_state=0,
    shuffle=True,
    stratify=y1
)

# Print the shapes of training and testing datasets and labels
print('Training data shape is : {}.'.format(X1_s_train.shape))
print('Training label shape is : {}.'.format(y1_s_train.shape))
print('Testing data shape is : {}.'.format(X1_s_test.shape))
print('Testing label shape is : {}.'.format(y1_s_test.shape))


Training data shape is : (375, 9).
Training label shape is : (375,).
Testing data shape is : (125, 9).
Testing label shape is : (125,).


In [20]:
# Initialize an SVM model with RBF kernel and gamma set to 8
svc_s_model = SVC(kernel='rbf', gamma=8)

# Train the SVM model on the balanced training data
svc_s_model.fit(X1_s_train, y1_s_train)


In [21]:
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report
)

# Predict labels for training and test data
y_train_pred = svc_s_model.predict(X1_s_train)
y_test_pred = svc_s_model.predict(X1_s_test)

# Use decision_function scores for ROC-AUC calculation since probability=True is not set
y_test_score = svc_s_model.decision_function(X1_s_test)

# Compute confusion matrix for training data
train_conf_mat = confusion_matrix(y1_s_train, y_train_pred)
print("Training Confusion Matrix:")
print(train_conf_mat)

# Compute confusion matrix for test data
test_conf_mat = confusion_matrix(y1_s_test, y_test_pred)
tn, fp, fn, tp = test_conf_mat.ravel()

# Calculate evaluation metrics
accuracy = accuracy_score(y1_s_test, y_test_pred)
precision = precision_score(y1_s_test, y_test_pred)       # Positive Predictive Value
recall = recall_score(y1_s_test, y_test_pred)             # Sensitivity
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0       # True Negative Rate
f1 = f1_score(y1_s_test, y_test_pred)
roc_auc = roc_auc_score(y1_s_test, y_test_score)

# Display the evaluation metrics
print("\nMedical Evaluation Metrics:")
print(f"Accuracy     : {accuracy:.3f}")
print(f"Precision    : {precision:.3f} (Positive Predictive Value)")
print(f"Recall       : {recall:.3f} (Sensitivity)")
print(f"Specificity  : {specificity:.3f}")
print(f"F1 Score     : {f1:.3f}")
print(f"ROC-AUC      : {roc_auc:.3f}")

# Display confusion matrix for test data
print("\nValidation Confusion Matrix:")
print(test_conf_mat)

# Display detailed classification report
print("\nClassification Report:")
print(classification_report(y1_s_test, y_test_pred, target_names=["No Cancer", "Cancer"]))

# Print training and testing accuracy scores
print(f"\nTraining Accuracy: {svc_s_model.score(X1_s_train, y1_s_train) * 100:.2f}%")
print(f"Testing Accuracy : {svc_s_model.score(X1_s_test, y1_s_test) * 100:.2f}%")


Training Confusion Matrix:
[[185   2]
 [  5 183]]

Medical Evaluation Metrics:
Accuracy     : 0.944
Precision    : 0.937 (Positive Predictive Value)
Recall       : 0.952 (Sensitivity)
Specificity  : 0.937
F1 Score     : 0.944
ROC-AUC      : 0.982

Validation Confusion Matrix:
[[59  4]
 [ 3 59]]

Classification Report:
              precision    recall  f1-score   support

   No Cancer       0.95      0.94      0.94        63
      Cancer       0.94      0.95      0.94        62

    accuracy                           0.94       125
   macro avg       0.94      0.94      0.94       125
weighted avg       0.94      0.94      0.94       125


Training Accuracy: 98.13%
Testing Accuracy : 94.40%


In [22]:
# 1. Define the Neural Network Architecture
dl_model = Sequential([
    Dense(256, activation='relu', input_shape=(X1_s_train.shape[1],)),  # Input layer with dynamic input shape
    Dropout(0.3),
    Dense(512, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Display model architecture summary
dl_model.summary()

# 2. Compile the Model
dl_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tf.keras.metrics.AUC()]
)

# 3. Set Up Checkpoint Callback to save best model weights
checkpoint = ModelCheckpoint(
    'best_model.h5',
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max'
)

# 4. Train the Model
history = dl_model.fit(
    X1_s_train, y1_s_train,
    epochs=20,
    batch_size=16,
    validation_data=(X1_s_test, y1_s_test),
    callbacks=[checkpoint],
    verbose=1
)

# 5. Load Best Weights from checkpoint
dl_model.load_weights('best_model.h5')

# 6. Predict & Evaluate on the test set
y_test_pred_prob = dl_model.predict(X1_s_test).ravel()
y_test_pred = (y_test_pred_prob > 0.5).astype(int)

# Compute confusion matrix and metrics
conf_mat = confusion_matrix(y1_s_test, y_test_pred)
tn, fp, fn, tp = conf_mat.ravel()

accuracy = accuracy_score(y1_s_test, y_test_pred)
precision = precision_score(y1_s_test, y_test_pred)
recall = recall_score(y1_s_test, y_test_pred)
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
f1 = f1_score(y1_s_test, y_test_pred)
roc_auc = roc_auc_score(y1_s_test, y_test_pred_prob)

# 7. Print Results
print("\nMedical Evaluation Metrics (Neural Network):")
print(f"Accuracy     : {accuracy:.3f}")
print(f"Precision    : {precision:.3f} (Positive Predictive Value)")
print(f"Recall       : {recall:.3f} (Sensitivity)")
print(f"Specificity  : {specificity:.3f}")
print(f"F1 Score     : {f1:.3f}")
print(f"ROC-AUC      : {roc_auc:.3f}")

print("\nConfusion Matrix:")
print(conf_mat)

print("\nClassification Report:")
print(classification_report(y1_s_test, y_test_pred, target_names=["No Cancer", "Cancer"]))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m23/24[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - accuracy: 0.5585 - auc: 0.5899 - loss: 0.6840 - precision: 0.5430 - recall: 0.5759
Epoch 1: val_accuracy improved from -inf to 0.72000, saving model to best_model.h5




[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 121ms/step - accuracy: 0.5629 - auc: 0.5946 - loss: 0.6829 - precision: 0.5511 - recall: 0.5707 - val_accuracy: 0.7200 - val_auc: 0.8541 - val_loss: 0.6224 - val_precision: 0.6753 - val_recall: 0.8387
Epoch 2/20
[1m23/24[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.6661 - auc: 0.7814 - loss: 0.6276 - precision: 0.6411 - recall: 0.7971
Epoch 2: val_accuracy improved from 0.72000 to 0.79200, saving model to best_model.h5




[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6670 - auc: 0.7799 - loss: 0.6262 - precision: 0.6432 - recall: 0.7904 - val_accuracy: 0.7920 - val_auc: 0.8507 - val_loss: 0.5395 - val_precision: 0.8462 - val_recall: 0.7097
Epoch 3/20
[1m23/24[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7741 - auc: 0.7926 - loss: 0.5671 - precision: 0.8190 - recall: 0.6816
Epoch 3: val_accuracy did not improve from 0.79200
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7730 - auc: 0.7930 - loss: 0.5656 - precision: 0.8183 - recall: 0.6816 - val_accuracy: 0.6960 - val_auc: 0.8670 - val_loss: 0.5096 - val_precision: 0.6579 - val_recall: 0.8065
Epoch 4/20
[1m23/24[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7392 - auc: 0.8362 - loss: 0.5



[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7411 - auc: 0.8373 - loss: 0.5122 - precision: 0.7719 - recall: 0.6932 - val_accuracy: 0.8160 - val_auc: 0.8900 - val_loss: 0.4241 - val_precision: 0.8824 - val_recall: 0.7258
Epoch 5/20
[1m23/24[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7440 - auc: 0.8535 - loss: 0.4698 - precision: 0.7827 - recall: 0.7022
Epoch 5: val_accuracy did not improve from 0.81600
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7447 - auc: 0.8539 - loss: 0.4695 - precision: 0.7831 - recall: 0.7013 - val_accuracy: 0.8000 - val_auc: 0.9041 - val_loss: 0.3923 - val_precision: 0.8246 - val_recall: 0.7581
Epoch 6/20
[1m21/24[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 5ms/step - accuracy: 0.7640 - auc: 0.8817 - loss: 0.44



[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7673 - auc: 0.8823 - loss: 0.4387 - precision: 0.7880 - recall: 0.6869 - val_accuracy: 0.8480 - val_auc: 0.9339 - val_loss: 0.3731 - val_precision: 0.8413 - val_recall: 0.8548
Epoch 7/20
[1m22/24[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 5ms/step - accuracy: 0.7478 - auc: 0.8786 - loss: 0.4351 - precision: 0.7603 - recall: 0.7309
Epoch 7: val_accuracy did not improve from 0.84800
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7511 - auc: 0.8803 - loss: 0.4323 - precision: 0.7665 - recall: 0.7293 - val_accuracy: 0.8480 - val_auc: 0.9407 - val_loss: 0.3551 - val_precision: 0.8413 - val_recall: 0.8548
Epoch 8/20
[1m20/24[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 6ms/step - accuracy: 0.8363 - auc: 0.9350 - loss: 0.36



[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8656 - auc: 0.9356 - loss: 0.3372 - precision: 0.8702 - recall: 0.8485 - val_accuracy: 0.8720 - val_auc: 0.9608 - val_loss: 0.3022 - val_precision: 0.8966 - val_recall: 0.8387
Epoch 10/20
[1m22/24[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 5ms/step - accuracy: 0.8156 - auc: 0.9192 - loss: 0.3671 - precision: 0.8527 - recall: 0.7739
Epoch 10: val_accuracy improved from 0.87200 to 0.89600, saving model to best_model.h5




[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8164 - auc: 0.9190 - loss: 0.3679 - precision: 0.8533 - recall: 0.7740 - val_accuracy: 0.8960 - val_auc: 0.9689 - val_loss: 0.2837 - val_precision: 0.9153 - val_recall: 0.8710
Epoch 11/20
[1m22/24[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 5ms/step - accuracy: 0.8310 - auc: 0.9421 - loss: 0.3393 - precision: 0.8267 - recall: 0.8540
Epoch 11: val_accuracy did not improve from 0.89600
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8312 - auc: 0.9401 - loss: 0.3409 - precision: 0.8293 - recall: 0.8488 - val_accuracy: 0.8800 - val_auc: 0.9731 - val_loss: 0.2719 - val_precision: 0.9434 - val_recall: 0.8065
Epoch 12/20
[1m19/24[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 6ms/step - accuracy: 0.8165 - auc: 0.9321 - loss: 0



[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8652 - auc: 0.9439 - loss: 0.3207 - precision: 0.8798 - recall: 0.8504 - val_accuracy: 0.9040 - val_auc: 0.9749 - val_loss: 0.2439 - val_precision: 0.9167 - val_recall: 0.8871
Epoch 14/20
[1m21/24[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 5ms/step - accuracy: 0.8755 - auc: 0.9387 - loss: 0.3279 - precision: 0.8727 - recall: 0.8647
Epoch 14: val_accuracy did not improve from 0.90400
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8755 - auc: 0.9407 - loss: 0.3240 - precision: 0.8775 - recall: 0.8612 - val_accuracy: 0.8960 - val_auc: 0.9803 - val_loss: 0.2334 - val_precision: 0.9153 - val_recall: 0.8710
Epoch 15/20
[1m21/24[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 5ms/step - accuracy: 0.8805 - auc: 0.9459 - loss: 



[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8315 - auc: 0.9401 - loss: 0.3167 - precision: 0.8674 - recall: 0.7839 - val_accuracy: 0.9120 - val_auc: 0.9854 - val_loss: 0.2073 - val_precision: 0.9636 - val_recall: 0.8548
Epoch 17/20
[1m22/24[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 5ms/step - accuracy: 0.9006 - auc: 0.9613 - loss: 0.2585 - precision: 0.9131 - recall: 0.8860
Epoch 17: val_accuracy improved from 0.91200 to 0.92000, saving model to best_model.h5




[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8999 - auc: 0.9609 - loss: 0.2599 - precision: 0.9118 - recall: 0.8862 - val_accuracy: 0.9200 - val_auc: 0.9832 - val_loss: 0.2093 - val_precision: 0.9643 - val_recall: 0.8710
Epoch 18/20
[1m23/24[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8821 - auc: 0.9638 - loss: 0.2648 - precision: 0.9147 - recall: 0.8578
Epoch 18: val_accuracy improved from 0.92000 to 0.94400, saving model to best_model.h5




[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8817 - auc: 0.9631 - loss: 0.2658 - precision: 0.9131 - recall: 0.8577 - val_accuracy: 0.9440 - val_auc: 0.9846 - val_loss: 0.2070 - val_precision: 0.9231 - val_recall: 0.9677
Epoch 19/20
[1m18/24[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 6ms/step - accuracy: 0.9354 - auc: 0.9543 - loss: 0.2566 - precision: 0.9132 - recall: 0.9731
Epoch 19: val_accuracy improved from 0.94400 to 0.95200, saving model to best_model.h5




[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9296 - auc: 0.9581 - loss: 0.2535 - precision: 0.9143 - recall: 0.9562 - val_accuracy: 0.9520 - val_auc: 0.9841 - val_loss: 0.1934 - val_precision: 0.9375 - val_recall: 0.9677
Epoch 20/20
[1m21/24[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 5ms/step - accuracy: 0.9151 - auc: 0.9757 - loss: 0.2281 - precision: 0.9371 - recall: 0.8844
Epoch 20: val_accuracy improved from 0.95200 to 0.96800, saving model to best_model.h5




[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9124 - auc: 0.9746 - loss: 0.2299 - precision: 0.9339 - recall: 0.8829 - val_accuracy: 0.9680 - val_auc: 0.9872 - val_loss: 0.1842 - val_precision: 0.9677 - val_recall: 0.9677
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

Medical Evaluation Metrics (Neural Network):
Accuracy     : 0.968
Precision    : 0.968 (Positive Predictive Value)
Recall       : 0.968 (Sensitivity)
Specificity  : 0.968
F1 Score     : 0.968
ROC-AUC      : 0.987

Confusion Matrix:
[[61  2]
 [ 2 60]]

Classification Report:
              precision    recall  f1-score   support

   No Cancer       0.97      0.97      0.97        63
      Cancer       0.97      0.97      0.97        62

    accuracy                           0.97       125
   macro avg       0.97      0.97      0.97       125
weighted avg       0.97      0.97      0.97       125



In [23]:


# 1. Save the full Keras model (architecture + weights)
dl_model.save('coimbra_model_package/model.keras')

# 2. Make sure the directory exists
os.makedirs('coimbra_model_package', exist_ok=True)

# 3. Save the reference column names used for training
ref_columns = list(X1_s_train.columns)
with open('coimbra_model_package/ref_columns.json', 'w') as f:
    json.dump(ref_columns, f)
print("Saved ref_columns.json")

# 4. Save the target column name
with open('coimbra_model_package/target_name.txt', 'w') as f:
    f.write('Classification')
print("Saved target_name.txt")

# 5. Save the full original dataset (processed or raw as you want)
df.to_csv('coimbra_model_package/training_data.csv', index=False)
print("Saved training_data.csv")

# 6. Load the original raw dataset (unscaled) to get correct min/max
df_raw = pd.read_csv('/content/dataR2.csv')

# 7. Calculate and save the scaling info (min and max per feature)
scaling_info = {
    col: {
        "min": float(df_raw[col].min()),
        "max": float(df_raw[col].max())
    }
    for col in df_raw.columns if col != "Classification"
}

with open('coimbra_model_package/scaling_info.json', 'w') as f:
    json.dump(scaling_info, f, indent=4)
print("Saved correct scaling_info.json")


Saved ref_columns.json
Saved target_name.txt
Saved training_data.csv
Saved correct scaling_info.json
