In [1]:
# Librerías necesarias
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf

# Scikit-learn
from sklearn.metrics import mean_squared_error, f1_score, roc_curve, auc
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import shuffle, class_weight

# TensorFlow y Keras
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Ignorar warnings
import warnings
warnings.filterwarnings("ignore")

# Semillas para reproducibilidad
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)




In [2]:
## Contar Archivos
def GetDatasetSize(path):
    num_of_image = {}
    for folder in os.listdir(path):
        num_of_image[folder] = len(os.listdir(os.path.join(path, folder)))
    return num_of_image

path = r'C:\Users\enavarro\Desktop\Tesis MGTT\Entrenamientos\Codigos\DataSet\Dataset_Tesis\Data Procesada Kaagle'
DatasetSize = GetDatasetSize(path)
print(DatasetSize)



{'cancer': 437, 'non-cancer': 437}


In [3]:
## Crear modelo con CAM
def crea_modelo_con_CAM():
    base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(256, 256, 3))
    for layer in base_model.layers[:-50]:
        layer.trainable = False

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))(x)
    x = Dropout(0.6)(x)
    predictions = Dense(1, activation='sigmoid', name="output_layer")(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    optimizer = Adam(learning_rate=0.00005)
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc'),
                 tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')]
    )
    return model

## Función para cargar imágenes
def load_images(image_paths, image_size):
    images = []
    for img_path in image_paths:
        img = load_img(img_path, target_size=image_size)
        img_array = img_to_array(img) / 255.0
        images.append(img_array)
    return np.array(images)

## Función para generar CAM
def generate_cam(model, img_array, layer_name="conv5_block16_2_conv"):
    cam_model = Model(inputs=model.input, outputs=[model.get_layer(layer_name).output, model.output])
    conv_output, predictions = cam_model.predict(img_array)
    predicted_class = int(predictions > 0.5)
    output_weights = model.get_layer("output_layer").get_weights()[0]
    cam = np.dot(conv_output[0], output_weights[:, predicted_class])
    cam = np.maximum(cam, 0)
    cam = cam / np.max(cam)
    cam = cv2.resize(cam, (256, 256))
    return cam, predicted_class

## Configuración
root_dir = r'C:\Users\enavarro\Desktop\Tesis MGTT\Entrenamientos\Codigos\DataSet\Dataset_Tesis\Data Procesada Kaagle'
classes_dir = ['cancer', 'non-cancer']
n_splits = 5
image_size = (256, 256)

# Leer imágenes y etiquetas
image_paths = []
labels = []
for cls in classes_dir:
    cls_dir = os.path.join(root_dir, cls)
    for img_name in os.listdir(cls_dir):
        image_paths.append(os.path.join(cls_dir, img_name))
        labels.append(0 if cls == 'non-cancer' else 1)

image_paths, labels = shuffle(np.array(image_paths), np.array(labels), random_state=42)

# Configuración de validación cruzada
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

fold_results = []
training_histories = []
roc_data = []

for fold, (train_index, val_index) in enumerate(kf.split(image_paths, labels), start=1):
    print(f"Entrenando en el pliegue {fold}/{n_splits}...")

    train_paths, val_paths = image_paths[train_index], image_paths[val_index]
    train_labels, val_labels = labels[train_index], labels[val_index]

    # Cargar imágenes
    train_images = load_images(train_paths, image_size)
    val_images = load_images(val_paths, image_size)

    # Generador de aumentación de datos
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    train_datagen = datagen.flow(train_images, train_labels, batch_size=16)

    # Calcular pesos de las clases
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=np.unique(train_labels),
        y=train_labels
    )
    class_weights = dict(enumerate(class_weights))

    # Crear modelo
    model = crea_modelo_con_CAM()

    # Callbacks
    checkpoint_callback = ModelCheckpoint(
        filepath=f"best_model_fold{fold}.keras",
        monitor="val_auc",
        save_best_only=True,
        verbose=1
    )
    early_stopping_callback = EarlyStopping(
        monitor="val_auc",
        patience=5,
        verbose=1,
        restore_best_weights=True
    )
    lr_scheduler = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=2,
        min_lr=1e-7,
        verbose=1
    )

    # Entrenamiento
    history = model.fit(
        train_datagen,
        steps_per_epoch=len(train_images) // 16,
        epochs=25,
        validation_data=(val_images, val_labels),
        class_weight=class_weights,
        callbacks=[checkpoint_callback, early_stopping_callback, lr_scheduler]
    )

    training_histories.append(history.history)

    # Evaluar y calcular métricas
    val_predictions = model.predict(val_images)
    val_predictions_binary = (val_predictions > 0.5).astype(int)
    f1 = f1_score(val_labels, val_predictions_binary)
    print(f"F1-score en el pliegue {fold}: {f1}")

    fpr, tpr, thresholds = roc_curve(val_labels, val_predictions)
    roc_auc = auc(fpr, tpr)

    roc_data.append({'fold': fold, 'fpr': fpr, 'tpr': tpr, 'roc_auc': roc_auc})

    val_metrics = model.evaluate(val_images, val_labels)
    fold_results.append({
        'fold': fold,
        'val_loss': val_metrics[0],
        'val_accuracy': val_metrics[1],
        'val_auc': val_metrics[2],
        'f1_score': f1,
        'roc_auc': roc_auc
    })


Entrenando en el pliegue 1/5...
Epoch 1/25
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 974ms/step - accuracy: 0.6086 - auc: 0.6576 - loss: 10.7173 - precision: 0.6261 - recall: 0.7165
Epoch 1: val_auc improved from inf to 0.90387, saving model to best_model_fold1.keras
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 1s/step - accuracy: 0.6100 - auc: 0.6594 - loss: 10.7119 - precision: 0.6267 - recall: 0.7167 - val_accuracy: 0.7086 - val_auc: 0.9039 - val_loss: 10.0190 - val_precision: 0.8750 - val_recall: 0.4828 - learning_rate: 5.0000e-05
Epoch 2/25
[1m 1/43[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m43s[0m 1s/step - accuracy: 0.4375 - auc: 0.6111 - loss: 10.2018 - precision: 0.5000 - recall: 0.2222
Epoch 2: val_auc did not improve from 0.90387
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 269ms/step - accuracy: 0.4375 - auc: 0.6111 - loss: 10.2018 - precision: 0.5000 - recall: 0.2222 - val_accuracy: 0.7143 - val_auc: 0.9048 - val_l

In [4]:

# Resultados finales
results_df = pd.DataFrame(fold_results)
print("\nResultados de validación cruzada:")
print(results_df)

print("\nPromedios de métricas:")
print(results_df.mean())

print("\nDesviaciones estándar de métricas:")
print(results_df.std())



Resultados de validación cruzada:
   fold  val_loss  val_accuracy   val_auc  f1_score   roc_auc
0     1  4.436189      0.908571  0.983738  0.906977  0.983673
1     2  5.330735      0.920000  0.969436  0.919540  0.969175
2     3  4.495671      0.897143  0.983934  0.886076  0.984065
3     4  4.411258      0.914286  0.975705  0.913295  0.975575
4     5  4.404651      0.936782  0.985863  0.934911  0.985996

Promedios de métricas:
fold            3.000000
val_loss        4.615701
val_accuracy    0.915356
val_auc         0.979735
f1_score        0.912160
roc_auc         0.979696
dtype: float64

Desviaciones estándar de métricas:
fold            1.581139
val_loss        0.401324
val_accuracy    0.014659
val_auc         0.006956
f1_score        0.017893
roc_auc         0.007111
dtype: float64


In [5]:
# Guardar modelo final
model.save("Densenet121_CAM_model.hdf5")

