<a href="https://colab.research.google.com/github/jlrocam/my-first-binder/blob/main/LesionPiel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# This cell also brings `kagglehub` into scope for the dataset download
# cell that follows, so it has to run first.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# Download the image dataset and the metadata dataset, keeping the value
# returned by each call.
# NOTE(review): neither variable is referenced by the training cell below,
# which reads from hard-coded "/kaggle/input/..." paths instead — confirm the
# two agree when running outside Kaggle.
jlrocam_skin_images_path = kagglehub.dataset_download('jlrocam/skin-images')
jlrocam_skin_metadata_path = kagglehub.dataset_download('jlrocam/skin-metadata')

print('Data source import complete.')


In [None]:
import os

# Disable oneDNN custom ops. The variable is set *before* TensorFlow is
# imported on purpose: the original code set it after the import.
# NOTE(review): presumably TensorFlow reads this flag while it is being
# loaded, so setting it afterwards would have no effect — confirm.
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

import tensorflow as tf
import numpy as np
import pandas as pd
import cv2
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, Input
from sklearn.model_selection import train_test_split
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# Input data locations (Kaggle mount points).
DATASET_DIR = "/kaggle/input/skin-images"
METADATA_FILE = "/kaggle/input/skin-metadata/metadata.csv"
# Prefix of every saved model file; "_iter<N>.keras" or "_FINAL.keras" is appended.
MODEL_PATH = "/kaggle/working/skin_lesion_model"
# Text file (one image file name per line) listing images already trained on.
PROCESSED_IMAGES_FILE = "processed_images.txt"
# Samples per gradient step (passed to Dataset.batch).
BATCH_SIZE = 16
# Images handled per outer training iteration (chunks of 500 images).
IMAGES_PER_BATCH = 500
# Epochs run by model.fit for each chunk of images.
EPOCHS = 10

# Load the metadata table; image file names are the ISIC id plus ".jpg".
df = pd.read_csv(METADATA_FILE, low_memory=False)
df['isic_id'] = df['isic_id'].map(lambda image_id: image_id + '.jpg')

# Integer-encode both diagnosis columns, one encoder per column. The encoders
# are fitted on the full table, before already-processed rows are removed.
le_1, le_3 = LabelEncoder(), LabelEncoder()
df['diagnosis_1_encoded'] = le_1.fit_transform(df['diagnosis_1'])
df['diagnosis_3_encoded'] = le_3.fit_transform(df['diagnosis_3'])

# Collect the names of images handled by earlier runs (one name per line).
processed_images = set()
if os.path.exists(PROCESSED_IMAGES_FILE):
    with open(PROCESSED_IMAGES_FILE, "r") as f:
        processed_images.update(f.read().splitlines())

# Keep only the rows whose image has not been processed yet.
already_processed = df['isic_id'].isin(processed_images)
df = df[~already_processed]

# Find the most recently saved model
def _saved_models_by_age(directory):
    """Return the saved model file names in *directory*, oldest first.

    A file counts as a saved model when its name starts with
    "skin_lesion_model_iter" or is exactly "skin_lesion_model_FINAL.keras".

    Files are ordered by modification time rather than by name because a
    plain string sort is wrong here: "skin_lesion_model_FINAL.keras" sorts
    before every "..._iter*" name, and "..._iter10" sorts before "..._iter2".
    The iteration number also restarts at 1 on every run (already processed
    images are filtered out before chunking), so the number in the name does
    not say which file is newest.

    Returns an empty list when *directory* does not exist.
    """
    try:
        names = os.listdir(directory)
    except FileNotFoundError:
        return []
    candidates = [
        name for name in names
        if name.startswith("skin_lesion_model_iter") or name == "skin_lesion_model_FINAL.keras"
    ]
    # The name is a secondary key so ties on mtime are resolved deterministically.
    return sorted(
        candidates,
        key=lambda name: (os.path.getmtime(os.path.join(directory, name)), name),
    )


model_files = _saved_models_by_age("/kaggle/working")
latest_model = model_files[-1] if model_files else None

# Resume from a saved model when one was found; otherwise build a small CNN
# with a shared convolutional trunk and one softmax head per diagnosis column.
# NOTE(review): the run captured below this cell failed inside model.fit on a
# TPU host ("could not find registered transfer manager for platform Host").
# The model here is created without any tf.distribute strategy — presumably a
# TPU strategy scope (or a CPU/GPU accelerator) is required; confirm.
if not latest_model:
    print("No se encontró modelo previo, iniciando desde cero.")
    inputs = Input(shape=(224, 224, 3))

    # First conv stage is the only one carrying an L2 kernel penalty.
    features = Conv2D(32, (3, 3), activation="relu", kernel_regularizer=l2(0.01))(inputs)
    features = MaxPooling2D(2, 2)(features)
    # Remaining conv stages: same shape, growing filter count.
    for n_filters in (64, 128):
        features = Conv2D(n_filters, (3, 3), activation="relu")(features)
        features = MaxPooling2D(2, 2)(features)

    features = Flatten()(features)
    features = Dense(128, activation="relu")(features)
    features = Dropout(0.5)(features)

    # One classification head per label encoder; sizes follow the fitted classes.
    output1 = Dense(len(le_1.classes_), activation="softmax", name="diagnosis_1")(features)
    output2 = Dense(len(le_3.classes_), activation="softmax", name="diagnosis_3")(features)

    model = Model(inputs=inputs, outputs=[output1, output2])
    model.compile(
        optimizer="adam",
        loss={
            "diagnosis_1": "sparse_categorical_crossentropy",
            "diagnosis_3": "sparse_categorical_crossentropy",
        },
        metrics={"diagnosis_1": "accuracy", "diagnosis_3": "accuracy"},
    )
else:
    print(f"Cargando modelo existente: {latest_model}")
    model = load_model(f"/kaggle/working/{latest_model}")

# Data generator
def generate_data(batch_df, directory):
    """Yield ``(image, (diagnosis_1, diagnosis_3))`` samples in an endless loop.

    Args:
        batch_df: DataFrame with the columns 'isic_id' (image file name),
            'diagnosis_1_encoded' and 'diagnosis_3_encoded'.
        directory: Folder the image file names are resolved against.

    Yields:
        A tuple ``(img, (label_1, label_3))`` where ``img`` is the image
        converted from BGR to RGB, resized to 224x224 and divided by 255.0,
        and the labels are the two encoded diagnosis values of that row.

    Raises:
        FileNotFoundError: when a complete pass over ``batch_df`` produces no
            sample at all (empty frame, or every image missing/unreadable).
            Without this check the endless loop would spin forever without
            ever yielding, hanging whatever consumes the generator.
    """
    while True:  # loop forever so the consuming dataset never runs out
        produced_sample = False
        rows = zip(
            batch_df['isic_id'],
            batch_df['diagnosis_1_encoded'],
            batch_df['diagnosis_3_encoded'],
        )
        for image_name, label_1, label_3 in rows:
            img_path = os.path.join(directory, image_name)
            if not os.path.exists(img_path):
                continue  # rows without an image file are skipped
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None for files it cannot decode; passing
                # that to cvtColor would raise, so skip the row instead.
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (224, 224))
            img = img / 255.0
            produced_sample = True
            yield img, (label_1, label_3)
        if not produced_sample:
            raise FileNotFoundError(
                f"No readable images for {len(batch_df)} metadata rows in {directory!r}"
            )

def _make_dataset(frame):
    """Wrap generate_data(frame, DATASET_DIR) in a batched, prefetched tf.data pipeline."""
    sample_signature = (
        tf.TensorSpec(shape=(224, 224, 3), dtype=tf.float32),
        (tf.TensorSpec(shape=(), dtype=tf.int32),
         tf.TensorSpec(shape=(), dtype=tf.int32)),
    )
    # `frame` is a function argument, so the lambda is bound to this exact
    # frame and is unaffected by later rebinding of the loop variables.
    dataset = tf.data.Dataset.from_generator(
        lambda: generate_data(frame, DATASET_DIR),
        output_signature=sample_signature,
    )
    return dataset.repeat().batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


def _steps_for(n_images):
    """Return how many BATCH_SIZE-sized steps cover n_images once (at least 1)."""
    return max(1, -(-n_images // BATCH_SIZE))


total_images = len(df)
# Ceiling division: the previous "// + 1" produced an extra, empty chunk
# whenever total_images was an exact multiple of IMAGES_PER_BATCH (or zero),
# and train_test_split fails on an empty frame.
num_batches = -(-total_images // IMAGES_PER_BATCH)

if num_batches == 0:
    print("No hay imágenes nuevas por procesar.")

for batch_idx in range(num_batches):
    start_idx = batch_idx * IMAGES_PER_BATCH
    end_idx = min(start_idx + IMAGES_PER_BATCH, total_images)
    batch_df = df.iloc[start_idx:end_idx].copy()

    if len(batch_df) > 1:
        train_df, val_df = train_test_split(batch_df, test_size=0.2, random_state=42)
    else:
        # A single image cannot be split; train on it without validation.
        train_df, val_df = batch_df, None

    train_dataset = _make_dataset(train_df)
    val_dataset = _make_dataset(val_df) if val_df is not None else None

    # The datasets are infinite, so both step counts must be given explicitly.
    # One epoch is one pass over the chunk's training images; the previous
    # value (IMAGES_PER_BATCH steps) meant IMAGES_PER_BATCH * BATCH_SIZE images
    # per epoch. Without validation_steps, validation would never finish.
    steps_per_epoch = _steps_for(len(train_df))
    validation_steps = _steps_for(len(val_df)) if val_df is not None else None

    print(f"Limitar imagenes por epoch {len(train_df)} imágenes")

    print(f"Entrenando batch {batch_idx + 1}/{num_batches} con {len(batch_df)} imágenes")

    model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=EPOCHS,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
    )

    is_last_batch = batch_idx + 1 == num_batches
    model_filename = f"{MODEL_PATH}_iter{batch_idx+1}.keras"
    if is_last_batch:
        model_filename = f"{MODEL_PATH}_FINAL.keras"
    model.save(model_filename)
    print(f"Modelo guardado en {model_filename}")

    # Record the chunk as processed only after its model has been saved, so an
    # interrupted run re-trains this chunk instead of silently skipping it.
    with open(PROCESSED_IMAGES_FILE, "a") as f:
        f.write("\n".join(batch_df['isic_id']) + "\n")

    if is_last_batch:
        break  # nothing left to train on; no need to ask

    continuar = input("¿Desea continuar con la siguiente iteración? (s/n): ")
    if continuar.lower() != "s":
        print("Entrenamiento detenido por el usuario.")
        break


E0000 00:00:1742886869.652767      10 common_lib.cc:612] Could not set metric server port: INVALID_ARGUMENT: Could not find SliceBuilder port 8471 in any of the 0 ports provided in `tpu_process_addresses`="local"
=== Source Location Trace: ===
learning/45eac/tfrc/runtime/common_lib.cc:230


No se encontró modelo previo, iniciando desde cero.


I0000 00:00:1742886892.025606      10 service.cc:148] XLA service 0x58fa5b056980 initialized for platform TPU (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1742886892.025672      10 service.cc:156]   StreamExecutor device (0): TPU, 2a886c8
I0000 00:00:1742886892.025683      10 service.cc:156]   StreamExecutor device (1): TPU, 2a886c8
I0000 00:00:1742886892.025690      10 service.cc:156]   StreamExecutor device (2): TPU, 2a886c8
I0000 00:00:1742886892.025696      10 service.cc:156]   StreamExecutor device (3): TPU, 2a886c8
I0000 00:00:1742886892.025702      10 service.cc:156]   StreamExecutor device (4): TPU, 2a886c8
I0000 00:00:1742886892.025709      10 service.cc:156]   StreamExecutor device (5): TPU, 2a886c8
I0000 00:00:1742886892.025712      10 service.cc:156]   StreamExecutor device (6): TPU, 2a886c8
I0000 00:00:1742886892.025716      10 service.cc:156]   StreamExecutor device (7): TPU, 2a886c8


Limitar imagenes por epoch 500 imágenes
Entrenando batch 1/803 con 500 imágenes
Epoch 1/10


NotFoundError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "/usr/local/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/local/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/usr/local/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/usr/local/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 737, in start

  File "/usr/local/lib/python3.10/site-packages/anyio/_core/_eventloop.py", line 74, in run

  File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2303, in run

  File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 216, in run

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 636, in run_until_complete

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/local/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 428, in process_shell

  File "/usr/local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 501, in process_shell_message

  File "/usr/local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 337, in execute_request

  File "/usr/local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 752, in execute_request

  File "/usr/local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 433, in do_execute

  File "/usr/local/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 582, in run_cell

  File "/usr/local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3077, in run_cell

  File "/usr/local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3132, in _run_cell

  File "/usr/local/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3336, in run_cell_async

  File "/usr/local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3519, in run_ast_nodes

  File "/usr/local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3579, in run_code

  File "/tmp/ipykernel_10/2326872957.py", line 115, in <module>

  File "/usr/local/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/usr/local/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/usr/local/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

could not find registered transfer manager for platform Host -- check target linkage
	 [[{{node StatefulPartitionedCall}}]] [Op:__inference_multi_step_on_iterator_2770]