<a href="https://colab.research.google.com/github/jlrocam/my-first-binder/blob/main/train_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data locations
DATASET_DIR = "/content/data/train_images"
METADATA_FILE = "/content/data/metadata.csv"
MODEL_PATH = "/content/model/skin_lesion_model.keras"

# Load the metadata table
df = pd.read_csv(METADATA_FILE)

# Keep only rows with an unambiguous label. Without this filter every
# diagnosis other than "Malignant" (including missing values) would be
# silently labelled as class 0.
df = df[df["diagnosis_1"].isin(["Benign", "Malignant"])]

# Load and preprocess the images and their binary labels
images = []
labels = []

for isic_id, diagnosis in zip(df["isic_id"], df["diagnosis_1"]):
    img_path = os.path.join(DATASET_DIR, isic_id) + ".jpg"
    image = cv2.imread(img_path)
    # cv2.imread returns None when the file cannot be read; skip such rows
    if image is None:
        print(f"Warning: Could not load image at path: {img_path}")
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))
    # Scale to [0, 1] in float32: dividing the uint8 image directly would
    # produce float64 and double the memory used by the in-RAM dataset.
    images.append(image.astype(np.float32) / 255.0)
    labels.append(1 if diagnosis == "Malignant" else 0)

# Convert to numpy arrays (X: images, y: 0/1 labels)
X = np.array(images, dtype=np.float32)
y = np.array(labels)

# Split into training (80%) and validation (20%) sets.
# stratify=y keeps the benign/malignant ratio the same in both subsets, so the
# validation metrics are not skewed by an unlucky random draw.
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build the model: three Conv2D + MaxPooling2D stages followed by a dense head
# with a single sigmoid unit for binary classification.
# The input shape is declared with an explicit Input object instead of an
# `input_shape` argument on the first layer, which is the form recommended for
# Sequential models.
model = Sequential([
    tf.keras.Input(shape=(224, 224, 3)),
    Conv2D(32, (3,3), activation="relu"),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation="relu"),
    MaxPooling2D(2,2),
    Conv2D(128, (3,3), activation="relu"),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(1, activation="sigmoid")
])

# Compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Make sure the output directory exists before training starts, so a missing
# directory cannot make the save step fail after training has already finished.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# Training
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=16)

# Save the trained model
model.save(MODEL_PATH)
print("Modelo guardado en", MODEL_PATH)


In [10]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Data locations
DATASET_DIR = "/content/data/train_images"
METADATA_FILE = "/content/data/metadata.csv"
MODEL_PATH = "/content/model/skin_lesion_model.keras"

# --- Sanity-check the image directory ---
# Collect the names of the regular files found directly inside DATASET_DIR
dir_entries = os.listdir(DATASET_DIR)
image_files = [
    name
    for name in dir_entries
    if os.path.isfile(os.path.join(DATASET_DIR, name))
]
n_found = len(image_files)
print(f"Encontradas {n_found} imágenes en {DATASET_DIR}")
# Show the first 10 file names for a quick visual check
preview = image_files[:10]
print("Primeros 10 nombres de archivo de imagen:", preview)

# Load the metadata table
df = pd.read_csv(METADATA_FILE)

# --- Using ImageDataGenerator ---
# Rescale pixel values by 1/255 and reserve 20% of the rows for validation
train_datagen = ImageDataGenerator(rescale=1./255,
                                   validation_split=0.2)

# Keep only rows whose 'diagnosis_1' is 'Benign' or 'Malignant'.
# Take an explicit copy so the column assignments below operate on an
# independent frame rather than on a slice of the original (avoids pandas'
# chained-assignment warning).
df = df[df['diagnosis_1'].isin(['Benign', 'Malignant'])].copy()

df['diagnosis_1'] = df['diagnosis_1'].astype(str)

print("Unique values in 'diagnosis_1':", df['diagnosis_1'].unique())

# Append the '.jpg' extension to 'isic_id' so it matches the file names on disk
# (vectorised concatenation instead of a per-row lambda)
df['isic_id'] = df['isic_id'] + '.jpg'

# Shuffle the rows with a fixed seed before they are handed to the generator.
# NOTE(review): validation_split appears to carve out a contiguous slice of the
# rows by position, so any ordering in the CSV would otherwise leak into the
# train/validation split - confirm against the Keras version in use.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Settings shared by the training and validation iterators: images are read
# from DATASET_DIR using the 'isic_id' column as file name and 'diagnosis_1'
# as label, resized to 128x128, in batches of 16, with binary labels.
flow_options = dict(
    directory=DATASET_DIR,
    x_col="isic_id",
    y_col="diagnosis_1",
    target_size=(128, 128),
    batch_size=16,
    class_mode='binary',
)

# Iterator over the training portion of the dataframe
train_generator = train_datagen.flow_from_dataframe(
    df, subset='training', **flow_options
)

# Iterator over the validation portion of the dataframe
validation_generator = train_datagen.flow_from_dataframe(
    df, subset='validation', **flow_options
)

# --- Build the model ---
# Three Conv2D + MaxPooling2D stages followed by a dense head with a single
# sigmoid unit for binary classification.
# The input shape is declared with an explicit Input object instead of an
# `input_shape` argument on the first layer, which is the form recommended for
# Sequential models and avoids the warning Keras emits for the old form.
model = Sequential([
    tf.keras.Input(shape=(128, 128, 3)),
    Conv2D(32, (3,3), activation="relu"),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation="relu"),
    MaxPooling2D(2,2),
    Conv2D(128, (3,3), activation="relu"),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(1, activation="sigmoid")
])

# --- Compile the model ---
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Make sure the output directory exists before the long training run starts,
# so a missing directory cannot make the save step fail after training has
# already finished.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# --- Training ---
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10
)

# --- Save the trained model ---
model.save(MODEL_PATH)
print("Modelo guardado en", MODEL_PATH)

# --- Free memory ---
# Drop the references to the dataframe and generators, then force a collection
del df, train_datagen, train_generator, validation_generator
import gc
gc.collect()

Encontradas 18946 imágenes en /content/data/train_images
Primeros 10 nombres de archivo de imagen: ['ISIC_0067826.jpg', 'ISIC_0067865.jpg', 'ISIC_0060841.jpg', 'ISIC_0063801.jpg', 'ISIC_0057086.jpg', 'ISIC_0070564.jpg', 'ISIC_0053534.jpg', 'ISIC_0054565.jpg', 'ISIC_0068319.jpg', 'ISIC_0066448.jpg']
Unique values in 'diagnosis_1': ['Benign' 'Malignant']
Found 13362 validated image filenames belonging to 2 classes.
Found 3340 validated image filenames belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/10
[1m836/836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m605s[0m 720ms/step - accuracy: 0.5913 - loss: 0.6786 - val_accuracy: 0.6817 - val_loss: 0.6166
Epoch 2/10
[1m836/836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m584s[0m 697ms/step - accuracy: 0.6612 - loss: 0.6323 - val_accuracy: 0.6841 - val_loss: 0.6066
Epoch 3/10
[1m836/836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m567s[0m 677ms/step - accuracy: 0.6716 - loss: 0.6167 - val_accuracy: 0.6985 - val_loss: 0.5983
Epoch 4/10
[1m836/836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m550s[0m 658ms/step - accuracy: 0.6858 - loss: 0.6083 - val_accuracy: 0.6877 - val_loss: 0.6007
Epoch 5/10
[1m836/836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m548s[0m 655ms/step - accuracy: 0.6974 - loss: 0.5894 - val_accuracy: 0.7021 - val_loss: 0.5940
Epoch 6/10
[1m836/836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m584s[0m 699ms/step - accuracy: 0.6994 - loss: 0.5817 - val_accuracy: 0.7135 - val_loss: 0.5921
Epoc

6832