In [1]:
!pip install tensorflow keras scikit-learn matplotlib pandas numpy opencv-python keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [2]:
!pip install gdown



In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
!unzip "/content/drive/MyDrive/sipakmed/SIPaKMeD.zip" -d "/content/sipakmed"

[1;30;43mSe han truncado las últimas 5000 líneas del flujo de salida.[0m
  inflating: /content/sipakmed/im_Parabasal/im_Parabasal/CROPPED/075_12_cyt.dat  
  inflating: /content/sipakmed/im_Parabasal/im_Parabasal/CROPPED/075_12_nuc.dat  
  inflating: /content/sipakmed/im_Parabasal/im_Parabasal/CROPPED/075_13.bmp  
  inflating: /content/sipakmed/im_Parabasal/im_Parabasal/CROPPED/075_13_cyt.dat  
  inflating: /content/sipakmed/im_Parabasal/im_Parabasal/CROPPED/075_13_nuc.dat  
  inflating: /content/sipakmed/im_Parabasal/im_Parabasal/CROPPED/075_14.bmp  
  inflating: /content/sipakmed/im_Parabasal/im_Parabasal/CROPPED/075_14_cyt.dat  
  inflating: /content/sipakmed/im_Parabasal/im_Parabasal/CROPPED/075_14_nuc.dat  
  inflating: /content/sipakmed/im_Parabasal/im_Parabasal/CROPPED/076_01.bmp  
  inflating: /content/sipakmed/im_Parabasal/im_Parabasal/CROPPED/076_01_cyt.dat  
  inflating: /content/sipakmed/im_Parabasal/im_Parabasal/CROPPED/076_01_nuc.dat  
  inflating: /content/sipakmed/im_P

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import cv2
import os
from time import time
import kerastuner as kt
class_mapping = {
    "normal": 0,
    "superficial_intermediate": 1,
    "parabasal": 2,
    "metaplastic": 3,
    "adenocarcinoma": 4
}

# %% [code]
# Preprocesamiento de imágenes cervicales
import cv2
import os
import numpy as np

def preprocess_cervical_images(data_dir, img_size=(64, 64)):
    X = []
    y = []
    class_mapping = {}
    class_id = 0

    for class_folder in sorted(os.listdir(data_dir)):
        class_path = os.path.join(data_dir, class_folder)
        if os.path.isdir(class_path):
            # Subcarpeta con el mismo nombre
            inner_path = os.path.join(class_path, class_folder)
            if not os.path.isdir(inner_path):
                continue

            print(f"🔍 Procesando clase: {class_folder}...")
            class_mapping[class_folder] = class_id

            for root, _, files in os.walk(inner_path):
                for file in files:
                    if file.lower().endswith(('.bmp', '.jpg', '.jpeg', '.png')):
                        file_path = os.path.join(root, file)
                        img = cv2.imread(file_path)
                        if img is None:
                            print(f"❌ Imagen inválida o no se pudo leer: {file_path}")
                            continue
                        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                        img = cv2.resize(img, img_size)
                        X.append(img)
                        y.append(class_id)

            class_id += 1

    print(f"✅ Total de imágenes cargadas: {len(X)}")
    return np.array(X), np.array(y)

# Cargar datos
X, y = preprocess_cervical_images("/content/sipakmed")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

# %% [code]
# Modelos a evaluar
def build_cnn_model(input_shape, num_classes):
    model = keras.Sequential([
        keras.layers.Conv2D(32, (3, 3), activation="relu", input_shape=input_shape),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Conv2D(64, (3, 3), activation="relu"),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Conv2D(128, (3, 3), activation="relu"),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation="relu"),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation="softmax")
    ])
    return model

def build_resnet_model(input_shape, num_classes):
    base_model = keras.applications.ResNet50(
        include_top=False,
        weights="imagenet",
        input_shape=input_shape
    )
    base_model.trainable = False
    inputs = keras.Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = keras.layers.GlobalAveragePooling2D()(x)
    outputs = keras.layers.Dense(num_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)

# %% [code]
# Evaluación de modelos
models = {
    "CNN Simple": build_cnn_model(X_train.shape[1:], len(class_mapping)),
    "ResNet50": build_resnet_model(X_train.shape[1:], len(class_mapping))
}

results = {}

for name, model in models.items():
    print(f"Entrenando {name}...")
    start_time = time()

    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )

    history = model.fit(
        X_train, y_train,
        epochs=5,
        batch_size=64,
        validation_split=0.2,
        callbacks=[
            keras.callbacks.EarlyStopping(patience=3),
            keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2)
        ]
    )

    # Evaluación
    test_loss, test_acc = model.evaluate(X_test, y_test)
    y_pred = np.argmax(model.predict(X_test), axis=1)

    # Guardar resultados
    results[name] = {
    "model": model,
    "history": history,  # ✅ Agregado aquí
    "test_accuracy": test_acc,
    "classification_report": classification_report(y_test, y_pred, target_names=class_mapping.keys()),
    "confusion_matrix": confusion_matrix(y_test, y_pred),
    "training_time": time() - start_time
     }

# %% [code]
# Selección del mejor modelo
best_model_name = max(results, key=lambda x: results[x]["test_accuracy"])
best_model = results[best_model_name]["model"]
best_model.save("models/best_cervical_model.h5")

# %% [code]
# Optimización de hiperparámetros
def hyperparameter_tuning(hp):
    model = keras.Sequential()
    model.add(keras.layers.Conv2D(
        hp.Int("conv1_filters", 32, 128, step=32),
        (3, 3), activation="relu", input_shape=X_train.shape[1:]
    ))
    model.add(keras.layers.MaxPooling2D((2, 2)))

    for i in range(hp.Int("num_conv_layers", 1, 3)):
        model.add(keras.layers.Conv2D(
            hp.Int(f"conv{i+2}_filters", 64, 256, step=32),
            (3, 3), activation="relu"
        ))
        model.add(keras.layers.MaxPooling2D((2, 2)))

    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(
        hp.Int("dense_units", 64, 256, step=32),
        activation="relu"
    ))
    model.add(keras.layers.Dense(len(class_mapping), activation="softmax"))

    model.compile(
        optimizer=keras.optimizers.Adam(hp.Float("learning_rate", 1e-4, 1e-2)),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

tuner = kt.RandomSearch(
    hyperparameter_tuning,
    objective="val_accuracy",
    max_trials=5,
    directory="tuning",
    project_name="cervical_cancer"
)

tuner.search(X_train, y_train, epochs=10, validation_split=0.2)

# Guardar modelo optimizado
best_hp_model = tuner.get_best_models(num_models=1)[0]
best_hp_model.save("models/optimized_cervical_model.h5")

# %% [code]
# Generar reportes gráficos
def generate_reports():
    os.makedirs("reports", exist_ok=True)

    # Matriz de confusión
    plt.figure(figsize=(10, 8))
    sns.heatmap(results[best_model_name]["confusion_matrix"], annot=True, fmt="d")
    plt.title("Matriz de Confusión")
    plt.xlabel("Predicción")
    plt.ylabel("Real")
    plt.savefig("reports/confusion_matrix.png")
    plt.close()

    # Curva de aprendizaje
    plt.figure(figsize=(10, 6))
    plt.plot(results[best_model_name]["history"].history["accuracy"], label="Train Accuracy")
    plt.plot(results[best_model_name]["history"].history["val_accuracy"], label="Validation Accuracy")
    plt.title("Curva de Aprendizaje")
    plt.xlabel("Época")
    plt.ylabel("Precisión")
    plt.legend()
    plt.savefig("reports/learning_curve.png")
    plt.close()

generate_reports()


Trial 5 Complete [00h 00m 24s]
val_accuracy: 0.7945205569267273

Best val_accuracy So Far: 0.7945205569267273
Total elapsed time: 00h 02m 03s


  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
import os

for root, dirs, files in os.walk("/content/sipakmed"):
    print(f"📂 Carpeta: {root}")
    for d in dirs:
        print(f"  📁 Subcarpeta: {d}")
    for f in files[:3]:  # Mostrar solo 3 archivos por carpeta
        print(f"  🖼️ Archivo: {f}")

📂 Carpeta: /content/sipakmed
  📁 Subcarpeta: im_Superficial-Intermediate
  📁 Subcarpeta: im_Dyskeratotic
  📁 Subcarpeta: im_Koilocytotic
  📁 Subcarpeta: im_Metaplastic
  📁 Subcarpeta: im_Parabasal
📂 Carpeta: /content/sipakmed/im_Superficial-Intermediate
  📁 Subcarpeta: im_Superficial-Intermediate
📂 Carpeta: /content/sipakmed/im_Superficial-Intermediate/im_Superficial-Intermediate
  📁 Subcarpeta: CROPPED
  🖼️ Archivo: 005_nuc05.dat
  🖼️ Archivo: 036_nuc02.dat
  🖼️ Archivo: 101_cyt05.dat
📂 Carpeta: /content/sipakmed/im_Superficial-Intermediate/im_Superficial-Intermediate/CROPPED
  🖼️ Archivo: 044_02_nuc.dat
  🖼️ Archivo: 053_02_cyt.dat
  🖼️ Archivo: 007_01_nuc.dat
📂 Carpeta: /content/sipakmed/im_Dyskeratotic
  📁 Subcarpeta: im_Dyskeratotic
📂 Carpeta: /content/sipakmed/im_Dyskeratotic/im_Dyskeratotic
  📁 Subcarpeta: CROPPED
  🖼️ Archivo: 005_nuc05.dat
  🖼️ Archivo: 036_nuc02.dat
  🖼️ Archivo: 101_cyt05.dat
📂 Carpeta: /content/sipakmed/im_Dyskeratotic/im_Dyskeratotic/CROPPED
  🖼️ Archivo: 

In [8]:
requirements = """
streamlit==1.22.0
tensorflow==2.10.0
numpy==1.23.5
opencv-python==4.6.0.66
matplotlib==3.6.2
seaborn==0.12.1
pillow==9.3.0
scikit-learn==1.2.0
keras-tuner==1.1.3
"""

with open("requirements.txt", "w") as f:
    f.write(requirements.strip())

In [8]:
dockerfile = """
FROM python:3.9-slim

WORKDIR /app

COPY requirements.txt .
COPY app.py .
COPY models/ ./models/
COPY reports/ ./reports/

RUN apt-get update && apt-get install -y \
    libgl1-mesa-glx \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir -r requirements.txt

EXPOSE 8501

CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]


"""

with open("Dockerfile", "w") as f:
    f.write(dockerfile.strip())

In [11]:
codigo_main = """
import tensorflow as tf
# Tu código de carga de modelo, inferencia o entrenamiento aquí
print("✅ Proyecto ejecutado desde Docker correctamente.")
"""

with open("main.py", "w") as f:
    f.write(codigo_main.strip())

In [12]:
main_code = '''
import numpy as np
import os
import cv2
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Mapeo de clases
class_mapping = {
    "normal": 0,
    "superficial_intermediate": 1,
    "parabasal": 2,
    "metaplastic": 3,
    "adenocarcinoma": 4
}

# Preprocesamiento de imágenes
def preprocess_cervical_images(data_dir, img_size=(224, 224)):
    X, y = [], []
    class_id = 0
    for class_folder in sorted(os.listdir(data_dir)):
        class_path = os.path.join(data_dir, class_folder)
        inner_path = os.path.join(class_path, class_folder)
        if not os.path.isdir(inner_path): continue

        for root, _, files in os.walk(inner_path):
            for file in files:
                if file.lower().endswith(('.jpg', '.png', '.jpeg', '.bmp')):
                    img = cv2.imread(os.path.join(root, file))
                    if img is None:
                        print("❌ Error al leer:", file)
                        continue
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = cv2.resize(img, img_size)
                    X.append(img)
                    y.append(class_id)
        class_id += 1
    return np.array(X), np.array(y)

# Ejecutar flujo principal
def main():
    print("📁 Cargando imágenes...")
    X, y = preprocess_cervical_images("sipakmed")
    X = X / 255.0  # Normalización
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

    print("📦 Cargando modelo...")
    model = keras.models.load_model("models/optimized_cervical_model.h5")

    print("🔎 Evaluando...")
    loss, acc = model.evaluate(X_test, y_test)
    print(f"✅ Precisión en test: {acc:.4f}")

    y_pred = np.argmax(model.predict(X_test), axis=1)
    print("\\n📊 Reporte de Clasificación:")
    print(classification_report(y_test, y_pred, target_names=class_mapping.keys()))

if __name__ == "__main__":
    main()
'''

with open("main.py", "w") as f:
    f.write(main_code.strip())