In [None]:
# %% [markdown]
# # 1. PRELIMINARES

# %%
import tensorflow as tf
from pathlib import Path
import tensorflow.keras as keras
import numpy as np
import pandas as pd
import os
from collections import Counter
from tensorflow.keras.models import load_model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Input
from tensorflow.keras import Model
from sklearn.model_selection import train_test_split

# %%
# Fail fast when TensorFlow reports no physical device of the requested type.
device_type = "GPU"
devices = tf.config.list_physical_devices(device_type)
if len(devices) == 0:
    raise RuntimeError(f"No {device_type} devices are used in the host.")

# %%
# Random seed reused by the train/validation split and by the dataset shuffle.
seed = 42

# %% [markdown]
# # 2. CARGA DE DATOS

# %%
# Data locations: label CSV and image directory for the training set and for the test set.
TRAIN_CSV = '/kaggle/input/dataset/labels/train_data.csv'
TRAIN_IMG_DIR = '/kaggle/input/dataset/images'
TEST_CSV = '/kaggle/input/test-all/labels/test_data.csv'
TEST_IMG_DIR = '/kaggle/input/test-all/images'

# %%
# Pipeline settings shared by every dataset built further down.
batch_size = 32
image_size = (224, 224)  # size every image is resized to (EfficientNetB0 input size)

# Read the label tables for the training and test sets.
train_df, test_df = pd.read_csv(TRAIN_CSV), pd.read_csv(TEST_CSV)

# Join both target columns into a single key so the split can be stratified
# on their combination.
train_df['combined_label'] = (
    train_df['transitable'].astype(str) + "_" + train_df['Inundado'].astype(str)
)

# Hold out 20% of the labelled rows as the validation set.
train_df, val_df = train_test_split(
    train_df,
    stratify=train_df['combined_label'],
    test_size=0.2,
    random_state=seed,
)

# %% [markdown]
# # 3. PREPARACIÓN DE DATOS

# %%
def create_dataset(df, img_dir, batch_size, shuffle=True):
    """Build a batched, prefetched ``tf.data`` pipeline of labelled images.

    Args:
        df: DataFrame with an ``ID`` column and the ``transitable`` and
            ``Inundado`` label columns.
        img_dir: Directory containing one ``<ID>.jpg`` file per row of ``df``.
        batch_size: Number of samples per batch.
        shuffle: When true, shuffle the (path, label) pairs with a buffer as
            large as the dataset, seeded with the module-level ``seed``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image, labels)`` batches, where
        ``labels`` is a dict with the keys ``'transitable'`` and ``'inundado'``.
        Images are resized to the module-level ``image_size`` and divided by 255.
    """
    def _decode_example(path, target):
        # Read, decode as 3-channel JPEG, resize, then scale pixel values by 1/255.
        raw_bytes = tf.io.read_file(path)
        pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
        pixels = tf.image.resize(pixels, image_size)
        pixels = tf.cast(pixels, tf.float32) / 255.0
        targets = {'transitable': target[0], 'inundado': target[1]}
        return pixels, targets

    # One image path per row, in the same order as the label matrix.
    image_paths = []
    for image_id in df['ID']:
        image_paths.append(os.path.join(img_dir, f"{image_id}.jpg"))
    label_matrix = df[['transitable', 'Inundado']].values.astype(np.float32)

    pipeline = tf.data.Dataset.from_tensor_slices((image_paths, label_matrix))

    # Shuffle the lightweight (path, label) pairs before any image is decoded.
    if shuffle:
        pipeline = pipeline.shuffle(buffer_size=len(image_paths), seed=seed)

    return (
        pipeline
        .map(_decode_example, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

def load_and_process_image_test(file_path):
    """Read one JPEG file and return it resized to ``image_size`` and divided by 255."""
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, image_size)
    return tf.cast(resized, tf.float32) / 255.0


# Test pipeline: unlabelled images, never shuffled, so predictions come out in
# the same order as the rows of test_df.
test_file_paths = [
    os.path.join(TEST_IMG_DIR, f"{image_id}.jpg") for image_id in test_df['ID']
]
test_ds = (
    tf.data.Dataset.from_tensor_slices(test_file_paths)
    .map(load_and_process_image_test, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
# %% [markdown]
# # 4. CARGA DE MODELOS (VERSIÓN CORREGIDA)

# %%
# Saved model files to load; every entry contributes one model to `models`.
model_files = [
    "/kaggle/input/modelo_20250329_184118/keras/default/1/modelo_20250329_184118.keras"
]

# Attempt 1: load every file through tf.keras rather than the standalone keras package.
try:
    models = [tf.keras.models.load_model(model_file) for model_file in model_files]
    print("MODELOS CARGADOS CON tf.keras.models.load_model")
except Exception as e:
    # Any failure of attempt 1 is reported and triggers the fallback below.
    print(f"Error al cargar con tf.keras: {e}")
    
    # Attempt 2: wrap each file in a TFSMLayer (Keras 3).
    # NOTE(review): TFSMLayer is documented for TensorFlow SavedModel directories,
    # while model_files points at a .keras file -- confirm this fallback can succeed.
    try:
        # These imports rebind the module-level names Input and Model that were
        # imported from tensorflow.keras at the top of the notebook.
        from keras.layers import TFSMLayer
        from keras import Input, Model
        
        models = []
        for model_file in model_files:
            # Layer that wraps the saved model and calls its 'serving_default' endpoint.
            sm_layer = TFSMLayer(model_file, call_endpoint='serving_default')
            
            # Rebuild a full Keras model around the layer; the input shape is hard-coded
            # to 224x224 RGB rather than derived from image_size.
            inputs = Input(shape=(224, 224, 3))
            outputs = sm_layer(inputs)
            model = Model(inputs, outputs)
            models.append(model)
        
        print("MODELOS CARGADOS COMO TFSMLayer")
    except Exception as e2:
        # Both strategies failed: report the second error and re-raise it.
        print(f"Error al cargar con TFSMLayer: {e2}")
        raise

# %% [markdown]
# # 5. PREDICCIÓN PARA MODELO MULTI-SALIDA (ACTUALIZADO)

# %%
def predict_multi_output_model(model, test_ds, threshold=0.5):
    """Predict with a two-head model and build one combined label per sample.

    The result of ``model.predict(test_ds)`` may come in any of these layouts:

    * a dict keyed by ``'transitable'``/``'inundado'`` or by
      ``'output_0'``/``'output_1'``;
    * a list or tuple whose first two items are the "transitable" and
      "inundado" heads, in that order;
    * a single 2-D array whose columns 0 and 1 are those two heads.

    Args:
        model: Object exposing ``predict(dataset)``.
        test_ds: Dataset forwarded unchanged to ``model.predict``.
        threshold: A probability strictly greater than this value counts as a
            positive ("si") prediction. Defaults to 0.5.

    Returns:
        A list of strings, one per sample, of the form
        ``"transitable_<si|no>-inundado_<si|no>"``.

    Raises:
        KeyError: The prediction dict lacks the expected keys.
        ValueError: Fewer than two heads are available, or the two heads do not
            contain the same number of predictions.
    """
    try:
        predictions = model.predict(test_ds)

        if isinstance(predictions, dict):
            transitable_raw = predictions.get('transitable', predictions.get('output_0'))
            inundado_raw = predictions.get('inundado', predictions.get('output_1'))
            if transitable_raw is None or inundado_raw is None:
                # Fail with the available keys instead of an opaque
                # "'NoneType' object has no attribute 'flatten'".
                raise KeyError(
                    "Expected keys 'transitable'/'inundado' or 'output_0'/'output_1' "
                    f"in the prediction dict, got {list(predictions)}"
                )
        elif isinstance(predictions, (list, tuple)):
            if len(predictions) < 2:
                raise ValueError(
                    f"Expected two output heads, got {len(predictions)}"
                )
            transitable_raw, inundado_raw = predictions[0], predictions[1]
        else:
            stacked = np.asarray(predictions)
            if stacked.ndim != 2 or stacked.shape[1] < 2:
                raise ValueError(
                    "Expected a 2-D prediction array with at least two columns, "
                    f"got shape {stacked.shape}"
                )
            transitable_raw, inundado_raw = stacked[:, 0], stacked[:, 1]

        transitable_probs = np.asarray(transitable_raw).ravel()
        inundado_probs = np.asarray(inundado_raw).ravel()
        if transitable_probs.shape != inundado_probs.shape:
            # zip() would silently drop the surplus predictions of the longer head.
            raise ValueError(
                "Output heads disagree on the number of predictions: "
                f"{transitable_probs.shape[0]} vs {inundado_probs.shape[0]}"
            )

        # Binarise each head independently.
        transitable_preds = transitable_probs > threshold
        inundado_preds = inundado_probs > threshold

        # Compose the descriptive label from both binary decisions.
        return [
            f"transitable_{'si' if t else 'no'}-inundado_{'si' if i else 'no'}"
            for t, i in zip(transitable_preds, inundado_preds)
        ]
    except Exception as e:
        print(f"Error en predict_multi_output_model: {e}")
        raise
# %% [markdown]
# # 6. ENSAMBLE Y GUARDADO DE RESULTADOS

# %%
def _predict_labels(model, test_ds):
    """Return one predicted label per test sample for a single model."""
    output = getattr(model, 'output', None)
    # Multi-output models may expose their outputs as a list, a tuple or a dict
    # (the latter e.g. when a model is rebuilt around a layer returning a dict).
    if isinstance(output, (list, tuple, dict)) and len(output) > 1:
        return list(predict_multi_output_model(model, test_ds))

    probabilities = model.predict(test_ds)
    class_indices = np.argmax(probabilities, axis=1)
    if hasattr(model, 'class_names'):
        return [model.class_names[p] for p in class_indices]
    return class_indices.tolist()


def ensemble_predict_and_save(models, test_ds, output_path, ids=None):
    """Predict with one or more models and write a submission CSV.

    With a single model its predictions are written directly. With several
    models the label of each sample is chosen by majority vote; on a tie the
    label that was seen first wins, i.e. the one voted by the earliest model in
    ``models``.

    Args:
        models: Non-empty sequence of models. Models whose ``output`` is a
            list, tuple or dict with more than one entry are handled by
            ``predict_multi_output_model``; any other model is treated as a
            single-output classifier whose prediction is the arg-max class
            (mapped through ``model.class_names`` when that attribute exists).
        test_ds: Dataset forwarded to every model's ``predict``.
        output_path: Destination of the CSV file, written with the columns
            ``Id`` and ``Label``.
        ids: Optional identifiers, one per test sample. Defaults to the ``ID``
            column of the module-level ``test_df``.

    Raises:
        ValueError: ``models`` is empty, or the number of identifiers differs
            from the number of predictions.
    """
    models = list(models)
    if not models:
        raise ValueError("ensemble_predict_and_save needs at least one model")

    all_predictions = [_predict_labels(model, test_ds) for model in models]

    if len(all_predictions) == 1:
        # Single model: no voting needed.
        final_predictions = all_predictions[0]
    else:
        # Majority voting across models, sample by sample.
        final_predictions = [
            Counter(votes).most_common(1)[0][0] for votes in zip(*all_predictions)
        ]

    identifiers = test_df['ID'].values if ids is None else list(ids)
    if len(identifiers) != len(final_predictions):
        raise ValueError(
            f"Got {len(final_predictions)} predictions for {len(identifiers)} identifiers"
        )

    submission = pd.DataFrame({"Id": identifiers, "Label": final_predictions}).set_index("Id")
    submission.to_csv(output_path)

# %% [markdown]
# # 7. EJECUCIÓN

# %%
# Run inference with the loaded model(s) on the test dataset and write submission.csv.
ensemble_predict_and_save(models, test_ds, "submission.csv")
print("Predicciones completadas y guardadas en submission.csv")

# Read the file back and print its first rows as a quick sanity check.
sample = pd.read_csv("submission.csv")
print("\nMuestra de predicciones:")
print(sample.head())