In [1]:
# Montar Google Drive si los archivos están allí
from google.colab import drive
drive.mount('/content/drive')
print("Google Drive montado.")

Mounted at /content/drive
Google Drive montado.


In [18]:
# @title Celda de Configuración y Carga de Artefactos

import os
import tensorflow as tf
import joblib
import pandas as pd
import numpy as np
import typing # Para anotaciones de tipo
import random
print(f"TensorFlow Version: {tf.__version__}")
print(f"Keras Version (via TF): {tf.keras.__version__}")

# Esta es la ruta donde tienes tu carpeta mlp_v1 con los archivos
# Basado en tu código de guardado:
base_model_path = '/content/drive/MyDrive/Proyecto API/RedNeuronal/mlp_v1'


TF_SAVED_MODEL_PATH = base_model_path
OHE_PKL_FILENAME = "ohe_mlp_cat.pkl"
SCALER_PKL_FILENAME = "scaler_mlp_num.pkl"
OHE_PATH = os.path.join(base_model_path, OHE_PKL_FILENAME)
SCALER_PATH = os.path.join(base_model_path, SCALER_PKL_FILENAME)

print(f"\nRuta al SavedModel: {TF_SAVED_MODEL_PATH}")
print(f"Ruta al OHE: {OHE_PATH}")
print(f"Ruta al Scaler: {SCALER_PATH}")


# --- CONFIGURACIÓN DEL MODELO (Debe coincidir con la app de Streamlit) ---
_NUMERICAL_COLS_FOR_MODEL_PREPROCESSING = ['price_t0_log', 'days_diff']
_CATEGORICAL_COLS_FOR_MODEL_PREPROCESSING = [
    'artist_name', 'pokemon_name', 'rarity',
    'set_name', 'types', 'supertype', 'subtypes'
]
_MODEL_INPUT_TENSOR_KEY_NAME = 'inputs' # De saved_model_cli
_MODEL_OUTPUT_TENSOR_KEY_NAME = 'output_0' # De saved_model_cli
_TARGET_PREDICTED_IS_LOG_TRANSFORMED = True # Tu modelo predice log1p
DEFAULT_DAYS_DIFF_FOR_PREDICTION = 29.0    # El valor constante usado en el entrenamiento MLP


# --- Carga de Artefactos ---
def load_tf_model_as_layer(model_path):
    saved_model_pb_path = os.path.join(model_path, "saved_model.pb")
    if not os.path.exists(saved_model_pb_path):
        print(f"ERROR: 'saved_model.pb' no encontrado en la ruta: {model_path}")
        return None
    try:
        model_as_layer_obj = tf.keras.layers.TFSMLayer(model_path, call_endpoint='serving_default')
        print(f"✅ SavedModel cargado exitosamente como TFSMLayer.")
        try: print(f"   Call Signature: {model_as_layer_obj._call_signature}")
        except AttributeError: print("   No se pudo acceder a '_call_signature'.")
        print(f"   Clave de salida configurada: '{_MODEL_OUTPUT_TENSOR_KEY_NAME}'.")
        return model_as_layer_obj
    except Exception as e:
        print(f"❌ ERROR al cargar SavedModel como TFSMLayer desde {model_path}: {e}")
        return None

def load_preprocessor(file_path, preprocessor_name="Preprocessor"):
    if not os.path.exists(file_path):
        print(f"❌ ERROR: Archivo '{preprocessor_name}' no encontrado en: {file_path}")
        return None
    try:
        preprocessor = joblib.load(file_path)
        print(f"✅ {preprocessor_name} cargado exitosamente desde: {file_path}")
        return preprocessor
    except Exception as e:
        print(f"❌ ERROR al cargar {preprocessor_name} desde {file_path}: {e}")
        return None

# Cargar los artefactos
print("\nCargando artefactos del modelo...")
local_tf_model_layer = load_tf_model_as_layer(TF_SAVED_MODEL_PATH)
ohe_local_preprocessor = load_preprocessor(OHE_PATH, "OneHotEncoder")
scaler_local_preprocessor = load_preprocessor(SCALER_PATH, "ScalerNumérico")

if local_tf_model_layer and ohe_local_preprocessor and scaler_local_preprocessor:
    print("\n✅ Todos los artefactos del modelo cargados correctamente.")
else:
    print("\n❌ Error: No se pudieron cargar todos los artefactos. No se podrán realizar pruebas.")

TensorFlow Version: 2.18.0
Keras Version (via TF): 3.8.0

Ruta al SavedModel: /content/drive/MyDrive/Proyecto API/RedNeuronal/mlp_v1
Ruta al OHE: /content/drive/MyDrive/Proyecto API/RedNeuronal/mlp_v1/ohe_mlp_cat.pkl
Ruta al Scaler: /content/drive/MyDrive/Proyecto API/RedNeuronal/mlp_v1/scaler_mlp_num.pkl

Cargando artefactos del modelo...
✅ SavedModel cargado exitosamente como TFSMLayer.
   Call Signature: (inputs, training=False, **kwargs)
   Clave de salida configurada: 'output_0'.
✅ OneHotEncoder cargado exitosamente desde: /content/drive/MyDrive/Proyecto API/RedNeuronal/mlp_v1/ohe_mlp_cat.pkl
✅ ScalerNumérico cargado exitosamente desde: /content/drive/MyDrive/Proyecto API/RedNeuronal/mlp_v1/scaler_mlp_num.pkl

✅ Todos los artefactos del modelo cargados correctamente.


In [19]:
# @title Celda de la Función de Predicción

def predict_price_with_local_tf_layer(
    model_layer: tf.keras.layers.TFSMLayer,
    ohe: typing.Any, # OneHotEncoder
    scaler: typing.Any, # StandardScaler
    card_data_series: pd.Series # Una Series de Pandas con los datos de la carta
) -> float | None:
    print(f"\n--- Iniciando predicción para carta ID: {card_data_series.get('id', 'N/A')} ---")

    if not model_layer or not ohe or not scaler:
        print("ERROR: Modelo TFSMLayer o preprocesadores no disponibles.")
        return None

    try:
        # --- PASO 1: Preparar DataFrame de entrada para preprocesamiento ---
        # Replicamos exactamente la lógica de mapeo desde los datos de entrada
        data_for_preprocessing_df_dict = {}

        # Columnas Numéricas
        current_price = card_data_series.get('price')
        if pd.notna(current_price) and current_price > 0:
            data_for_preprocessing_df_dict['price_t0_log'] = np.log1p(current_price)
        else:
            data_for_preprocessing_df_dict['price_t0_log'] = np.log1p(0)
            print(f"WARNING: Precio actual no válido ('{current_price}') para 'price_t0_log', usando np.log1p(0).")
        data_for_preprocessing_df_dict['days_diff'] = float(DEFAULT_DAYS_DIFF_FOR_PREDICTION)


        # Columnas Categóricas
        data_for_preprocessing_df_dict['artist_name'] = str(card_data_series.get('artist', 'Unknown_Artist'))
        data_for_preprocessing_df_dict['pokemon_name'] = str(card_data_series.get('pokemon_name', 'Unknown_Pokemon'))
        data_for_preprocessing_df_dict['rarity'] = str(card_data_series.get('rarity', 'Unknown_Rarity'))
        data_for_preprocessing_df_dict['set_name'] = str(card_data_series.get('set_name', 'Unknown_Set'))
        data_for_preprocessing_df_dict['supertype'] = str(card_data_series.get('supertype', 'Unknown_Supertype'))

        types_val = card_data_series.get('types')
        if isinstance(types_val, list) and types_val: data_for_preprocessing_df_dict['types'] = str(types_val[0]) if pd.notna(types_val[0]) else 'Unknown_Type'
        elif pd.notna(types_val): data_for_preprocessing_df_dict['types'] = str(types_val)
        else: data_for_preprocessing_df_dict['types'] = 'Unknown_Type'

        subtypes_val = card_data_series.get('subtypes')
        if isinstance(subtypes_val, list) and subtypes_val:
            cleaned_subtypes = [str(s) for s in subtypes_val if pd.notna(s)]
            data_for_preprocessing_df_dict['subtypes'] = ', '.join(sorted(list(set(cleaned_subtypes)))) if cleaned_subtypes else 'None'
        elif pd.notna(subtypes_val): data_for_preprocessing_df_dict['subtypes'] = str(subtypes_val)
        else: data_for_preprocessing_df_dict['subtypes'] = 'None'


        current_input_df_for_preprocessing = pd.DataFrame([data_for_preprocessing_df_dict])
        ordered_cols_for_df = _NUMERICAL_COLS_FOR_MODEL_PREPROCESSING + _CATEGORICAL_COLS_FOR_MODEL_PREPROCESSING
        try:
            current_input_df_for_preprocessing = current_input_df_for_preprocessing[ordered_cols_for_df]
        except KeyError as e_key:
            missing_keys_in_df = [col for col in ordered_cols_for_df if col not in current_input_df_for_preprocessing.columns]
            print(f"ERROR: Error al ordenar columnas para preprocesamiento. Faltan: {missing_keys_in_df}. Error: {e_key}")
            return None
        print(f"INFO: DataFrame para preprocesamiento (1 fila): {current_input_df_for_preprocessing.shape}. Cols: {list(current_input_df_for_preprocessing.columns)}")
        print(f"DEBUG: Valores: {current_input_df_for_preprocessing.iloc[0].to_dict()}")


        # --- PASO 2: Aplicar preprocesamiento (Scaler y OneHotEncoder) ---
        processed_feature_parts = []
        if _NUMERICAL_COLS_FOR_MODEL_PREPROCESSING:
            num_df_slice = current_input_df_for_preprocessing[_NUMERICAL_COLS_FOR_MODEL_PREPROCESSING]
            if num_df_slice.isnull().values.any():
                print(f"WARNING: NaNs ANTES de escalar: {num_df_slice.isnull().sum().to_dict()}. Imputando con 0.")
                num_df_slice = num_df_slice.fillna(0)
            numerical_features_scaled_array = scaler.transform(num_df_slice)
            processed_feature_parts.append(numerical_features_scaled_array)
            print(f"INFO: Numéricas escaladas (shape): {numerical_features_scaled_array.shape}")

        if _CATEGORICAL_COLS_FOR_MODEL_PREPROCESSING:
            cat_df_slice = current_input_df_for_preprocessing[_CATEGORICAL_COLS_FOR_MODEL_PREPROCESSING].astype(str)
            categorical_features_encoded_dense_array = ohe.transform(cat_df_slice)
            processed_feature_parts.append(categorical_features_encoded_dense_array)
            print(f"INFO: Categóricas codificadas (shape): {categorical_features_encoded_dense_array.shape}")

        if not processed_feature_parts:
            print("ERROR: No se procesaron características.")
            return None

        # --- PASO 3: Combinar ---
        final_input_array_for_model = np.concatenate(processed_feature_parts, axis=1)
        print(f"INFO: Array final para modelo (shape): {final_input_array_for_model.shape}")

        EXPECTED_NUM_FEATURES = 4865 # Confirmado por saved_model_cli
        if final_input_array_for_model.shape[1] != EXPECTED_NUM_FEATURES:
            print(f"ERROR: ¡¡¡DESAJUSTE DE SHAPE EN LA ENTRADA DEL MODELO!!!")
            print(f"    Modelo espera: {EXPECTED_NUM_FEATURES} características.")
            print(f"    Array preprocesado tiene: {final_input_array_for_model.shape[1]} características.")
            if 'numerical_features_scaled_array' in locals(): print(f"    Shape numéricas escaladas: {numerical_features_scaled_array.shape}")
            if 'categorical_features_encoded_dense_array' in locals(): print(f"    Shape categóricas OHE: {categorical_features_encoded_dense_array.shape}")
            return None

        # --- PASO 4: Predicción ---
        final_input_tensor_for_model = tf.convert_to_tensor(final_input_array_for_model, dtype=tf.float32)
        print(f"INFO: Tensor de entrada para TFSMLayer (shape): {final_input_tensor_for_model.shape}, dtype: {final_input_tensor_for_model.dtype}")

        model_input_feed_dict = {_MODEL_INPUT_TENSOR_KEY_NAME: final_input_tensor_for_model}
        print(f"INFO: Llamando a TFSMLayer con diccionario desempaquetado: Clave='{_MODEL_INPUT_TENSOR_KEY_NAME}'")
        raw_prediction_output = model_layer(**model_input_feed_dict)

        print(f"INFO: Salida cruda de TFSMLayer (tipo {type(raw_prediction_output)}): {raw_prediction_output}")

        if not isinstance(raw_prediction_output, dict):
            if tf.is_tensor(raw_prediction_output): predicted_value_tensor = raw_prediction_output
            else:
                print(f"ERROR: Salida de TFSMLayer no es ni dict ni tensor, es {type(raw_prediction_output)}.")
                return None
        elif not raw_prediction_output:
            print("ERROR: Dict de salida vacío.")
            return None
        elif _MODEL_OUTPUT_TENSOR_KEY_NAME not in raw_prediction_output:
            available_keys = list(raw_prediction_output.keys())
            print(f"ERROR: Clave '{_MODEL_OUTPUT_TENSOR_KEY_NAME}' NO en dict. Claves: {available_keys}")
            return None
        else: predicted_value_tensor = raw_prediction_output[_MODEL_OUTPUT_TENSOR_KEY_NAME]
        print(f"INFO: Tensor (clave '{_MODEL_OUTPUT_TENSOR_KEY_NAME}' si dict). Shape: {predicted_value_tensor.shape}")

        if predicted_value_tensor.shape == (1, 1) or predicted_value_tensor.shape == (1,):
            predicted_value_numeric = predicted_value_tensor.numpy()[0][0] if len(predicted_value_tensor.shape) == 2 else predicted_value_tensor.numpy()[0]
        else:
            print(f"ERROR: Shape tensor predicción: {predicted_value_tensor.shape}. Esperada (1,1) o (1,).")
            return None
        print(f"INFO: Valor numérico extraído: {predicted_value_numeric}")

        # --- PASO 5: Postprocesar ---
        if _TARGET_PREDICTED_IS_LOG_TRANSFORMED:
             final_predicted_price = np.expm1(predicted_value_numeric) # Usar expm1
        else: final_predicted_price = predicted_value_numeric
        print(f"INFO: Predicción final: {final_predicted_price}")

        return float(final_predicted_price)

    except Exception as e:
        print(f"ERROR: Excepción en predicción: {e}")
        import traceback
        traceback.print_exc()
        return None

In [20]:
# @title Celda de Prueba de Predicción con 10 Cartas Aleatorias

# --- PASO 1: Cargar o Usar all_card_metadata_df ---
# (Asegúrate que all_card_metadata_df esté cargado y contenga 'name' y otras columnas de metadatos)
if 'all_card_metadata_df' not in locals() or all_card_metadata_df.empty:
    print("ERROR: El DataFrame 'all_card_metadata_df' no está cargado o está vacío.")
    print("Por favor, asegúrate de cargar tus metadatos antes de ejecutar esta celda.")
    # Aquí podrías añadir el código para cargar all_card_metadata_df si es necesario
    # Ejemplo (si tienes bq_client configurado en Colab y la función get_card_metadata_with_base_names):
    # print("Intentando cargar all_card_metadata_df desde BigQuery...")
    # all_card_metadata_df = get_card_metadata_with_base_names(bq_client)
    # if all_card_metadata_df.empty:
    #     print("Fallo al cargar metadatos desde BQ para la prueba.")
    # else:
    #     print(f"Metadatos cargados desde BQ para la prueba: {len(all_card_metadata_df)} filas.")

else: # all_card_metadata_df está disponible
    print(f"\n--- Ejecutando pruebas de predicción con 10 cartas aleatorias ---")
    print(f"Metadatos disponibles: {len(all_card_metadata_df)} cartas.")

    # --- PASO 2: Seleccionar 10 cartas aleatorias con imagen ---
    cards_with_image = all_card_metadata_df[
        pd.notna(all_card_metadata_df['images_large']) & (all_card_metadata_df['images_large'] != '')
    ].copy()

    if cards_with_image.empty:
        print("ERROR: No se encontraron cartas con URLs de imagen en los metadatos.")
    else:
        num_cards_to_sample = min(10, len(cards_with_image))
        print(f"Seleccionando {num_cards_to_sample} cartas aleatorias con imagen.")
        sample_indices = random.sample(cards_with_image.index.tolist(), num_cards_to_sample)

        # Crear el DataFrame de prueba usando las columnas de all_card_metadata_df
        # Y asegurando que los nombres coincidan con lo que espera card_data_series
        # en la función de predicción.
        test_cards_df = cards_with_image.loc[sample_indices].copy()
        test_cards_df.rename(columns={'name': 'pokemon_name_original_meta'}, inplace=True) # Renombrar 'name' para evitar confusión si hay otra
                                                                                     # columna 'pokemon_name' que se use para el OHE

        # Simular precios (o cargar precios reales si los tienes)
        test_cards_df['price'] = np.random.uniform(1.0, 50.0, size=len(test_cards_df)) # Precios más realistas para prueba
        print("\nDEBUG: Precios simulados (rango 1-50) añadidos para prueba.")
        print("DEBUG: Columnas en test_cards_df:", test_cards_df.columns.tolist())


        # --- PASO 3: Ejecutar la predicción para cada carta ---
        print("\n--- Ejecutando predicciones ---")
        predictions = {}

        if local_tf_model_layer and ohe_local_preprocessor and scaler_local_preprocessor:
            for index, card_row in test_cards_df.iterrows():
                # Crear la card_data_series para la función de predicción
                # Asegúrate de que las claves aquí coincidan con lo que card_data_series.get() espera
                # dentro de predict_price_with_local_tf_layer

                # Mapeo explícito para asegurar que los nombres son los correctos
                # para la función de predicción.
                input_series_data = {
                    'id': card_row.get('id'),
                    'pokemon_name': card_row.get('pokemon_name_original_meta'), # Usar el nombre original de la carta
                    'supertype': card_row.get('supertype'),
                    'subtypes': card_row.get('subtypes'),
                    'types': card_row.get('types'),
                    'rarity': card_row.get('rarity'),
                    'set_name': card_row.get('set_name'),
                    'artist': card_row.get('artist'), # 'artist' es el nombre en all_card_metadata_df
                    'price': card_row.get('price')
                    # Añade otras columnas si tu función de predicción las espera de card_data_series
                }
                card_data_series_for_prediction = pd.Series(input_series_data)

                card_name_display = card_row.get('pokemon_name_original_meta')
                current_price = card_row.get('price')

                print(f"\nPrediciendo para: {card_name_display} (ID: {card_row.get('id')}, Precio Actual Sim.: {current_price:.2f}€)")
                print(f"DEBUG (Pre-Predicción): card_data_series_for_prediction['pokemon_name'] = {card_data_series_for_prediction.get('pokemon_name')}")


                if pd.notna(current_price):
                    predicted_price = predict_price_with_local_tf_layer(
                        local_tf_model_layer,
                        ohe_local_preprocessor,
                        scaler_local_preprocessor,
                        card_data_series_for_prediction # Usar la Series preparada
                    )

                    if predicted_price is not None:
                        predictions[card_row.get('id')] = {
                            'name': card_name_display,
                            'current_price': current_price,
                            'predicted_price': predicted_price,
                            'delta': predicted_price - current_price
                        }
                        print(f"INFO: Predicción completada. Predicho: {predicted_price:.2f}€")
                    else:
                        print(f"WARNING: Falló la predicción para {card_name_display} ({card_row.get('id')}).")
                else:
                    print(f"INFO: Saltando predicción para {card_name_display} ({card_row.get('id')}) - Precio actual no disponible.")

            # --- PASO 4: Mostrar un resumen de los resultados ---
            print("\n--- Resumen de Predicciones ---")
            if predictions:
                summary_df = pd.DataFrame.from_dict(predictions, orient='index')
                print(summary_df[['name', 'current_price', 'predicted_price', 'delta']].to_string())
            else:
                print("\nNo se realizaron predicciones exitosas.")
        else:
            print("\n❌ Error: Los artefactos del modelo no se cargaron. No se pueden realizar predicciones.")


--- Ejecutando pruebas de predicción con 10 cartas aleatorias ---
Metadatos disponibles: 18876 cartas.
Seleccionando 10 cartas aleatorias con imagen.

DEBUG: Precios simulados (rango 1-50) añadidos para prueba.
DEBUG: Columnas en test_cards_df: ['id', 'pokemon_name_original_meta', 'supertype', 'subtypes', 'rarity', 'set_id', 'set_name', 'number', 'artist', 'types', 'ancientTrait', 'images_small', 'images_large', 'cardmarket_url', 'cardmarket_updatedAt', 'tcgplayer_url', 'tcgplayer_updatedAt', 'price']

--- Ejecutando predicciones ---

Prediciendo para: M Mewtwo-EX (ID: xy8-63, Precio Actual Sim.: 8.37€)
DEBUG (Pre-Predicción): card_data_series_for_prediction['pokemon_name'] = M Mewtwo-EX

--- Iniciando predicción para carta ID: xy8-63 ---
INFO: DataFrame para preprocesamiento (1 fila): (1, 9). Cols: ['price_t0_log', 'days_diff', 'artist_name', 'pokemon_name', 'rarity', 'set_name', 'types', 'supertype', 'subtypes']
DEBUG: Valores: {'price_t0_log': 2.238026713912398, 'days_diff': 29.0, 

In [22]:
# @title Celda 1: Carga de Pipelines LightGBM y Threshold JSON

import json
import joblib
import pandas as pd
import numpy as np
import os
from google.colab import drive
import typing


# --- RUTAS (¡¡¡AJUSTA ESTAS!!!) ---
BASE_PROJECT_PATH = '/content/drive/MyDrive/Proyecto API/Modelos' # Carpeta que contiene High/, Low/, threshold.json

# ASUME que guardaste el 'best_high' si hiciste RandomizedSearchCV, o 'pipe_high' si no.
# Si guardaste 'best_high', el archivo .pkl debe contener ese objeto.
MODEL_HIGH_PKL_PATH = os.path.join('/content/drive/MyDrive/Proyecto API/Modelos/High/modelo_pipe_high.pkl')
MODEL_LOW_PKL_PATH = os.path.join('/content/drive/MyDrive/Proyecto API/Modelos/Low/modelo_pipe_low.pkl')
THRESHOLD_JSON_PATH = os.path.join('/content/drive/MyDrive/Proyecto API/Modelos/threshold.json')

# --- Cargar Pipelines .pkl ---
pipeline_high_loaded = None
pipeline_low_loaded = None

try:
    if os.path.exists(MODEL_HIGH_PKL_PATH):
        pipeline_high_loaded = joblib.load(MODEL_HIGH_PKL_PATH)
        print(f"✅ Pipeline High (.pkl) cargado desde: {MODEL_HIGH_PKL_PATH}")
        print(f"   Tipo de Pipeline High: {type(pipeline_high_loaded)}")
        if hasattr(pipeline_high_loaded, 'steps'): print(f"   Pasos del Pipeline High: {pipeline_high_loaded.steps}")
    else: print(f"AVISO: Archivo de Pipeline High no encontrado en {MODEL_HIGH_PKL_PATH}")
except Exception as e: print(f"❌ ERROR al cargar Pipeline High (.pkl): {e}")

try:
    if os.path.exists(MODEL_LOW_PKL_PATH):
        pipeline_low_loaded = joblib.load(MODEL_LOW_PKL_PATH)
        print(f"✅ Pipeline Low (.pkl) cargado desde: {MODEL_LOW_PKL_PATH}")
        print(f"   Tipo de Pipeline Low: {type(pipeline_low_loaded)}")
        if hasattr(pipeline_low_loaded, 'steps'): print(f"   Pasos del Pipeline Low: {pipeline_low_loaded.steps}")
    else: print(f"AVISO: Archivo de Pipeline Low no encontrado en {MODEL_LOW_PKL_PATH}")
except Exception as e: print(f"❌ ERROR al cargar Pipeline Low (.pkl): {e}")


# --- Cargar Threshold JSON ---
try:
    if os.path.exists(THRESHOLD_JSON_PATH):
        with open(THRESHOLD_JSON_PATH, 'r') as f:
            threshold_config_loaded = json.load(f)
        print(f"✅ Threshold JSON cargado: {threshold_config_loaded}")
    else:
        print(f"AVISO: Archivo Threshold JSON no encontrado en {THRESHOLD_JSON_PATH}")
        threshold_config_loaded = {"threshold": 30.0} # Default si no se encuentra
except Exception as e:
    print(f"❌ ERROR al cargar Threshold JSON: {e}")
    threshold_config_loaded = {"threshold": 30.0}

# --- CONFIGURACIÓN BASADA EN TU SCRIPT DE ENTRENAMIENTO ---
# Columnas que esperan los pipelines en el DataFrame de entrada
NUMERIC_FEATURES_INPUT = ['cm_avg1', 'cm_avg7', 'cm_avg30', 'cm_trendPrice']
CATEGORICAL_FEATURES_INPUT = ['rarity', 'supertype', 'subtypes', 'types', 'set_name']
ALL_INPUT_COLS_FOR_PIPELINES = NUMERIC_FEATURES_INPUT + CATEGORICAL_FEATURES_INPUT

TARGET_IS_LOG_TRANSFORMED = True # Ambos pipelines predicen log1p

if pipeline_high_loaded or pipeline_low_loaded:
    print("\n✅ Al menos un pipeline cargado.")
else:
    print("\n❌ Error: Ningún pipeline (High o Low) pudo ser cargado. No se podrán realizar pruebas.")

✅ Pipeline High (.pkl) cargado desde: /content/drive/MyDrive/Proyecto API/Modelos/High/modelo_pipe_high.pkl
   Tipo de Pipeline High: <class 'sklearn.pipeline.Pipeline'>
   Pasos del Pipeline High: [('preprocessor', ColumnTransformer(transformers=[('num', StandardScaler(),
                                 ['cm_avg1', 'cm_avg7', 'cm_avg30',
                                  'cm_trendPrice']),
                                ('cat', OneHotEncoder(handle_unknown='ignore'),
                                 ['rarity', 'supertype', 'subtypes', 'types',
                                  'set_name'])])), ('regressor', LGBMRegressor(force_row_wise=True, n_estimators=200, objective='mae',
              random_state=42, verbosity=-1))]
✅ Pipeline Low (.pkl) cargado desde: /content/drive/MyDrive/Proyecto API/Modelos/Low/modelo_pipe_low.pkl
   Tipo de Pipeline Low: <class 'sklearn.pipeline.Pipeline'>
   Pasos del Pipeline Low: [('preprocessor', ColumnTransformer(remainder='passthrough',
           

In [23]:
# @title Celda 2: Lógica de Decisión y Predicción con Pipelines .pkl

import pandas as pd # Asegurar que pandas esté importado en esta celda si se ejecuta de forma aislada
import numpy as np  # Asegurar que numpy esté importado
import typing       # Asegurar que typing esté importado

def get_prediction_with_lgbm_pipelines(
    input_df_row: pd.DataFrame,
    config: dict,
    # Estas variables vienen del scope global del notebook (definidas en Celda 1)
    pipe_high, # Debería ser pipeline_high_loaded
    pipe_low   # Debería ser pipeline_low_loaded
) -> typing.Tuple[typing.Optional[float], str]:

    chosen_pipeline_name = "None"
    final_prediction = None

    # Verificar si los pipelines fueron cargados (vienen de Celda 1)
    if pipe_high is None and pipe_low is None:
        print("ERROR en get_prediction: Ningún pipeline (High o Low) está cargado.")
        return None, "Error_No_Pipelines"

    try:
        # INPUT_COLS_FOR_PIPELINES también debe estar disponible (definida en Celda 1)
        X_new_predict = input_df_row[INPUT_COLS_FOR_PIPELINES]
    except NameError:
        print("ERROR en get_prediction: INPUT_COLS_FOR_PIPELINES no definida. Ejecuta la Celda 1.")
        return None, "Error_Config_Missing"
    except KeyError as e:
        print(f"ERROR en get_prediction: Faltan columnas en input_df_row: {e}")
        return None, "Error_Input_Cols"

    cm_avg7_value_for_decision = X_new_predict['cm_avg7'].iloc[0]
    # threshold_config_loaded debe estar disponible (de Celda 1)
    threshold_val_from_config = config.get('threshold', 30.0)
    active_pipe = None

    if pd.notna(cm_avg7_value_for_decision) and cm_avg7_value_for_decision >= threshold_val_from_config:
        if pipe_high:
            active_pipe = pipe_high
            chosen_pipeline_name = "High"
            print(f"INFO: Usando Pipeline High (cm_avg7: {cm_avg7_value_for_decision:.2f}€ >= {threshold_val_from_config:.2f}€)")
        elif pipe_low:
            active_pipe = pipe_low
            chosen_pipeline_name = "Low (Fallback from High)"
            print(f"INFO: Pipeline High no disponible, usando Pipeline Low como fallback.")
        else: print("ERROR: Pipeline High requerido pero no disponible (pipe_high es None).")
    else:
        if pipe_low:
            active_pipe = pipe_low
            chosen_pipeline_name = "Low"
            print(f"INFO: Usando Pipeline Low (cm_avg7: {cm_avg7_value_for_decision:.2f}€ < {threshold_val_from_config:.2f}€ o NaN)")
        elif pipe_high:
            active_pipe = pipe_high
            chosen_pipeline_name = "High (Fallback from Low)"
            print(f"INFO: Pipeline Low no disponible, usando Pipeline High como fallback.")
        else: print("ERROR: Pipeline Low requerido pero no disponible (pipe_low es None).")

    if active_pipe:
        try:
            print(f"  DEBUG: DataFrame de entrada para pipeline '{chosen_pipeline_name}':\n{X_new_predict.to_string(index=False)}")
            pred_log = active_pipe.predict(X_new_predict)
            pred_numeric = pred_log[0]
            # TARGET_IS_LOG_TRANSFORMED debe estar disponible (de Celda 1)
            final_prediction = np.expm1(pred_numeric) if TARGET_IS_LOG_TRANSFORMED else pred_numeric
            print(f"  INFO: Predicción cruda (log): {pred_numeric:.4f}, Predicción final (€): {final_prediction:.2f}")
        except Exception as e_pipe:
            print(f"  ERROR en Pipeline '{chosen_pipeline_name}': {e_pipe}")
            import traceback
            traceback.print_exc()
            final_prediction = None
    else:
        print("ERROR: Ningún pipeline fue seleccionado (active_pipe es None).")

    return final_prediction, chosen_pipeline_name

In [24]:
# @title Celda 3: Cargar Datos de Prueba y Ejecutar Predicciones con Pipelines .pkl

import pandas as pd # Asegurar que pandas esté importado
import numpy as np  # Asegurar que numpy esté importado
import random       # Asegurar que random esté importado
from sklearn.metrics import mean_absolute_error # Para MAE

# --- Definiciones de Columnas Necesarias (Asegúrate que coincidan con Celda 1 o tu entrenamiento) ---
NUMERIC_FEATURES_INPUT = ['cm_avg1', 'cm_avg7', 'cm_avg30', 'cm_trendPrice']
CATEGORICAL_FEATURES_INPUT = ['rarity', 'supertype', 'subtypes', 'types', 'set_name']
INPUT_COLS_FOR_PIPELINES = NUMERIC_FEATURES_INPUT + CATEGORICAL_FEATURES_INPUT
# --- Fin Definiciones de Columnas ---

# Inicializar df_test_data_full como un DataFrame vacío por si falla la carga
df_test_data_full = pd.DataFrame()

# --- Cargar tus datos de prueba ---
try:
    csv_path = '/content/drive/MyDrive/Proyecto API/Final Csv/all_cards_combined_with_tcg.csv'
    if os.path.exists(csv_path):
        df_test_data_full = pd.read_csv(csv_path)
        print(f"Datos de prueba cargados desde CSV: {len(df_test_data_full)} filas")

        for col in INPUT_COLS_FOR_PIPELINES:
            if col not in df_test_data_full.columns:
                print(f"WARNING: Columna '{col}' no encontrada. Rellenando con placeholder.")
                if col in NUMERIC_FEATURES_INPUT: df_test_data_full[col] = 0.0
                else: df_test_data_full[col] = 'Unknown_Placeholder'

        for col in NUMERIC_FEATURES_INPUT:
            if col in df_test_data_full.columns and df_test_data_full[col].isnull().any():
                df_test_data_full[col] = pd.to_numeric(df_test_data_full[col], errors='coerce')
                median_val = df_test_data_full[col].median(); median_val = 0.0 if pd.isna(median_val) else median_val
                df_test_data_full[col] = df_test_data_full[col].fillna(median_val)
                print(f"INFO: NaNs en '{col}' imputados con mediana {median_val:.2f}")

        for col in CATEGORICAL_FEATURES_INPUT:
            if col in df_test_data_full.columns:
                df_test_data_full[col] = df_test_data_full[col].fillna('Missing_Value').astype(str)
    else:
        print(f"ERROR: Archivo CSV de prueba no encontrado: '{csv_path}'")
except FileNotFoundError: print(f"ERROR: Archivo CSV de prueba no encontrado. Verifica ruta: '{csv_path}'")
except Exception as e_load: print(f"ERROR al cargar/preprocesar datos CSV: {e_load}")


if not df_test_data_full.empty:
    print(f"\n--- Ejecutando pruebas de predicción con pipelines .pkl ---")
    num_to_test_run_pkl = min(10, len(df_test_data_full))

    if num_to_test_run_pkl > 0:
        sample_test_df = pd.DataFrame()
        if len(df_test_data_full) >= num_to_test_run_pkl :
            sample_test_df = df_test_data_full.sample(n=num_to_test_run_pkl, random_state=42, replace=False)
        elif len(df_test_data_full) > 0:
             sample_test_df = df_test_data_full.copy(); num_to_test_run_pkl = len(sample_test_df)

        if not sample_test_df.empty:
            print(f"Probando con {num_to_test_run_pkl} cartas aleatorias.")

            # Forzar algunas cartas al pipeline High para prueba
            # Asegurarse que threshold_config_loaded esté definido (de Celda 1)
            if 'threshold_config_loaded' not in locals():
                print("ERROR CRÍTICO: threshold_config_loaded no definido. Ejecuta la Celda 1.")
                threshold_config_loaded_local_scope = {"threshold": 30.0} # Fallback para que no rompa el bucle
            else:
                threshold_config_loaded_local_scope = threshold_config_loaded


            num_to_force_high = min(3, len(sample_test_df))
            if num_to_force_high > 0:
                print(f"\nDEBUG: Forzando cm_avg7 >= threshold para las primeras {num_to_force_high} cartas de la muestra.")
                for i in range(num_to_force_high):
                    original_avg7 = sample_test_df.iloc[i]['cm_avg7']
                    forced_avg7 = threshold_config_loaded_local_scope.get('threshold', 30.0) + np.random.uniform(5.0, 50.0)
                    # Usar .loc para evitar SettingWithCopyWarning
                    sample_test_df.loc[sample_test_df.index[i], 'cm_avg7'] = forced_avg7
                    print(f"  Carta {sample_test_df.iloc[i].get('id', 'N/A')}: cm_avg7 cambiado de {original_avg7} a {forced_avg7:.2f}")

            all_test_predictions_run_pkl = []

            # Comprobación para asegurar que las variables de la Celda 1 existen
            if 'pipeline_high_loaded' not in globals() or 'pipeline_low_loaded' not in globals() or \
               'threshold_config_loaded' not in globals() or '_TARGET_PREDICTED_IS_LOG_TRANSFORMED' not in globals() or \
               'get_prediction_with_lgbm_pipelines' not in globals():
                print("ERROR CRÍTICO: Una o más variables/funciones (pipeline_high_loaded, pipeline_low_loaded, threshold_config_loaded, _TARGET_PREDICTED_IS_LOG_TRANSFORMED, get_prediction_with_lgbm_pipelines) no están definidas. ¡Asegúrate de ejecutar la Celda 1 y Celda 2 primero!")
            else:
                for index, card_row_series in sample_test_df.iterrows():
                    input_df_for_pipeline = pd.DataFrame([card_row_series])
                    card_name_display = card_row_series.get('name', 'N/A')
                    current_price_display = card_row_series.get('cm_averageSellPrice', np.nan)

                    print(f"\nProcesando carta: {card_row_series.get('id')} ({card_name_display})")
                    cm_avg7_val = card_row_series.get('cm_avg7')
                    print(f"  Datos de entrada para decisión (cm_avg7): {cm_avg7_val if pd.notna(cm_avg7_val) else 'N/A'}")
                    print(f"  Precio 'target' real de la carta: {current_price_display:.2f}€" if pd.notna(current_price_display) else "  Precio 'target' real: N/A")

                    # Llamar a la función definida en Celda 2
                    pred_run_pkl, pipeline_used_run_pkl = get_prediction_with_lgbm_pipelines(
                        input_df_for_pipeline,
                        threshold_config_loaded, # De Celda 1
                        pipeline_high_loaded,    # De Celda 1
                        pipeline_low_loaded      # De Celda 1
                    )
                    print(f"  Pipeline: {pipeline_used_run_pkl}, Pred: {pred_run_pkl:.2f}€" if pred_run_pkl is not None else f"  Pipeline: {pipeline_used_run_pkl}, Pred: Falló")
                    if pred_run_pkl is not None:
                         all_test_predictions_run_pkl.append({
                             'card_id': card_row_series.get('id'), 'name': card_name_display,
                             'actual_price': current_price_display, 'predicted_price': pred_run_pkl,
                             'pipeline_used': pipeline_used_run_pkl, 'cm_avg7_used': cm_avg7_val
                         })

            if all_test_predictions_run_pkl:
                summary_test_df_run_pkl = pd.DataFrame(all_test_predictions_run_pkl)
                print("\n--- Resumen de Pruebas con Pipelines .pkl ---")
                if 'actual_price' in summary_test_df_run_pkl.columns and summary_test_df_run_pkl['actual_price'].notna().any():
                    valid_preds = summary_test_df_run_pkl.dropna(subset=['actual_price', 'predicted_price'])
                    if not valid_preds.empty:
                        mae_test = mean_absolute_error(valid_preds['actual_price'], valid_preds['predicted_price'])
                        print(f"\nMAE en estas {len(valid_preds)} predicciones de prueba: {mae_test:.2f}€")
                    else: print("\nNo hay suficientes datos válidos para calcular MAE.")
                print(summary_test_df_run_pkl.to_string(index=False))
        else: print("No se pudieron seleccionar cartas de muestra para probar.")
    else: print("No hay datos de prueba para procesar después de cargar el CSV.")
else: print("El DataFrame de prueba (df_test_data_full) está vacío. Verifica la carga del CSV.")

Datos de prueba cargados desde CSV: 18876 filas

--- Ejecutando pruebas de predicción con pipelines .pkl ---
Probando con 10 cartas aleatorias.

DEBUG: Forzando cm_avg7 >= threshold para las primeras 3 cartas de la muestra.
  Carta base4-87: cm_avg7 cambiado de 2.63 a 51.14
  Carta sm7-104: cm_avg7 cambiado de 0.08 a 62.76
  Carta bw10-95: cm_avg7 cambiado de 17.91 a 70.78

Procesando carta: base4-87 (Pikachu)
  Datos de entrada para decisión (cm_avg7): 51.13771956028894
  Precio 'target' real de la carta: 2.79€
INFO: Usando Pipeline High (cm_avg7: 51.14€ >= 30.00€)
  DEBUG: DataFrame de entrada para pipeline 'High':
 cm_avg1  cm_avg7  cm_avg30  cm_trendPrice rarity supertype subtypes     types   set_name
    4.99 51.13772      2.78           3.53 Common   Pokémon    Basic Lightning Base Set 2
  INFO: Predicción cruda (log): 3.8492, Predicción final (€): 45.95
  Pipeline: High, Pred: 45.95€

Procesando carta: sm7-104 (Bagon)
  Datos de entrada para decisión (cm_avg7): 62.76352256286418