In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the read-only input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nfl-xgboost-model/scikitlearn/default/1/nfl_xgboost_solution_final.pkl
/kaggle/input/nfl-big-data-bowl-2026-prediction/sample_submission.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/test_input.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/test.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/train/input_2023_w17.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/train/output_2023_w05.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/train/output_2023_w10.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/train/input_2023_w03.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/train/output_2023_w18.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/train/input_2023_w05.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/train/output_2023_w11.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/train/output_2023_w12.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/train/output_2023_w16.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/train/output_2

In [2]:
# --- CÓDIGO DE INFERENCIA PARA KAGGLE (SOLUCIÓN FINAL) ---
import joblib
import pandas as pd
import numpy as np
import os
import gc

print(" Iniciando sistema de inferencia...")

# ==========================================
# 1. LOCATE THE MODEL FILE (.pkl)
# ==========================================
model_path = ""
print(" Buscando archivo del modelo...")

# Walk every attached input folder so the lookup still works if the dataset
# folder is renamed. os.walk yields entries in arbitrary OS order, so both the
# sub-directories and the file names are sorted: when more than one .pkl is
# attached, the same one is picked on every run.
for dirname, subdirs, filenames in os.walk('/kaggle/input'):
    subdirs.sort()  # in-place sort steers the order of the top-down walk
    for filename in sorted(filenames):
        if filename.endswith('.pkl'):
            model_path = os.path.join(dirname, filename)
            print(f"✅ Modelo encontrado en: {model_path}")
            break
    if model_path:
        break

# Fail early with an actionable message instead of crashing later in joblib.
if not model_path:
    raise FileNotFoundError("❌ NO SE ENCONTRÓ EL MODELO. Revisa que hayas subido el archivo .pkl en 'Add Input'.")

# Load the serialized training artifacts.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only attach model files that come from a trusted source.
try:
    artifacts = joblib.load(model_path)
except Exception as e:
    raise RuntimeError(f"❌ Error al abrir el archivo .pkl: {e}") from e

# Unpack separately from loading so that a missing or malformed entry is
# reported as a structure problem rather than as a failure to open the file.
# 'models_x', 'models_y', 'encoders' and 'features' are required; the other
# entries fall back to empty containers when absent.
try:
    models_x = artifacts['models_x']
    models_y = artifacts['models_y']
    encoders = artifacts['encoders']
    means_dict = artifacts.get('means_dict', {})
    features = artifacts['features']
    cat_cols = artifacts.get('cat_cols', [])
    num_cols = artifacts.get('num_cols', [])
except Exception as e:
    raise RuntimeError(f"❌ El archivo .pkl no tiene la estructura esperada: {e!r}") from e

print(" Modelo desempaquetado correctamente.")

# ==========================================
# 2. LOAD THE OFFICIAL TEST DATA
# ==========================================
test_path = '/kaggle/input/nfl-big-data-bowl-2026-prediction/test.csv'

if not os.path.exists(test_path):
    # Dry-run mode: no official file available, so build a five-row placeholder
    # frame that has every model feature plus the identifier columns.
    print(" MODO PRUEBA: No se encontró test.csv oficial (Probablemente estás fuera de la competencia).")
    print("   -> Creando datos falsos para verificar que el código corre sin errores...")
    id_columns = ['game_id', 'play_id', 'nfl_id', 'frame_id']
    test_df = pd.DataFrame(columns=features + id_columns)
    test_df['game_id'] = [2022090800] * 5
    test_df['play_id'] = [1] * 5
    test_df['nfl_id'] = [12345] * 5
    test_df['frame_id'] = range(1, 6)
    # Every feature is set to zero; the values only need to exist, not be realistic.
    for feature_name in features:
        test_df[feature_name] = 0
else:
    print(" Cargando test.csv oficial...")
    test_df = pd.read_csv(test_path)

# ==========================================
# 3. PREPROCESSING (MIRRORS THE TRAINING PIPELINE)
# ==========================================
print(" Procesando datos...")
df_proc = test_df.copy()  # work on a copy so test_df keeps the raw ID columns

# A. Fill missing numeric values with the stored training means
#    (0 when no mean was saved for a column).
for col in num_cols:
    if col in df_proc.columns:
        df_proc[col] = df_proc[col].fillna(means_dict.get(col, 0))

# B. Encode categorical columns with the fitted encoders.
for col in cat_cols:
    if col in df_proc.columns:
        df_proc[col] = df_proc[col].fillna("MISSING").astype(str)
        le = encoders.get(col)
        if le is not None:
            # One dict lookup per column instead of one le.transform() call per
            # row: an encoder's code for a class is its position in classes_.
            # Values not seen in training map to NaN and are then assigned 0.
            # NOTE(review): code 0 is also the code of the first known class,
            # so unseen values are indistinguishable from it downstream.
            code_by_class = {cls: code for code, cls in enumerate(le.classes_)}
            df_proc[col] = df_proc[col].map(code_by_class).fillna(0).astype('int64')
        else:
            # No encoder was stored for this column: collapse it to a constant.
            df_proc[col] = 0

# C. Align columns so X_test has exactly the features, in order, that the
#    models were given in `features`. Absent features are zero-filled, but the
#    gap is reported: a model fed mostly constant zeros returns (near-)constant
#    predictions, and that is easy to miss otherwise.
missing_features = [col for col in features if col not in df_proc.columns]
if missing_features:
    print(
        f"⚠️ {len(missing_features)} de {len(features)} features no están en los datos de test "
        f"y se rellenan con 0: {missing_features[:10]}"
    )
for col in missing_features:
    df_proc[col] = 0

X_test = df_proc[features].values

# ==========================================
# 4. PREDICTION (ENSEMBLE AVERAGE)
# ==========================================
print(" Generando predicciones...")


def _ensemble_mean(models, X):
    """Average the predictions of every model in ``models`` for the rows of ``X``.

    Args:
        models: sized collection of fitted estimators exposing ``predict(X)``.
        X: feature matrix passed unchanged to each model.

    Returns:
        A float64 array of length ``len(X)`` holding the element-wise mean of
        the individual predictions.

    Raises:
        ValueError: if ``models`` is empty. Averaging zero models would divide
            0 by 0 and silently produce NaN predictions.
    """
    if len(models) == 0:
        raise ValueError("❌ La lista de modelos está vacía; no se puede calcular el promedio.")
    total = np.zeros(len(X))  # float64 accumulator, independent of model output dtype
    for m in models:
        total += m.predict(X)
    return total / len(models)


# Average of the X-coordinate models
preds_x = _ensemble_mean(models_x, X_test)

# Average of the Y-coordinate models
preds_y = _ensemble_mean(models_y, X_test)

# ==========================================
# 5. WRITE SUBMISSION.CSV
# ==========================================
submission = pd.DataFrame()

# Pick the identifier naming scheme from whichever game-id column is present.
if 'game_id' in test_df.columns:
    id_parts = ['game_id', 'play_id', 'nfl_id', 'frame_id']
elif 'gameId' in test_df.columns:  # alternative camelCase format
    id_parts = ['gameId', 'playId', 'nflId', 'frameId']
else:
    id_parts = []

# Explicit column check instead of a bare `except:`. The fallback to the row
# index is kept, but the reason is printed and unrelated errors are no longer
# swallowed.
absent_id_cols = [c for c in id_parts if c not in test_df.columns]

if id_parts and not absent_id_cols:
    # Identifier format: <game>_<play>_<player>_<frame>
    row_id = test_df[id_parts[0]].astype(str)
    for part in id_parts[1:]:
        row_id = row_id + '_' + test_df[part].astype(str)
    submission['game_play_id'] = row_id
else:
    if absent_id_cols:
        print(f"⚠️ Faltan columnas de ID {absent_id_cols}; se usa el índice como 'id'.")
    submission['id'] = test_df.index

# NOTE(review): the identifier column is named 'game_play_id' on one path and
# 'id' on the fallback path, and the outputs are 'predicted_x'/'predicted_y'.
# Confirm these names against sample_submission.csv before submitting.
submission['predicted_x'] = preds_x
submission['predicted_y'] = preds_y

# Save to the working directory and show a short preview.
submission.to_csv('submission.csv', index=False)
print("✅ ¡LISTO! Archivo 'submission.csv' generado exitosamente.")
print(submission.head())

 Iniciando sistema de inferencia...
 Buscando archivo del modelo...
✅ Modelo encontrado en: /kaggle/input/nfl-xgboost-model/scikitlearn/default/1/nfl_xgboost_solution_final.pkl




 Modelo desempaquetado correctamente.
 Cargando test.csv oficial...
 Procesando datos...
 Generando predicciones...
✅ ¡LISTO! Archivo 'submission.csv' generado exitosamente.
            game_play_id  predicted_x  predicted_y
0  2024120805_74_54586_1     6.907886     3.421041
1  2024120805_74_54586_2     6.907886     3.421041
2  2024120805_74_54586_3     6.907886     3.421041
3  2024120805_74_54586_4     6.907886     3.421041
4  2024120805_74_54586_5     6.907886     3.421041
