# In [None]:

import json
import os

import lightgbm as lgb
import numpy as np
import pandas as pd
from numerapi import NumerAPI
from sklearn.metrics import mean_squared_error

# 1. Load the feature metadata and select the "all" feature set.
# JSON is UTF-8 by specification, so the encoding is pinned instead of relying
# on the platform's default text encoding.
with open("features.json", "r", encoding="utf-8") as f:
    feature_metadata = json.load(f)
features = feature_metadata["feature_sets"]["all"]

# 2. Read the training set, then split it into the feature matrix (the
# columns listed in `features`) and the "target" column.
train_df = pd.read_parquet("train.parquet")
X_train, y_train = train_df[features], train_df["target"]

# 3. Load only selected eras from validation.parquet (to avoid using all RAM).
# NOTE(review): the filter assumes era labels look like "era1200"; if the file
# stores them in another form (e.g. a zero-padded "1200"), every filter will
# match nothing — confirm against the data.
eras_a_cargar = [f"era{e}" for e in range(1200, 1205)]
val_blocks = []
for era in eras_a_cargar:
    try:
        df_era = pd.read_parquet("validation.parquet", filters=[("era", "==", era)])
    except Exception as e:
        # Best effort: a failure on one era must not abort the remaining ones.
        print(f"⚠️ Error al cargar {era}: {e}")
        continue
    if df_era.empty:
        # A filter that matches no rows can come back as an empty frame rather
        # than an error, so report it instead of claiming the era was loaded.
        print(f"⚠️ Era sin filas: {era}")
        continue
    val_blocks.append(df_era)
    print(f"✅ Era cargada: {era}")
if not val_blocks:
    # pd.concat([]) would otherwise fail with "No objects to concatenate",
    # which hides the real cause.
    raise ValueError("No se pudo cargar ninguna era de validation.parquet")
val_df = pd.concat(val_blocks, ignore_index=True)
X_val = val_df[features]
y_val = val_df["target"]

# 4. Train the LightGBM model, using the validation rows for early stopping.
model = lgb.LGBMRegressor(n_estimators=200, learning_rate=0.05)
# Early stopping and log silencing are configured through callbacks: the
# `early_stopping_rounds` and `verbose` keyword arguments of fit() were
# removed in LightGBM 4.0 and raise TypeError there.
model.fit(
    X_train,
    y_train,
    eval_set=[(X_val, y_val)],
    callbacks=[
        lgb.early_stopping(stopping_rounds=10, verbose=False),
        lgb.log_evaluation(period=0),
    ],
)

# 5. Score the model on the validation rows and print both metrics.
y_pred = model.predict(X_val)
mse = mean_squared_error(y_val, y_pred)
# The off-diagonal entry of the correlation matrix is the correlation between
# the targets and the predictions.
corr = np.corrcoef(y_val, y_pred)[0][1]
print(f"📈 Correlación validación: {corr:.4f}")
print(f"📉 Error cuadrático medio: {mse:.6f}")

# 6. Predict on the live data and write the submission file.
live_df = pd.read_parquet("live.parquet")
live_df["prediction"] = model.predict(live_df[features])
# The row identifier may be stored as the frame's index rather than as a
# regular column; expose it as a column so the selection below does not raise
# KeyError. Frames that already have an "id" column are left untouched.
if "id" not in live_df.columns and live_df.index.name == "id":
    live_df = live_df.reset_index()
submission = live_df[["id", "prediction"]]
submission.to_csv("submission.csv", index=False)
print("✅ Archivo 'submission.csv' generado.")

# 7. Upload the submission file to Numerai.
# Credentials are read from the environment so that secrets never live in the
# source file. NOTE(review): any key previously committed in this file should
# be treated as leaked and rotated.
public_id = os.environ.get("NUMERAI_PUBLIC_ID")
secret_key = os.environ.get("NUMERAI_SECRET_KEY")
if not public_id or not secret_key:
    # Fail before any network call with a message that names the fix.
    raise RuntimeError(
        "Define las variables de entorno NUMERAI_PUBLIC_ID y NUMERAI_SECRET_KEY"
    )
# NOTE(review): upload_predictions presumably expects the model's UUID rather
# than its display name — confirm (napi.get_models() maps names to ids).
model_id = "edu12_edu1000"

napi = NumerAPI(public_id=public_id, secret_key=secret_key)
napi.upload_predictions("submission.csv", model_id=model_id)
print("📡 Predicciones enviadas correctamente a Numerai.")
