In [31]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score

# 1. Load data ---------------------------------------------------------------------
# Red-wine quality table fetched from the UCI repository; fields are ';'-separated.
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "wine-quality/winequality-red.csv"
)
data = pd.read_csv(url, delimiter=';')

# 'quality' is the regression target; every other column is a feature
# (the original note counts 11 feature columns).
y = data['quality']
X = data.drop(columns='quality')

# Hold out 20 % of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 2. Train XGBoost -----------------------------------------------------------------
# Squared-error regressor: 100 trees of depth <= 5, learning rate 0.1, with
# 80 % row and column subsampling and a fixed seed.
model = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    max_depth=5,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)
model.fit(X_train, y_train)

# Predict on the hold-out split once and reuse the result for both metrics,
# so the model is not evaluated twice on the same data.
y_pred_test = model.predict(X_test)

mse_test  = mean_squared_error(y_test, y_pred_test)
rmse_test = np.sqrt(mse_test)
print(f"RMSE test : {rmse_test:.4f}")
print(f"R² test   : {r2_score(y_test, y_pred_test):.4f}")

# ---------- PATCH: normalise feature names ----------
# Rename the trained booster's features positionally to "f0", "f1", ...
# NOTE(review): presumably required because the onnxmltools XGBoost converter
# expects names of this form instead of the DataFrame column names — confirm.
booster = model.get_booster()
feature_count = X_train.shape[1]
booster.feature_names = [f"f{i}" for i in range(feature_count)]
# ----------------------------------------------------

# 3. Convert to ONNX ---------------------------------------------------------------
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
from onnxmltools.convert.common.shape_calculator import \
        calculate_linear_regressor_output_shapes
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt

# Register onnxmltools' XGBoost converter and shape calculator with skl2onnx
# under the alias 'XGBoostRegressor' so convert_sklearn can handle
# xgb.XGBRegressor instances.
update_registered_converter(
    xgb.XGBRegressor,
    'XGBoostRegressor',
    calculate_linear_regressor_output_shapes,
    convert_xgboost
)

# One float32 input tensor named 'float_input': dynamic batch dimension (None)
# and one column per training feature.
initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]
onnx_model = convert_sklearn(
    model,
    initial_types=initial_type,
    # Pin the opsets of both domains. skl2onnx documents the empty string ''
    # as the dictionary key for the default ONNX domain; an 'ai.onnx' key is
    # not the documented spelling and may leave the main opset unpinned.
    target_opset={'': 15, 'ai.onnx.ml': 3}
)

# Persist the serialized ONNX protobuf to disk.
with open("wine_xgb_no_scaler.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())

# 4. Verify with ONNX Runtime ------------------------------------------------------
sess = rt.InferenceSession("wine_xgb_no_scaler.onnx",
                           providers=['CPUExecutionProvider'])
input_name  = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name

# First five test rows, cast to float32 to match the FloatTensorType input
# declared when the model was converted.
sample_input = X_test.iloc[:5].values.astype(np.float32)
py_pred   = model.predict(sample_input)
# The session returns a list with one array per requested output; flatten it
# so it lines up element-wise with the XGBoost predictions.
onnx_pred = sess.run([output_name], {input_name: sample_input})[0].ravel()

print("\nComparación XGB vs ONNX")
for i, (p1, p2) in enumerate(zip(py_pred, onnx_pred), 1):
    print(f"{i}:  XGB={p1:.4f}  ONNX={p2:.4f}  Δ={abs(p1-p2):.6f}")

# Make the check enforceable: raise if the exported graph and the native model
# disagree beyond float32 rounding noise, instead of relying on reading the
# printed deltas by eye.
np.testing.assert_allclose(onnx_pred, py_pred, rtol=1e-4, atol=1e-4)


RMSE test : 0.5680
R² test   : 0.5063

Comparación XGB vs ONNX
1:  XGB=5.3065  ONNX=5.3065  Δ=0.000000
2:  XGB=5.0424  ONNX=5.0424  Δ=0.000000
3:  XGB=5.3567  ONNX=5.3567  Δ=0.000000
4:  XGB=5.2192  ONNX=5.2192  Δ=0.000000
5:  XGB=5.8679  ONNX=5.8679  Δ=0.000001


In [25]:
import xgboost
import google.protobuf
import onnx
import skl2onnx
import onnxmltools
import onnxruntime

# Report the installed version of each package involved in the export,
# one aligned line per package.
for label, module in (
    ("xgboost:       ", xgboost),
    ("protobuf:      ", google.protobuf),
    ("onnx:          ", onnx),
    ("skl2onnx:      ", skl2onnx),
    ("onnxmltools:   ", onnxmltools),
    ("onnxruntime:   ", onnxruntime),
):
    print(label, module.__version__)

xgboost:        2.0.3
protobuf:       4.25.3
onnx:           1.17.0
skl2onnx:       1.17.0
onnxmltools:    1.13.0
onnxruntime:    1.21.0


In [29]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
from sklearn.metrics import mean_squared_error, r2_score
import onnxruntime as rt

# 1. Dataset -----------------------------------------------------------------
# Red-wine quality table fetched from the UCI repository; fields are ';'-separated.
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "wine-quality/winequality-red.csv"
)
data = pd.read_csv(url, delimiter=';')

# 'quality' is the regression target; all remaining columns are features.
y = data['quality']
X = data.drop(columns='quality')

# 80/20 train/test split with a fixed seed so reruns produce the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 2. Manual scaling ----------------------------------------------------------
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_sc = scaler.transform(X_train)
X_test_sc  = scaler.transform(X_test)

# 3. Train XGBoost -----------------------------------------------------------
# Squared-error regressor: 100 trees of depth <= 5, learning rate 0.1, with
# 80 % row and column subsampling and a fixed seed. Fitted on the scaled
# training features.
model = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    max_depth=5,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)
model.fit(X_train_sc, y_train)

# Predict on each split exactly once and reuse the test predictions for both
# RMSE and R², so the model is not evaluated twice on the same data.
y_pred_test  = model.predict(X_test_sc)
y_pred_train = model.predict(X_train_sc)

mse_test  = mean_squared_error(y_test, y_pred_test)
rmse_test = np.sqrt(mse_test)

mse_train  = mean_squared_error(y_train, y_pred_train)
rmse_train = np.sqrt(mse_train)

print(f"RMSE train: {rmse_train:.4f}")
print(f"RMSE test : {rmse_test:.4f}")
print(f"R² test   : {r2_score(y_test, y_pred_test):.4f}")


# 4. Convert to ONNX ---------------------------------------------------------
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
from onnxmltools.convert.common.shape_calculator import \
        calculate_linear_regressor_output_shapes
from skl2onnx import update_registered_converter

# Register onnxmltools' XGBoost converter and shape calculator with skl2onnx
# under the alias 'XGBoostRegressor' so convert_sklearn can handle
# xgb.XGBRegressor instances.
update_registered_converter(
    xgb.XGBRegressor,
    'XGBoostRegressor',
    calculate_linear_regressor_output_shapes,
    convert_xgboost,
)

# One float32 input tensor named 'float_input': dynamic batch dimension (None)
# and one column per training feature.
# NOTE: only `model` is converted. The StandardScaler fitted earlier in this
# cell is not part of the exported graph, so inputs to wine_xgb.onnx must
# already be scaled with that same scaler.
initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]
onnx_model = convert_sklearn(
    model,
    initial_types=initial_type,
    # Pin the opsets of both domains. skl2onnx documents the empty string ''
    # as the dictionary key for the default ONNX domain; an 'ai.onnx' key is
    # not the documented spelling and may leave the main opset unpinned.
    target_opset={'': 15, 'ai.onnx.ml': 3}
)

# Persist the serialized ONNX protobuf to disk.
with open("wine_xgb.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())

# 5. Inference with ONNX Runtime ---------------------------------------------
sess = rt.InferenceSession("wine_xgb.onnx", providers=['CPUExecutionProvider'])
input_name  = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name

# First five scaled test rows. The cast to float32 is essential: the ONNX
# input was declared as a FloatTensorType at conversion time.
sample_input = X_test_sc[:5].astype(np.float32)
py_pred   = model.predict(sample_input)
# The session returns a list with one array per requested output; flatten it
# so it lines up element-wise with the XGBoost predictions.
onnx_pred = sess.run([output_name], {input_name: sample_input})[0].ravel()

print("\nComparación XGB vs ONNX")
for i, (p1, p2) in enumerate(zip(py_pred, onnx_pred), 1):
    print(f" {i}:  XGB={p1:.4f}  ONNX={p2:.4f}  Δ={abs(p1-p2):.6f}")

# Make the check enforceable: raise if the exported graph and the native model
# disagree beyond float32 rounding noise, instead of relying on reading the
# printed deltas by eye.
np.testing.assert_allclose(onnx_pred, py_pred, rtol=1e-4, atol=1e-4)


RMSE train: 0.2999
RMSE test : 0.5680
R² test   : 0.5063

Comparación XGB vs ONNX
 1:  XGB=5.3065  ONNX=5.3065  Δ=0.000000
 2:  XGB=5.0424  ONNX=5.0424  Δ=0.000000
 3:  XGB=5.3567  ONNX=5.3567  Δ=0.000000
 4:  XGB=5.2192  ONNX=5.2192  Δ=0.000000
 5:  XGB=5.8679  ONNX=5.8679  Δ=0.000001
