In [None]:
import ibis
import ibis_ml as ml
from ibis import _

# Turn on ibis interactive mode so that an expression left as the last line of
# a cell is executed and its result displayed, instead of showing the
# unevaluated expression.
ibis.options.interactive = True

In [None]:
# Load the prepared model input table from Parquet and preview it.
model_input_table = ibis.read_parquet("solutions/small_model_input_table.parquet")
model_input_table

# Data splitting

In [None]:
# Split the model input into train and test table expressions, using
# `game_id` as the key that decides which side each row lands on.
train_data, test_data = ml.train_test_split(
    model_input_table,
    unique_key="game_id",
    # A fixed seed keeps the split reproducible between runs
    random_seed=111,
    # Hold out 1/4 of the data for testing; the other 3/4 is for training
    test_size=0.25,
    # NOTE(review): presumably 4 buckets are used so the 0.25 test fraction
    # maps onto a whole bucket -- confirm against the ibis-ml docs
    num_buckets=4,
)

# Fit a model with a recipe

In [None]:
import xgboost as xgb
from sklearn.pipeline import Pipeline

# Number of moves assumed when turning a time control (base time plus a
# per-move increment) into an estimated total clock budget.
NUM_MOVES = 40

# Separate the label column from the feature columns.
y_train = train_data.target
X_train = train_data.drop("target")

# Clock advantage of white over black, scaled by the estimated total time.
clock_diff = _.white_clock - _.black_clock
estimated_total_time = _.base_time + _.increment * NUM_MOVES

# Preprocessing recipe; the steps are listed in the order they are applied:
#   1. add the scaled clock difference as a new feature,
#   2. drop zero-variance columns,
#   3. drop string columns,
#   4. cast every remaining column to float64.
xgb_recipe = ml.Recipe(
    ml.Mutate(relative_clock_diff=clock_diff / estimated_total_time),
    ml.DropZeroVariance(ml.everything()),
    ml.Drop(ml.string()),
    ml.Cast(ml.everything(), "float64"),
)

# The regressor seems to overfit much more quickly than the classifier.
# With the default eta=0.3, try n_estimators=10
# With eta=0.1, try n_estimators=20
xgb_reg = xgb.XGBRegressor(n_estimators=10)

# Chain preprocessing and model; the step names are used to look the steps
# up again later, so they must stay as they are.
xgb_reg_pipe = Pipeline(steps=[("xgb_recipe", xgb_recipe), ("xgb_reg", xgb_reg)])
xgb_reg_pipe.set_output(transform="pandas")
xgb_reg_pipe.fit(X_train, y_train)

# Save only the fitted XGBoost step; the recipe is not part of the saved file.
xgb_reg_pipe["xgb_reg"].save_model("solutions/live-win-probability-model.json")

# Perform inference using LETSQL

In [None]:
import letsql as ls

import ibis.selectors as s
from letsql.common.caching import ParquetCacheStorage

# LETSQL connection on which the input table and the model are registered.
con = ls.connect()

# Separate DuckDB-backed connection, used here only to read the Parquet file.
duckdb_con = ls.duckdb.connect()
# Parquet-backed cache storage tied to `con`, rooted at "letsql-tmp".
cache = ParquetCacheStorage(source=con, path="letsql-tmp")

# Read the input through DuckDB, register it on `con` under the name
# "model_input_table", then cache it using the storage defined above.
duckdb_input = duckdb_con.read_parquet("solutions/small_model_input_table.parquet")
registered_input = con.register(duckdb_input, "model_input_table")
t = registered_input.cache(storage=cache)

# Register the saved XGBoost model file on `con`; the returned object is what
# gets used to add predictions to table expressions.
model_path = "solutions/live-win-probability-model.json"
predict_xgb = con.register_xgb_model("live_win_probability", model_path)

In [None]:
# Preview the input table that was registered and cached on the LETSQL connection.
t

In [None]:
# Run the fitted recipe's preprocessing steps on `t` via `to_ibis` and show
# the resulting table.
xgb_reg_pipe["xgb_recipe"].to_ibis(t)

In [None]:
# Preprocess `t` with the fitted recipe, then add the registered model's
# prediction as a new column and display the result.
t_features = xgb_reg_pipe["xgb_recipe"].to_ibis(t)
t_pred = t_features.mutate(predicted_live_win_probability=predict_xgb.on_expr)
t_pred