In [1]:
# Print the version of the CUDA compiler driver (nvcc) visible to this environment.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Mon_Oct_24_19:12:58_PDT_2022
Cuda compilation tools, release 12.0, V12.0.76
Build cuda_12.0.r12.0/compiler.31968024_0


In [2]:
!pip install --upgrade numpy pandas xgboost scikit-learn
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install \
    --extra-index-url=https://pypi.nvidia.com \
    "cudf-cu12==25.2.*" "dask-cudf-cu12==25.2.*" "cuml-cu12==25.2.*" \
    "cugraph-cu12==25.2.*" "nx-cugraph-cu12==25.2.*" "cuspatial-cu12==25.2.*" \
    "cuproj-cu12==25.2.*" "cuxfilter-cu12==25.2.*" "cucim-cu12==25.2.*" \
    "pylibraft-cu12==25.2.*" "raft-dask-cu12==25.2.*" "cuvs-cu12==25.2.*" \
    "nx-cugraph-cu12==25.2.*"

Collecting numpy
  Using cached numpy-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[0mLooking in indexes: https://download.pytorch.org/whl/cu121
[0mLooking in indexes: https://pypi.org/simple, https://pypi.nvidia.com
[0m

In [3]:
import os
import numpy as np
import pandas as pd
import xgboost as xgb
import torch
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
import cudf

# Report the PyTorch / CUDA build and choose the device used by the torch cells.
# These four queries are safe on CPU-only machines (they return False / None / 0).
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"GPU device count: {torch.cuda.device_count()}")

if torch.cuda.is_available():
    device = "cuda"
    # current_device() / get_device_name() raise when no CUDA device is usable,
    # so they are only queried on the GPU path; otherwise the CPU fallback
    # below could never be reached.
    print(f"Current device: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name(0)}")
else:
    device = "cpu"
    # NOTE(review): OMP_NUM_THREADS is set after numpy/torch were imported, so it
    # may be too late to affect an already-initialised OpenMP runtime - confirm.
    os.environ["OMP_NUM_THREADS"] = "1"
    torch.set_num_threads(1)

print(f"xgb {xgb.__version__}")

# Shuffled K-fold splitter with a fixed seed, shared by the cross-validation
# cells so they iterate over the same folds.
kfold = KFold(shuffle=True, random_state=42)

PyTorch version: 2.1.1+cu121
CUDA available: True
CUDA version: 12.1
GPU device count: 1
Current device: 0
Device name: Quadro RTX 4000
xgb 2.1.4


In [4]:
# Training data file, read relative to ../datasets/net/.
# Alternative input: fn = "train_simple.csv"
fn = "train_orig.csv"
print(f"reading {fn}")
train = pd.read_csv(f"../datasets/net/{fn}")

# Halve the memory footprint by narrowing 64-bit columns to 32-bit.
# Integer columns end up first, followed by float columns; columns of any
# other dtype are not carried over.
train = pd.concat(
    [
        train.select_dtypes(wide).astype(narrow)
        for wide, narrow in (("int64", "int32"), ("float64", "float32"))
    ],
    axis=1,
)

shape_text = str(train.shape)
print(f"\ntrain: {shape_text:>23}")
print(train.columns.to_list())

reading train.csv

train:           (202033, 413)
['Season', 'DayNum', 'TeamID_1', 'TeamID_2', 'Margin', 'Score_pg_o_1', 'Score_poss_o_1', 'FGM_pg_o_1', 'FGM_poss_o_1', 'FGA_pg_o_1', 'FGA_poss_o_1', 'FGM3_pg_o_1', 'FGM3_poss_o_1', 'FGA3_pg_o_1', 'FGA3_poss_o_1', 'FTM_pg_o_1', 'FTM_poss_o_1', 'FTA_pg_o_1', 'FTA_poss_o_1', 'OR_pg_o_1', 'OR_poss_o_1', 'DR_pg_o_1', 'DR_poss_o_1', 'Ast_pg_o_1', 'Ast_poss_o_1', 'TO_pg_o_1', 'TO_poss_o_1', 'Stl_pg_o_1', 'Stl_poss_o_1', 'Blk_pg_o_1', 'Blk_poss_o_1', 'PF_pg_o_1', 'PF_poss_o_1', 'Poss_pg_o_1', 'Score_pg_d_1', 'Score_poss_d_1', 'FGM_pg_d_1', 'FGM_poss_d_1', 'FGA_pg_d_1', 'FGA_poss_d_1', 'FGM3_pg_d_1', 'FGM3_poss_d_1', 'FGA3_pg_d_1', 'FGA3_poss_d_1', 'FTM_pg_d_1', 'FTM_poss_d_1', 'FTA_pg_d_1', 'FTA_poss_d_1', 'OR_pg_d_1', 'OR_poss_d_1', 'DR_pg_d_1', 'DR_poss_d_1', 'Ast_pg_d_1', 'Ast_poss_d_1', 'TO_pg_d_1', 'TO_poss_d_1', 'Stl_pg_d_1', 'Stl_poss_d_1', 'Blk_pg_d_1', 'Blk_poss_d_1', 'PF_pg_d_1', 'PF_poss_d_1', 'Poss_pg_d_1', 'FGPct_o_1', 'FGPct3_o_1'

In [8]:
# Columns that are not model inputs: season/day/team ids and the Margin target.
non_feature_cols = ["Season", "DayNum", "TeamID_1", "TeamID_2", "Margin"]
X_df = train.drop(columns=non_feature_cols)
print(f"X_df: {str(X_df.shape):>24}")

# Features standardised over the whole data set, placed on the chosen device.
X = torch.as_tensor(
    StandardScaler().fit_transform(X_df.values), dtype=torch.float32, device=device
)
print(f"X:    {X.shape}")

# Raw target as a pandas Series (used for XGBoost labels and for scoring).
y_s = train["Margin"]
print(f"y_s: {str(y_s.shape):>22}")

# Standardised target for the torch model; the fitted scaler is kept so that
# predictions can be mapped back to the original Margin scale.
scaler_y = StandardScaler()
y_scaled = scaler_y.fit_transform(train[["Margin"]]).flatten()
y = torch.tensor(y_scaled, dtype=torch.float32, device=device)
print(f"y:    {y.shape}")

X_df:            (202033, 408)
X:    torch.Size([202033, 408])
y_s:              (202033,)
y:    torch.Size([202033])


In [10]:
def brier_score(y_pred_np, y_true_s, scale=0.1):
    """Brier score of win probabilities derived from predicted margins.

    Each predicted margin is turned into a probability that team 1 wins with
    the logistic function ``1 / (1 + exp(-scale * margin))``. The score is the
    mean squared difference between that probability and the observed outcome
    (1.0 when the true margin is strictly positive, 0.0 otherwise).

    Parameters
    ----------
    y_pred_np : array-like
        Predicted margins.
    y_true_s : array-like
        Observed margins (pandas Series, NumPy array or plain sequence),
        aligned element-wise with ``y_pred_np``.
    scale : float, default 0.1
        Slope of the logistic margin-to-probability mapping.

    Returns
    -------
    float
        Mean squared error between predicted win probability and outcome.
    """
    # np.asarray lets callers pass a Series, an ndarray or a list alike.
    y_pred = np.asarray(y_pred_np)
    y_true = np.asarray(y_true_s)

    pred_win_prob = 1 / (1 + np.exp(-y_pred * scale))
    team_1_won = (y_true > 0).astype(float)
    return np.mean((pred_win_prob - team_1_won) ** 2)

In [11]:
# Cross-validated comparison of XGBoost's two front ends - the native
# `xgb.train` API and the scikit-learn style `XGBRegressor` - on identical
# folds and identical hyperparameters, scored on out-of-fold predictions.
N_BOOST_ROUNDS = 2000  # boosting rounds used by both front ends
LOG_EVERY = 250  # evaluation-log interval, in boosting rounds

# Single source of truth for the hyperparameters, so the two front ends
# cannot silently drift apart.
params = {
    "tree_method": "hist",
    "device": "cuda",
    "max_depth": 3,
    "colsample_bytree": 0.5,
    "subsample": 0.8,
    "eta": 0.02,
    "min_child_weight": 80,
    "verbosity": 1,
}

# The scikit-learn wrapper spells `eta` as `learning_rate`; everything else
# is passed through under the same name.
sk_params = {
    ("learning_rate" if name == "eta" else name): value
    for name, value in params.items()
}

print("xgboost")
y_pred_oof = np.zeros(y_s.shape[0])  # out-of-fold predictions, xgb.train
y_pred_oof2 = np.zeros(y_s.shape[0])  # out-of-fold predictions, XGBRegressor

for fold_n, (i_fold, i_oof) in enumerate(kfold.split(X_df.index), 1):
    print(f"  fold {fold_n}")
    X_fold_pd, y_fold_pd = X_df.iloc[i_fold], y_s.iloc[i_fold]
    X_oof_pd, y_oof_pd = X_df.iloc[i_oof], y_s.iloc[i_oof]

    # --- native API -------------------------------------------------------
    dm_fold = xgb.DMatrix(X_fold_pd, label=y_fold_pd)
    dm_oof = xgb.DMatrix(X_oof_pd, label=y_oof_pd)

    print("  xgb.train")
    booster = xgb.train(
        params,
        dm_fold,
        num_boost_round=N_BOOST_ROUNDS,
        evals=[(dm_fold, "fold"), (dm_oof, "oof")],
        verbose_eval=LOG_EVERY,
    )
    y_pred_oof[i_oof] = booster.predict(dm_oof)

    # --- scikit-learn wrapper, fed with cuDF objects ------------------------
    print("  XGBRegressor")
    regressor = xgb.XGBRegressor(n_estimators=N_BOOST_ROUNDS, **sk_params)

    X_fold = cudf.DataFrame.from_pandas(X_fold_pd)
    y_fold = cudf.Series(y_fold_pd)
    X_oof = cudf.DataFrame.from_pandas(X_oof_pd)
    y_oof = cudf.Series(y_oof_pd)

    regressor.fit(
        X_fold,
        y_fold,
        verbose=LOG_EVERY,
        eval_set=[(X_fold, y_fold), (X_oof, y_oof)],
    )
    y_pred_oof2[i_oof] = regressor.predict(X_oof)

    print()

# First line: xgb.train score; second line: XGBRegressor score.
score = brier_score(y_pred_oof, y_s)
print(f"  score: {score:.4f}")
score = brier_score(y_pred_oof2, y_s)
print(f"  score: {score:.4f}")

xgboost
  fold 1
  xgb.train
[0]	fold-rmse:16.36785	oof-rmse:16.44650
[250]	fold-rmse:11.13632	oof-rmse:11.25991
[500]	fold-rmse:10.89192	oof-rmse:11.03729
[750]	fold-rmse:10.84072	oof-rmse:11.00809
[1000]	fold-rmse:10.81164	oof-rmse:11.00183
[1250]	fold-rmse:10.78834	oof-rmse:11.00055
[1500]	fold-rmse:10.76690	oof-rmse:11.00130
[1750]	fold-rmse:10.74744	oof-rmse:11.00363
[1999]	fold-rmse:10.72917	oof-rmse:11.00673
  XGBRegressor
[0]	validation_0-rmse:16.36785	validation_1-rmse:16.44650
[250]	validation_0-rmse:11.13632	validation_1-rmse:11.25991
[500]	validation_0-rmse:10.89192	validation_1-rmse:11.03729
[750]	validation_0-rmse:10.84072	validation_1-rmse:11.00809
[1000]	validation_0-rmse:10.81164	validation_1-rmse:11.00183
[1250]	validation_0-rmse:10.78834	validation_1-rmse:11.00055
[1500]	validation_0-rmse:10.76690	validation_1-rmse:11.00130
[1750]	validation_0-rmse:10.74744	validation_1-rmse:11.00363
[1999]	validation_0-rmse:10.72917	validation_1-rmse:11.00673

  fold 2
  xgb.train
[

In [12]:
def _mlp_forward(x, weights1, bias1, weights2, bias2):
    """One-hidden-layer MLP: leaky_relu(x @ W1 + b1, slope 0.1) @ W2 + b2."""
    hidden = F.leaky_relu(x @ weights1 + bias1, negative_slope=0.1)
    return hidden @ weights2 + bias2


# Cross-validated one-hidden-layer MLP trained full-batch on the standardised
# target, then scored on out-of-fold predictions in the original Margin scale.
print("torch")
n_epochs = 1_000
hidden_size = 64
n_logged_epochs = 3  # only the last few epochs are printed
loss_fn = torch.nn.MSELoss()

# Seed the weight initialisation so a re-run reproduces the same numbers
# (same seed value as the KFold splitter).
torch.manual_seed(42)

y_pred_oof = torch.zeros(
    y.shape[0],
    dtype=torch.float32,
    requires_grad=False,
    device=device,
)

for fold_n, (i_fold, i_oof) in enumerate(kfold.split(X_df.index), 1):
    print(f"  fold {fold_n}")

    # The fold slices do not change between epochs: gather them once per fold
    # instead of re-indexing X and y on every epoch.
    X_fold_t, y_fold_t = X[i_fold], y[i_fold].view(-1, 1)
    X_oof_t, y_oof_t = X[i_oof], y[i_oof].view(-1, 1)

    weights1 = torch.nn.Parameter(
        0.1 * torch.randn(X_df.shape[1], hidden_size, device=device)
    )
    bias1 = torch.nn.Parameter(torch.zeros(hidden_size, device=device))
    weights2 = torch.nn.Parameter(0.1 * torch.randn(hidden_size, 1, device=device))
    bias2 = torch.nn.Parameter(torch.zeros(1, device=device))
    model_params = [weights1, bias1, weights2, bias2]
    optimizer = torch.optim.Adam(model_params, weight_decay=1e-4)

    for epoch_n in range(1, n_epochs + 1):
        loss_fold_epoch = loss_fn(_mlp_forward(X_fold_t, *model_params), y_fold_t)
        optimizer.zero_grad()
        loss_fold_epoch.backward()
        optimizer.step()

        # The out-of-fold loss is only reported for the last epochs, so it is
        # only evaluated there; it has no effect on training.
        if epoch_n > (n_epochs - n_logged_epochs):
            with torch.no_grad():
                loss_oof_epoch = loss_fn(
                    _mlp_forward(X_oof_t, *model_params), y_oof_t
                )
            print(
                f"    epoch {epoch_n:>6}: "
                f"fold={loss_fold_epoch.item():.4f} "
                f"oof={loss_oof_epoch.item():.4f}"
            )

    with torch.no_grad():
        y_pred_oof[i_oof] = _mlp_forward(X_oof_t, *model_params).flatten()

    # Drop the per-fold slices so they do not stay allocated after the fold.
    del X_fold_t, y_fold_t, X_oof_t, y_oof_t

    print()

# Map predictions from the standardised target back to the Margin scale
# before scoring against the raw margins.
y_pred_oof = scaler_y.inverse_transform(
    y_pred_oof.cpu().numpy().reshape(-1, 1)
).flatten()

score = brier_score(y_pred_oof, y_s)
print(f"  score: {score:.4f}")

torch
  fold 1
    epoch    998: fold=0.4277 oof=0.4486
    epoch    999: fold=0.4277 oof=0.4486
    epoch   1000: fold=0.4276 oof=0.4486

  fold 2
    epoch    998: fold=0.4271 oof=0.4424
    epoch    999: fold=0.4271 oof=0.4424
    epoch   1000: fold=0.4271 oof=0.4424

  fold 3
    epoch    998: fold=0.4231 oof=0.4471
    epoch    999: fold=0.4231 oof=0.4474
    epoch   1000: fold=0.4231 oof=0.4470

  fold 4
    epoch    998: fold=0.4265 oof=0.4440
    epoch    999: fold=0.4265 oof=0.4439
    epoch   1000: fold=0.4265 oof=0.4439

  fold 5
    epoch    998: fold=0.4277 oof=0.4434
    epoch    999: fold=0.4276 oof=0.4434
    epoch   1000: fold=0.4276 oof=0.4434

  score: 0.1655
