# Poker rule induction with TrainSklearn and FastAI TabularModel

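Requires `fastai`, plus `ray`, `skorch`, `scikit-learn` and the `train_sklearn` package imported below. Download `train.csv.zip` from https://www.kaggle.com/c/poker-rule-induction/data?select=train.csv.zip and unzip `train.csv` into the working directory.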

In [4]:
import pandas as pd
import torch
import math
import ray

from torch import nn
from skorch.callbacks import LRScheduler, GradientNormClipping
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from fastai.tabular.model import TabularModel, emb_sz_rule

from train_sklearn import RayTrainNeuralNet

In [5]:
# Redefine some fastai embedding functions to work with pandas

def _one_emb_sz(n_cat, n):
    "Pick an embedding size for `n` depending on `classes` if not given in `sz_dict`."
    sz_dict = {}
    sz = sz_dict.get(n, int(emb_sz_rule(n_cat)))  # rule of thumb
    return n_cat, sz


def get_emb_sz(sizes: list, columns: list):
    "Get default embedding size from `TabularPreprocessor` `proc` or the ones in `sz_dict`"

    return tuple(_one_emb_sz(size, column) for size, column in zip(sizes, columns))


In [6]:
# Train data from https://www.kaggle.com/c/poker-rule-induction/data?select=train.csv.zip

data = pd.read_csv("train.csv")

# Split the label column off; everything left is a feature column
target = data.pop("hand")

# Index categories from 0
data = data - 1

cat_cols_names = data.columns
cat_cols = len(cat_cols_names)

In [7]:
# Add numerical versions of C* columns - helps the network learn the order.
# The frame is rebuilt from the original categorical columns only, so running
# this cell again does not append a second, duplicate set of n* columns.

c_cols = [col for col in cat_cols_names if col.startswith("C")]
num_cols = data[c_cols].add_prefix("n")
num_cols_names = num_cols.columns
data = pd.concat((data[cat_cols_names], num_cols), axis=1)

In [8]:
# Hold out 20% of the rows for validation (fixed seed for a repeatable split)
train_data, val_data, train_label, val_label = train_test_split(
    data, target, test_size=0.2, random_state=1
)

# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler().fit(train_data[num_cols_names])
train_data[num_cols_names] = scaler.transform(train_data[num_cols_names]).astype("double")
val_data[num_cols_names] = scaler.transform(val_data[num_cols_names]).astype("double")

In [9]:
# Size each embedding table from the largest 0-based category code instead of
# the number of distinct values seen: if a code is missing from the data, the
# distinct count would be smaller than the highest index fed to the embedding.
emb_szs = get_emb_sz(
    [int(data[col].max()) + 1 for col in cat_cols_names],
    list(cat_cols_names),
)

In [10]:
# Start Ray for this session; ignore_reinit_error=True keeps a re-run of this
# cell from failing when Ray is already initialised.
# Specify address= if needed.
ray.init(
    ignore_reinit_error=True,
)

{'node_ip_address': '172.31.43.110',
 'raylet_ip_address': '172.31.43.110',
 'redis_address': '172.31.43.110:6379',
 'object_store_address': '/tmp/ray/session_2021-12-15_21-12-28_829185_80136/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2021-12-15_21-12-28_829185_80136/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2021-12-15_21-12-28_829185_80136',
 'metrics_export_port': 62611,
 'node_id': '41670c44d3cbacf457c521bfb5742f8154f2cec91c0e11f7a5209138'}

In [15]:
# Training hyperparameters
num_workers = 2
# NOTE(review): the divisor presumably mirrors num_workers (per-worker batch) - confirm
batch_size = 512 // 2
epochs = 200
device = "cpu"
lr = 0.01

# One-cycle learning-rate schedule, stepped after every batch
one_cycle = LRScheduler(
    torch.optim.lr_scheduler.OneCycleLR,
    max_lr=lr * 10,
    step_every="batch",
    pct_start=0.25,
    final_div_factor=100000.0,
    epochs=epochs,
    steps_per_epoch=math.ceil(len(train_data) / batch_size),
)

# Tensor dtypes per input: integer codes for the embeddings, floats for the
# continuous features. The train and validation iterators each get a copy.
feature_dtypes = {
    "x_cat": [torch.long] * len(cat_cols_names),
    "x_cont": [torch.float] * len(num_cols_names),
}

reg = RayTrainNeuralNet(
    TabularModel,
    criterion=nn.CrossEntropyLoss,
    optimizer=torch.optim.AdamW,
    callbacks=[GradientNormClipping(1.0), one_cycle],
    num_workers=num_workers,
    max_epochs=epochs,
    batch_size=batch_size,
    lr=lr,
    device=device,
    optimizer__weight_decay=1e-5,
    # network configuration
    module__emb_szs=emb_szs,
    module__n_cont=len(num_cols_names),
    module__out_sz=10,
    module__layers=[100, 50, 50],
    module__ps=[0.01, 0.01, 0.02],
    # squeezing required for CrossEntropyLoss
    iterator_train__unsqueeze_label_tensor=False,
    iterator_valid__unsqueeze_label_tensor=False,
    # set correct dtypes
    iterator_train__feature_column_dtypes=dict(feature_dtypes),
    iterator_valid__feature_column_dtypes=dict(feature_dtypes),
)

In [16]:
# fastai TabularModel takes separate categorical and numerical feature tensors in forward,
# so each split is passed as a dict with one frame per input.
train_inputs = {
    "x_cat": train_data[cat_cols_names],
    "x_cont": train_data[num_cols_names],
}
val_inputs = {
    "x_cat": val_data[cat_cols_names],
    "x_cont": val_data[num_cols_names],
}

reg.fit(train_inputs, train_label, X_val=val_inputs, y_val=val_label)


2021-12-15 21:14:33,594	INFO trainer.py:172 -- Trainer logs will be logged in: /home/ubuntu/ray_results/train_2021-12-15_21-14-33
[2m[36m(BaseWorkerMixin pid=136803)[0m 2021-12-15 21:14:35,208	INFO torch.py:66 -- Setting up process group for: env:// [rank=1, world_size=2]
[2m[36m(BaseWorkerMixin pid=136804)[0m 2021-12-15 21:14:35,203	INFO torch.py:66 -- Setting up process group for: env:// [rank=0, world_size=2]
2021-12-15 21:14:35,229	INFO trainer.py:178 -- Run results will be logged in: /home/ubuntu/ray_results/train_2021-12-15_21-14-33/run_001
[2m[36m(BaseWorkerMixin pid=136803)[0m 2021-12-15 21:14:38,443	INFO torch.py:239 -- Moving model to device: cpu
[2m[36m(BaseWorkerMixin pid=136803)[0m 2021-12-15 21:14:38,443	INFO torch.py:242 -- Wrapping provided model in DDP.
[2m[36m(BaseWorkerMixin pid=136804)[0m 2021-12-15 21:14:38,434	INFO torch.py:239 -- Moving model to device: cpu
[2m[36m(BaseWorkerMixin pid=136804)[0m 2021-12-15 21:14:38,435	INFO torch.py:242 -- Wrapp

  epoch    train_loss    valid_loss    dur_s
-------  ------------  ------------  -------
      1        [36m1.8393[0m        [32m1.1965[0m   2.0558
      2        [36m1.0144[0m        [32m0.9368[0m   1.1105
      3        [36m0.9235[0m        [32m0.9162[0m   0.6768
      4        [36m0.9021[0m        [32m0.9087[0m   0.7174
      5        [36m0.8879[0m        [32m0.9012[0m   0.7954
      6        [36m0.8690[0m        [32m0.8795[0m   0.8108
      7        [36m0.8348[0m        [32m0.8621[0m   0.9922
      8        [36m0.8058[0m        [32m0.8378[0m   0.9411
      9        [36m0.7748[0m        [32m0.8285[0m   0.8090
     10        [36m0.7373[0m        [32m0.7751[0m   0.9506
     11        [36m0.6791[0m        [32m0.7250[0m   0.8650
     12        [36m0.6295[0m        [32m0.7011[0m   0.7355
     13        [36m0.5888[0m        [32m0.6692[0m   0.7873
     14        [36m0.5425[0m        [32m0.6519[0m   0.7971
     15        [36m0.5076[

<class 'train_sklearn.base.RayTrainNeuralNet'>[initialized](
  module_=TabularModel(
    (embeds): ModuleList(
      (0): Embedding(4, 3)
      (1): Embedding(13, 7)
      (2): Embedding(4, 3)
      (3): Embedding(13, 7)
      (4): Embedding(4, 3)
      (5): Embedding(13, 7)
      (6): Embedding(4, 3)
      (7): Embedding(13, 7)
      (8): Embedding(4, 3)
      (9): Embedding(13, 7)
    )
    (emb_drop): Dropout(p=0.0, inplace=False)
    (bn_cont): BatchNorm1d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (layers): Sequential(
      (0): LinBnDrop(
        (0): Linear(in_features=55, out_features=100, bias=False)
        (1): ReLU(inplace=True)
        (2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): Dropout(p=0.01, inplace=False)
      )
      (1): LinBnDrop(
        (0): Linear(in_features=100, out_features=50, bias=False)
        (1): ReLU(inplace=True)
        (2): BatchNorm1d(50, eps=1e-05, momentum=0.1, af

In [17]:
# Class probabilities for the validation rows
val_proba = reg.predict_proba(
    {
        "x_cat": val_data[cat_cols_names],
        "x_cont": val_data[num_cols_names],
    }
).to_pandas()

# Predicted label per row = label of the column with the highest probability
X_pred = val_proba.idxmax(axis=1)


[2m[36m(BaseWorkerMixin pid=137469)[0m 2021-12-15 21:17:39,975	INFO torch.py:66 -- Setting up process group for: env:// [rank=0, world_size=2]
2021-12-15 21:17:40,077	INFO trainer.py:178 -- Run results will be logged in: /home/ubuntu/ray_results/train_2021-12-15_21-14-33/run_002
[2m[36m(BaseWorkerMixin pid=137558)[0m 2021-12-15 21:17:40,050	INFO torch.py:66 -- Setting up process group for: env:// [rank=1, world_size=2]
[2m[36m(BaseWorkerMixin pid=137469)[0m 2021-12-15 21:17:44,288	INFO torch.py:239 -- Moving model to device: cpu
[2m[36m(BaseWorkerMixin pid=137469)[0m 2021-12-15 21:17:44,289	INFO torch.py:242 -- Wrapping provided model in DDP.
[2m[36m(BaseWorkerMixin pid=137558)[0m 2021-12-15 21:17:44,290	INFO torch.py:239 -- Moving model to device: cpu
[2m[36m(BaseWorkerMixin pid=137558)[0m 2021-12-15 21:17:44,290	INFO torch.py:242 -- Wrapping provided model in DDP.


In [18]:
# >0.95 accuracy is expected

final_accuracy = accuracy_score(val_label, X_pred)
print(f"Final accuracy: {final_accuracy}")

Final accuracy: 0.9802079168332667
