# Description

We will use the preprocessed poker data and compare regression accuracies/errors for Tabnet and Node vs XGBoost

In [10]:
import numpy as np
import pandas as pd
import scipy as scp
from xgboost import XGBClassifier
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE

ROOT = os.path.dirname(os.getcwd())
train_data = pd.read_csv(os.path.join(ROOT, 'data', 'pokerhands', 'train.csv'))
test_data = pd.read_csv(os.path.join(ROOT, 'data', 'pokerhands', 'test.csv'))
test_data_correct = pd.read_csv(os.path.join(ROOT, 'data', 'pokerhands', 'test_correct.csv'))

test_data = test_data.head(10000)
test_data_correct = test_data_correct.head(10000)

## XGBoost

In [7]:
x = train_data.drop('hand', axis =1)
y = train_data['hand']
x = np.array(x).astype(int)

In [8]:
model = XGBClassifier()
model.fit(x, y)
x_test = test_data
x_test = x_test.drop('id', axis = 1)

x_test = np.array(x_test).astype(int)
predictions = model.predict(x_test).astype(int)
output = pd.DataFrame({'id': test_data.id, 'hand_predict': predictions})

output = pd.concat([output, test_data_correct], axis=1)
correct = output[(output['hand'] == output['hand_predict'])].count()
print("XGBoost:", correct / 10000)

XGBoost: id              0.7298
hand_predict    0.7298
id              0.7298
hand            0.7298
dtype: float64


## TabNet

In [32]:
from pytorch_tabnet.tab_model import TabNetRegressor

X_train, X_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2, random_state=123)
model = TabNetRegressor()  #TabNetRegressor()

model.fit( 
    X_train, y_train.to_numpy().reshape(-1, 1),
    eval_set=[(X_valid, y_valid.to_numpy().reshape(-1, 1))],
    eval_metric=['mse'],
    max_epochs = 250
)

Device used : cpu
epoch 0  | loss: 0.8835  | val_0_mse: 2.11242 |  0:00:03s
epoch 1  | loss: 0.63735 | val_0_mse: 0.69461 |  0:00:07s
epoch 2  | loss: 0.61669 | val_0_mse: 0.67415 |  0:00:11s
epoch 3  | loss: 0.61495 | val_0_mse: 0.64121 |  0:00:15s
epoch 4  | loss: 0.61243 | val_0_mse: 0.63444 |  0:00:18s
epoch 5  | loss: 0.60976 | val_0_mse: 0.62683 |  0:00:22s
epoch 6  | loss: 0.6073  | val_0_mse: 0.62563 |  0:00:26s
epoch 7  | loss: 0.60535 | val_0_mse: 0.61957 |  0:00:29s
epoch 8  | loss: 0.60081 | val_0_mse: 0.61905 |  0:00:33s
epoch 9  | loss: 0.59961 | val_0_mse: 0.62124 |  0:00:36s
epoch 10 | loss: 0.59608 | val_0_mse: 0.61493 |  0:00:40s
epoch 11 | loss: 0.59387 | val_0_mse: 0.61418 |  0:00:43s
epoch 12 | loss: 0.58869 | val_0_mse: 0.61003 |  0:00:47s
epoch 13 | loss: 0.58855 | val_0_mse: 0.60166 |  0:00:51s
epoch 14 | loss: 0.58574 | val_0_mse: 0.60383 |  0:00:54s
epoch 15 | loss: 0.58344 | val_0_mse: 0.60886 |  0:00:58s
epoch 16 | loss: 0.58436 | val_0_mse: 0.6013  |  0:01:

In [34]:
predictions = model.predict(x_test).astype(int)
output = pd.DataFrame({'id': test_data.id, 'hand_predict': predictions.squeeze()})

output = pd.concat([output, test_data_correct], axis=1)
correct = output[(output['hand'] == output['hand_predict'])].count()
print("TabNet:", correct / 10000)

TabNet: id              0.5342
hand_predict    0.5342
id              0.5342
hand            0.5342
dtype: float64


## NODE