In [1]:
import pandas as pd

import xgboost as xgb 

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [3]:
column_names = ["Suit of card #1", "Rank of card #1", "Suit of card #2", "Rank of card #2", "Suit of card #3", "Rank of card #3", "Suit of card #4", "Rank of card #4", "Suit of card #5", "Rank of card #5", "Poker Hand"]
poker_train = pd.read_csv('poker-training.data', header=None, names=column_names)
poker_test = pd.read_csv('poker-testing.data', header=None, names=column_names)
print("First 5 testing data")
poker_test.head()

First 5 testing data


Unnamed: 0,Suit of card #1,Rank of card #1,Suit of card #2,Rank of card #2,Suit of card #3,Rank of card #3,Suit of card #4,Rank of card #4,Suit of card #5,Rank of card #5,Poker Hand
0,1,1,1,13,2,4,2,3,1,12,0
1,3,12,3,2,3,11,4,5,2,5,1
2,1,9,4,6,1,4,3,2,3,9,1
3,1,4,3,13,2,13,2,1,3,6,1
4,3,10,2,7,1,2,2,11,4,9,0


In [4]:
print("First 5 training data")
poker_train.head()

First 5 training data


Unnamed: 0,Suit of card #1,Rank of card #1,Suit of card #2,Rank of card #2,Suit of card #3,Rank of card #3,Suit of card #4,Rank of card #4,Suit of card #5,Rank of card #5,Poker Hand
0,1,10,1,11,1,13,1,12,1,1,9
1,2,11,2,13,2,10,2,12,2,1,9
2,3,12,3,11,3,13,3,10,3,1,9
3,4,10,4,11,4,1,4,13,4,12,9
4,4,1,4,13,4,12,4,11,4,10,9


In [6]:
# Assuming you have already loaded the dataset using Pandas and assigned it to a DataFrame called 'df'
X = poker_train.drop(columns=["Poker Hand"])  # Features (all columns except the target column 'label')
Y = poker_train["Poker Hand"]  # Target column
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)


In [8]:
#XGBOOST
# Create the XGBoost classifier model
model = xgb.XGBClassifier(learning_rate=0.1, n_estimators=100, max_depth=6, objective='multi:softmax')

# Fit the model on the training data
model.fit(X_train, Y_train)


In [9]:
# Predict the target labels for the test data
y_pred = model.predict(X_test)


In [11]:
accuracy = accuracy_score(Y_test, y_pred)
print("Accuracy: ",accuracy)

Accuracy:  0.6615353858456617


In [12]:
model.score(X_test, Y_test)

0.6615353858456617

In [14]:
score = classification_report(Y_test, y_pred)
print("Score:", score)

Score:               precision    recall  f1-score   support

           0       0.69      0.85      0.76      2536
           1       0.62      0.56      0.58      2079
           2       0.27      0.01      0.02       259
           3       0.50      0.01      0.02        92
           4       0.00      0.00      0.00        20
           5       0.00      0.00      0.00         5
           6       0.00      0.00      0.00         9
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         1

    accuracy                           0.66      5002
   macro avg       0.23      0.16      0.15      5002
weighted avg       0.63      0.66      0.63      5002



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
cm = confusion_matrix(Y_test, y_pred)
print("Matrix:" , cm )

Matrix: [[2150  386    0    0    0    0    0    0    0]
 [ 918 1155    6    0    0    0    0    0    0]
 [  29  227    3    0    0    0    0    0    0]
 [   8   82    1    1    0    0    0    0    0]
 [   2   18    0    0    0    0    0    0    0]
 [   5    0    0    0    0    0    0    0    0]
 [   0    7    1    1    0    0    0    0    0]
 [   0    1    0    0    0    0    0    0    0]
 [   1    0    0    0    0    0    0    0    0]]


In [20]:
#WITH SAMPLING
from imblearn.over_sampling import SMOTE

# Instantiate SMOTE with desired settings
smote = SMOTE(sampling_strategy='auto',k_neighbors=3, random_state=42)

# Apply SMOTE to training data
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, Y_train)


In [22]:
from xgboost import XGBClassifier

# Instantiate the XGBoost model
model = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=6)

# Fit the model on the resampled training data
model.fit(X_train_resampled, y_train_resampled)


In [24]:
# Predict on test data
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(Y_test, y_pred)
print(accuracy)

0.4880047980807677


In [25]:
model.score(X_test, Y_test)

0.4880047980807677

In [26]:
score = classification_report(Y_test, y_pred)
print("Score:", score)

Score:               precision    recall  f1-score   support

           0       0.67      0.63      0.65      2536
           1       0.54      0.37      0.44      2079
           2       0.10      0.22      0.13       259
           3       0.08      0.23      0.12        92
           4       0.05      0.30      0.08        20
           5       0.06      0.60      0.12         5
           6       0.00      0.00      0.00         9
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         1

    accuracy                           0.49      5002
   macro avg       0.15      0.23      0.15      5002
weighted avg       0.57      0.49      0.52      5002



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
cm = confusion_matrix(Y_test, y_pred)
print("Matrix:" , cm )

Matrix: [[1593  486  228   89   40   28   49   10   11    2]
 [ 744  762  289  129   67   13   49   12   11    3]
 [  38  114   56   23   13    3   10    1    1    0]
 [  16   36    9   21    6    0    1    2    1    0]
 [   2    2    2    6    6    0    2    0    0    0]
 [   1    0    0    0    1    3    0    0    0    0]
 [   0    4    3    2    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    1    0    0    0]
 [   1    0    0    0    0    0    0    0    0    0]]
