In [1]:
import pandas as pd

import lightgbm as lgb

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
column_names = ["Suit of card #1", "Rank of card #1", "Suit of card #2", "Rank of card #2", "Suit of card #3", "Rank of card #3", "Suit of card #4", "Rank of card #4", "Suit of card #5", "Rank of card #5", "Poker Hand"]
poker_train = pd.read_csv('poker-training.data', header=None, names=column_names)
poker_test = pd.read_csv('poker-testing.data', header=None, names=column_names)
print("First 5 testing data")
poker_test.head()

First 5 testing data


Unnamed: 0,Suit of card #1,Rank of card #1,Suit of card #2,Rank of card #2,Suit of card #3,Rank of card #3,Suit of card #4,Rank of card #4,Suit of card #5,Rank of card #5,Poker Hand
0,1,1,1,13,2,4,2,3,1,12,0
1,3,12,3,2,3,11,4,5,2,5,1
2,1,9,4,6,1,4,3,2,3,9,1
3,1,4,3,13,2,13,2,1,3,6,1
4,3,10,2,7,1,2,2,11,4,9,0


In [3]:
print("First 5 training data")
poker_train.head()

First 5 training data


Unnamed: 0,Suit of card #1,Rank of card #1,Suit of card #2,Rank of card #2,Suit of card #3,Rank of card #3,Suit of card #4,Rank of card #4,Suit of card #5,Rank of card #5,Poker Hand
0,1,10,1,11,1,13,1,12,1,1,9
1,2,11,2,13,2,10,2,12,2,1,9
2,3,12,3,11,3,13,3,10,3,1,9
3,4,10,4,11,4,1,4,13,4,12,9
4,4,1,4,13,4,12,4,11,4,10,9


In [4]:
# Assuming you have already loaded the dataset using Pandas and assigned it to a DataFrame called 'df'
X = poker_train.drop(columns=["Poker Hand"])  # Features (all columns except the target column 'label')
Y = poker_train["Poker Hand"]  # Target column
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)


In [7]:
#LightBoost
# Create the LightGBM classifier model
model = lgb.LGBMClassifier(learning_rate=0.1, num_leaves=31, n_estimators=100, max_depth=6, objective='multiclass')

# Fit the model on the training data
model.fit(X_train, Y_train)


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 95
[LightGBM] [Info] Number of data points in the train set: 20008, number of used features: 10
[LightGBM] [Info] Start training from score -0.697856
[LightGBM] [Info] Start training from score -0.853716
[LightGBM] [Info] Start training from score -3.050588
[LightGBM] [Info] Start training from score -3.861255
[LightGBM] [Info] Start training from score -5.613428
[LightGBM] [Info] Start training from score -6.012067
[LightGBM] [Info] Start training from score -6.608051
[LightGBM] [Info] Start training from score -8.112128
[LightGBM] [Info] Start training from score -8.517593
[LightGBM] [Info] Start training from score -8.517593


In [8]:
# Predict the target labels for the test data
y_pred = model.predict(X_test)




In [10]:
accuracy = accuracy_score(Y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.5609756097560976


In [11]:
model.score(X_test, Y_test)



0.5609756097560976

In [12]:
score = classification_report(Y_test, y_pred)
print("Score:", score)

Score:               precision    recall  f1-score   support

           0       0.64      0.69      0.66      2536
           1       0.53      0.50      0.51      2079
           2       0.12      0.05      0.07       259
           3       0.10      0.05      0.07        92
           4       0.04      0.05      0.04        20
           5       0.04      0.40      0.08         5
           6       0.00      0.00      0.00         9
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         1

    accuracy                           0.56      5002
   macro avg       0.15      0.17      0.14      5002
weighted avg       0.55      0.56      0.56      5002



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [14]:
cm = confusion_matrix(Y_test, y_pred)
print("Matrix:" , cm )

Matrix: [[1740  682   33   15   10   21   10   19    3    3]
 [ 881 1045   54   26   13   22   15   17    2    4]
 [  64  166   13    2    1    1    4    7    1    0]
 [  12   66    4    5    2    1    1    1    0    0]
 [   4   12    1    1    1    0    1    0    0    0]
 [   1    2    0    0    0    2    0    0    0    0]
 [   1    7    1    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    1    0    0    0    0    0    0    0    0]
 [   1    0    0    0    0    0    0    0    0    0]]


In [17]:
#WITH SAMPLING 

from imblearn.over_sampling import SMOTE

# Instantiate SMOTE with desired settings
smote = SMOTE(sampling_strategy='auto', k_neighbors=3, random_state=42)

# Apply SMOTE to training data
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, Y_train)


In [18]:
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score

# Instantiate the LightGBM model
model = LGBMClassifier(n_estimators=100, learning_rate=0.1, max_depth=6)

# Fit the model on the resampled training data
model.fit(X_train_resampled, y_train_resampled)

# Predict on test data
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(Y_test, y_pred)


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 95
[LightGBM] [Info] Number of data points in the train set: 99570, number of used features: 10
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585


In [19]:
# Calculate accuracy
accuracy = accuracy_score(Y_test, y_pred)

print(accuracy)

0.5255897640943623


In [21]:
model.score(X_test, Y_test)



0.5255897640943623

In [23]:
score = classification_report(Y_test, y_pred)
print("Score:", score)

Score:               precision    recall  f1-score   support

           0       0.68      0.68      0.68      2536
           1       0.55      0.39      0.46      2079
           2       0.10      0.22      0.14       259
           3       0.08      0.25      0.13        92
           4       0.05      0.20      0.08        20
           5       0.29      1.00      0.45         5
           6       0.00      0.00      0.00         9
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         1

    accuracy                           0.53      5002
   macro avg       0.18      0.27      0.19      5002
weighted avg       0.58      0.53      0.55      5002



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [24]:
cm = confusion_matrix(Y_test, y_pred)
print("Matrix:" , cm )

Matrix: [[1737  469  197   80   20    7   18    2    4    2]
 [ 759  803  310  133   44    4   23    0    1    2]
 [  32  127   57   27    9    1    5    1    0    0]
 [  14   41    9   23    3    0    0    2    0    0]
 [   2    5    2    5    4    0    2    0    0    0]
 [   0    0    0    0    0    5    0    0    0    0]
 [   0    3    3    3    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    1    0    0    0]
 [   0    1    0    0    0    0    0    0    0    0]]
