# Can we predict a poker hand?
Columns 1–10 alternate between a card's suit and its value (rank): one suit/value pair for each of the five cards in the hand.

Column 11 is the label, i.e. the poker hand formed by the five cards:

- 0: Nothing in hand; not a recognized poker hand 
- 1: One pair; one pair of equal ranks within five cards
- 2: Two pairs; two pairs of equal ranks within five cards
- 3: Three of a kind; three equal ranks within five cards
- 4: Straight; five cards, sequentially ranked with no gaps
- 5: Flush; five cards with the same suit
- 6: Full house; pair + different rank three of a kind
- 7: Four of a kind; four equal ranks within five cards
- 8: Straight flush; straight + flush
- 9: Royal flush; {Ace, King, Queen, Jack, Ten} + flush

In [1]:
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

In [2]:
# Location of the raw CSV: the "poker-hand-testing" file hosted on the UCI
# Machine Learning Repository archive.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/poker/poker-hand-testing.data'

In [3]:
# The file has no header row, so the column names are supplied here:
# suit_1, val_1, ..., suit_5, val_5 for the five cards, then the 'hand' label.
card_columns = [f"{field}_{card}" for card in range(1, 6) for field in ("suit", "val")]
dataset = pd.read_csv(url, header=None, names=card_columns + ['hand'])

In [4]:
# Sanity check after loading: show the frame's (rows, columns) dimensions.
print(dataset.shape)

(1000000, 11)


In [5]:
# Preview the first ten rows to eyeball the suit/value columns and the label.
dataset.head(10)


Unnamed: 0,suit_1,val_1,suit_2,val_2,suit_3,val_3,suit_4,val_4,suit_5,val_5,hand
0,1,1,1,13,2,4,2,3,1,12,0
1,3,12,3,2,3,11,4,5,2,5,1
2,1,9,4,6,1,4,3,2,3,9,1
3,1,4,3,13,2,13,2,1,3,6,1
4,3,10,2,7,1,2,2,11,4,9,0
5,1,3,4,5,3,4,1,12,4,6,0
6,2,6,4,11,2,3,4,9,1,7,0
7,3,2,4,9,3,7,4,3,4,5,0
8,4,4,3,13,1,8,3,9,3,10,0
9,1,9,3,8,4,4,1,7,3,5,0


In [6]:
# Pull the raw values out of the frame and keep only the first 10,000 hands
# (use `array` in place of `array[:10000]` to work with every row instead).
array = dataset.values
subset = array[:10000]
X = subset[:, :10]  # first ten columns: suit/value for each of the five cards
Y = subset[:, 10]   # last column: the hand label

# Hold out 20% of the subset for validation; the fixed seed makes the split repeatable.
validation_size = 0.20
training_seed = 7
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=training_seed,
)


In [7]:
from collections import Counter

# Tally how many of the sampled hands fall into each class. Counter consumes
# the label array directly, so no explicit counting loop is needed.
# See the list at the top of the notebook for an explanation of the keys.
counts = Counter(Y)
counts

Counter({0: 5092, 1: 4096, 2: 506, 3: 220, 5: 27, 4: 38, 6: 15, 7: 6})

In [8]:
from sklearn.model_selection import cross_validate

# SVC is already imported in the first cell, so it is not imported again here.
# Fit a linear-kernel SVM with 5-fold cross-validation on the training split.
# return_estimator=True keeps each fold's fitted model in cv_results['estimator'],
# which the prediction cell further down reads from.
clf = SVC(kernel='linear', C=1)
cv_results = cross_validate(clf, X_train, Y_train, cv=5, return_estimator=True)


In [17]:
# Hand-written sample hands: ten feature values (suit, value for each card)
# followed by the expected hand class in the last position.
test_hands = [
    [1, 6, 2, 6, 4, 12, 3, 6, 1, 12, 6],
    [4, 12, 4, 7, 1, 5, 2, 3, 3, 13, 0],
    [1, 9, 2, 1, 3, 11, 1, 3, 1, 10, 0],
    [4, 5, 3, 12, 4, 10, 3, 10, 2, 12, 2],
]

# Score every sample with the estimator fitted on the first cross-validation fold.
fold_model = cv_results['estimator'][0]
for *cards, expected in test_hands:
    actual = fold_model.predict([cards])
    print(f"Expected: {expected}, Actual {actual}")


Expected: 6, Actual [0]
Expected: 0, Actual [0]
Expected: 0, Actual [0]
Expected: 2, Actual [0]
