In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
rcParams['figure.figsize'] = 10,7
import pickle
import numpy as np
%matplotlib inline

In [None]:
# Inject the notebook's custom stylesheet into the rendered page.
# HTML is imported from the public IPython.display module, and the CSS file is
# read inside a `with` block so the handle is closed deterministically.
from IPython.display import HTML

with open('poker-data-css-style.css') as css_file:
    css = css_file.read()
# Last expression of the cell: the returned HTML object is what gets rendered.
HTML('<style>{}</style>'.format(css))

In [None]:
# Dataset: https://archive.ics.uci.edu/ml/datasets/Poker+Hand
# The files have no header row, so column names are supplied here:
# one (S = suit, C = rank) pair per card, followed by the hand class label.
features = [
    'S1', 'C1',
    'S2', 'C2',
    'S3', 'C3',
    'S4', 'C4',
    'S5', 'C5',
    'CLASS',
]
train_data, test_data = (
    pd.read_csv(path, names=features)
    for path in ('data/poker/poker-hand-training.data',
                 'data/poker/poker-hand-testing.data')
)


In [None]:
# Preview the first 10 training rows to sanity-check the column layout.
train_data.head(10)

In [None]:
# Dataset dimensions: number of rows first, then number of columns.
row_count, column_count = train_data.shape
print(row_count)
print(column_count)

In [None]:
# Preview the last 10 training rows (checks the file was read to the end).
train_data.tail(10)

In [None]:
# Changing data representation to match this project's conventions
# data['S1'].replace([1,2,3,4], ['h','s','d','c'],inplace=True)
# data['S2'].replace([1,2,3,4], ['h','s','d','c'],inplace=True)
# data['S3'].replace([1,2,3,4], ['h','s','d','c'],inplace=True)
# data['S4'].replace([1,2,3,4], ['h','s','d','c'],inplace=True)
# data['S5'].replace([1,2,3,4], ['h','s','d','c'],inplace=True)

# data['C1'].replace([1,11,12,13], ['A','J','Q','K'],inplace=True)
# data['C2'].replace([1,11,12,13], ['A','J','Q','K'],inplace=True)
# data['C3'].replace([1,11,12,13], ['A','J','Q','K'],inplace=True)
# data['C4'].replace([1,11,12,13], ['A','J','Q','K'],inplace=True)
# data['C5'].replace([1,11,12,13], ['A','J','Q','K'],inplace=True)
# data.head(10)

In [None]:
# Reorder features
# cols = data.columns.tolist()
# cols = ['C1', 'S1', 'C2', 'S2', 'C3', 'S3', 'C4', 'S4', 'C5', 'S5', 'CLASS']
# data = data[cols]
# data.head(10)

In [None]:
nb_classes = 10  # we have 10 classes of poker hands
poker_hands = list(range(nb_classes))
# Human-readable name for every class id (0..9), in id order.
hand_name = dict(enumerate([
    'Nothing in hand',
    'One pair',
    'Two pairs',
    'Three of a kind',
    'Straight',
    'Flush',
    'Full house',
    'Four of a kind',
    'Straight flush',
    'Royal flush',
]))
# Tally the training rows per class label in a single pass; a label that never
# occurs is reported as 0.
label_counts = train_data.CLASS.value_counts()
cls = {hand_id: int(label_counts.get(hand_id, 0)) for hand_id in range(nb_classes)}
print(cls)
for hand_id, name in hand_name.items():
    print("%s: %d" % (name, cls[hand_id]))

In [None]:
# Bar chart of training instances per poker-hand class.
# Every class tallied in `cls` is drawn -- including class 9, which a
# hard-coded range(9) would silently leave off the chart.
hand_ids = sorted(cls)
plt.bar(hand_ids, [cls[i] for i in hand_ids], align='center')
plt.xticks(hand_ids)  # one tick per class id
plt.xlabel('Poker hand id')
plt.ylabel('Number of instances');  # trailing ';' hides the Text repr
# Data is extremely imbalanced because Royal Flush etc. are very rare hands in poker

In [None]:
# Parse data as-is. Fine for NeuralNet models, results in bad accuracy in Random Forests.
# Reads from `train_data` (the loaded training frame); the previous reference
# to a notebook-level `data` raised NameError on a fresh kernel.
# Columns 0-9 are the five (suit, rank) pairs, column 10 is the CLASS label.
X_train = train_data.iloc[:, 0:10].values
y_train = train_data.iloc[:, 10].values

In [None]:
# Adding more useful features for random forest classification model.
# Credit: https://github.com/CodeMySky/poker-hands
try:
    # tqdm.auto selects the notebook or console progress bar automatically and
    # replaces the private, deprecated tqdm._tqdm_notebook module.
    from tqdm.auto import tqdm
    tqdm.pandas(desc="Bar")  # registers DataFrame.progress_apply
except ImportError:
    # Progress bars are cosmetic; the helper below falls back to plain .apply.
    pass


def _row_value_counts(frame):
    """Count, for each row of `frame`, how often every distinct value occurs.

    Returns a DataFrame with one column per distinct value seen anywhere in
    `frame`; values missing from a row get a count of 0.
    """
    # Use tqdm's progress_apply when it has been registered, else plain apply.
    apply = getattr(frame, "progress_apply", frame.apply)
    # Series.value_counts replaces the deprecated top-level pd.value_counts.
    return apply(lambda row: row.value_counts(), axis=1).fillna(0)


def transform(data):
    """Turn raw poker hands into order-independent count features.

    Parameters
    ----------
    data : pandas.DataFrame
        Ten card columns laid out as suit, rank, suit, rank, ... followed by
        the class label as the last column.

    Returns
    -------
    (features, label) : tuple
        `features` concatenates, column-wise:
          * suit "count of counts": how many suits occur k times in the hand,
          * rank "count of counts": how many ranks occur k times in the hand,
          * the standard deviation of the five ranks.
        `label` is the last column of `data`, unchanged.

    Notes
    -----
    Column labels of `features` are the count values k, so labels can repeat
    across the suit and rank groups; select feature columns by position.
    """
    original_data = data.iloc[:, 0:-1]
    label = data.iloc[:, -1]
    suits = original_data.iloc[:, 0:10:2]  # even positions: S1..S5
    ranks = original_data.iloc[:, 1:10:2]  # odd positions: C1..C5

    card_value_std = ranks.std(axis=1)
    # First pass counts each suit/rank per hand; the second pass counts those
    # counts, which makes the features independent of card order and identity.
    card_type_count = _row_value_counts(_row_value_counts(suits))
    card_value_count = _row_value_counts(_row_value_counts(ranks))

    return pd.concat([card_type_count, card_value_count, card_value_std], axis=1), label

In [None]:
# Build the engineered feature frame and label column for each split.
# NOTE: this rebinds X_train / y_train, replacing the raw arrays assigned in
# the "Parse data as-is" cell above.
X_train, y_train = transform(train_data)
X_test, y_test = transform(test_data)

In [None]:
# Persist the engineered features and labels so later steps can reload them
# without re-running transform(). Each file is opened in a `with` block so the
# handle is closed even if pickling fails part-way.
pickle_targets = {
    "data/poker/new features/X_train.pickle": X_train,
    "data/poker/new features/y_train.pickle": y_train,
    "data/poker/new features/X_test.pickle": X_test,
    "data/poker/new features/y_test.pickle": y_test,
}
for pickle_path, payload in pickle_targets.items():
    with open(pickle_path, "wb") as pickle_out:
        pickle.dump(payload, pickle_out)

In [None]:
# Sanity check: first feature of the first training row.
# After transform(), X_train is a DataFrame, so X_train[0][0] would select by
# column label instead of position; .iloc gives the intended single element.
print(X_train.iloc[0, 0])