# Kaggle Learn-and-compete `Poker Rule Induction`

## Data Description

You are provided with 25,010 poker hands in train.csv and 1,000,000 in test.csv. Each hand consists of five cards with a given suit and rank, drawn from a standard deck of 52. Suits and ranks are represented as ordinal categories:

```
S1 “Suit of card #1”
Ordinal (1-4) representing {Hearts, Spades, Diamonds, Clubs}
C1 “Rank of card #1”
Numerical (1-13) representing (Ace, 2, 3, ... , Queen, King)

...

S5 “Suit of card #5”
C5 “Rank of card #5”
```

Each row in the training set has the accompanying class label for the poker hand it comprises. The hands are omitted from the test set and must be predicted by participants. Hands are classified into the following ordinal categories:


```
0: Nothing in hand; not a recognized poker hand 
1: One pair; one pair of equal ranks within five cards
2: Two pairs; two pairs of equal ranks within five cards
3: Three of a kind; three equal ranks within five cards
4: Straight; five cards, sequentially ranked with no gaps
5: Flush; five cards with the same suit
6: Full house; pair + different rank three of a kind
7: Four of a kind; four equal ranks within five cards
8: Straight flush; straight + flush
9: Royal flush; {Ace, King, Queen, Jack, Ten} + flush
```
Note that the Straight flush and Royal flush hands are not representative of
the true domain because they have been over-sampled. The straight flush
is 14.43 times more likely to occur in the training set, while the royal flush is 129.82 times more likely.

In [7]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn import preprocessing
from functools import reduce

#### Read data

In [8]:
test_dataset = pd.read_csv('test.csv', delimiter=',')

In [9]:
test_dataset

Unnamed: 0,id,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5
0,1,1,10,2,2,3,3,3,8,1,1
1,2,2,13,3,5,3,7,4,6,1,4
2,3,1,3,1,11,2,8,2,1,2,4
3,4,1,6,3,3,4,7,1,8,3,11
4,5,2,10,3,4,1,6,2,12,2,6
5,6,1,4,3,10,2,11,2,6,1,7
6,7,1,10,3,8,1,4,3,11,3,9
7,8,2,11,3,8,1,1,1,11,2,3
8,9,3,4,1,1,1,3,3,5,3,6
9,10,3,12,2,1,1,3,1,2,3,10


In [10]:
train_dataset = pd.read_csv('train.csv', delimiter=',')

In [11]:
data = train_dataset[['S1', 'C1','S2', 'C2','S3', 'C3','S4', 'C4','S5', 'C5']]
labels = train_dataset[['hand']]

In [12]:
data[0:10]

Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5
0,4,9,2,1,2,2,4,7,2,8
1,1,4,3,6,1,12,3,11,2,7
2,1,11,4,1,3,7,4,11,2,1
3,2,9,2,4,3,6,1,9,4,9
4,1,8,2,4,2,11,2,2,2,1
5,2,5,1,5,2,13,2,3,3,13
6,3,10,4,6,1,4,2,13,4,5
7,4,10,3,1,2,13,4,2,4,7
8,3,2,4,10,3,3,4,4,1,9
9,2,7,3,8,4,8,2,13,2,12


In [13]:
labels_vect = tf.keras.utils.to_categorical(labels)

#### One hot encoding

In [14]:
def data_hotencode(data, columns, encoder, mode='transform'):
    encoded = None
    if (mode == 'fit'):
        encoded = encoder.fit_transform(data[columns])
    elif (mode == 'transform'):
        encoded = encoder.transform(data[columns])
        print(encoded.toarray)
    return pd.concat([data.drop(columns, axis=1), pd.DataFrame(encoded.toarray())], axis=1)

def hand_hotencode(hand, columns, encoder):
    df = pd.DataFrame(data=[hand], columns=['S1', 'C1','S2', 'C2','S3', 'C3','S4', 'C4','S5', 'C5'])
    return data_hotencode(df, columns, encoder, 'transform')

In [15]:
def shuffle_columns(data, columns):
    shuffled = data[columns].reindex(np.random.permutation(data[columns].columns), axis=1)
    return pd.concat([data.drop(columns, axis=1), shuffled], axis=1)

In [16]:
encoder = OneHotEncoder()
columns_to_hotencode = ['S1', 'S2', 'S3', 'S4', 'S5']
data_encoded = data_hotencode(data, columns_to_hotencode, encoder, 'fit')
print(data_encoded.shape)

data_encoded.head()

(25010, 25)


Unnamed: 0,C1,C2,C3,C4,C5,0,1,2,3,4,...,10,11,12,13,14,15,16,17,18,19
0,9,1,2,7,8,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,4,6,12,11,7,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,11,1,7,11,1,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,9,4,6,9,9,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,8,4,11,2,1,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


#### Sort

In [40]:
def sort(data, columns):
    sorted = data[columns].replace(1, 14) - 1
    sorted.values.sort()
    return pd.concat([sorted, data.drop(columns, axis=1)], axis=1)

In [42]:
columns_to_sort = ['C1', 'C2', 'C3', 'C4', 'C5']
data_sorted = sort(data_encoded, columns_to_sort)
data_sorted.head()

Unnamed: 0,C1,C2,C3,C4,C5,0,1,2,3,4,...,10,11,12,13,14,15,16,17,18,19
0,1,6,7,8,13,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,3,5,6,10,11,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,6,10,10,13,13,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,3,5,8,8,8,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1,3,7,10,13,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


#### Normalization

In [17]:
def normalize(data, columns, scaler):
    data_normalized = pd.DataFrame(scaler.transform(data[columns]))
    to_return = pd.concat([data.drop(columns, axis=1), data_normalized], axis=1)
    to_return.columns = list(range(to_return.shape[1]))
    return to_return

In [43]:
columns_to_normalize = ['C1', 'C2', 'C3', 'C4', 'C5']
scaler = preprocessing.StandardScaler().fit(data_sorted[columns_to_normalize])
data_normalized = normalize(data_sorted, columns_to_normalize, scaler)
data_normalized.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,...,1.0,0.0,1.0,0.0,0.0,-0.918046,0.534843,-0.006809,-0.536917,0.920795
1,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.22335,0.091386,-0.427298,0.346262,-0.227272
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,1.935445,2.308669,1.254658,1.671031,0.920795
3,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.22335,0.091386,0.41368,-0.536917,-1.949372
4,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,-0.918046,-0.795527,-0.006809,0.346262,0.920795


#### 52 encoding

In [109]:
def data_52encode(data):
    columns = []
    for i in range(1, 5):
        for j in range(1, 14):
            columns.append(str(i) + str(j))
    
    data_squashed = pd.DataFrame()
    for i in range(1, 6):
        column_S = 'S'+str(i)
        column_C = 'C' + str(i)
        data_squashed[column_S+column_C] = data[column_S].astype(str) + data[column_C].astype(str)
    
    data_encoded = pd.DataFrame(data = np.zeros((data.shape[0], len(columns))), columns=columns)
    
    for i in range(data.shape[0]):
        row = data_squashed.iloc[i, :]
        for col in data_squashed.columns:
            data_encoded[row[col]][i] = 1
    
    return data_encoded

In [110]:
data_52encoded = data_52encode(data)

KeyboardInterrupt: 

In [111]:
def get_hand_df(hand):
    return pd.DataFrame({
        'S1': [hand[0]],
        'C1': [hand[1]],
        'S2': [hand[2]],
        'C2': [hand[3]],
        'S3': [hand[4]],
        'C3': [hand[5]],
        'S4': [hand[6]],
        'C4': [hand[7]],
        'S5': [hand[8]],
        'C5': [hand[9]],
    })

#### Test function

In [44]:
def calc_hand_label(hand):    
    def f(hand):
        suits_hist = reduce(lambda d,x: {**d, **{x: (d.get(x, 0) + 1)}}, hand[:,0], {})
        ranks_hist = reduce(lambda d,x: {**d, **{x: (d.get(x, 0) + 1)}}, hand[:,1], {})
        
        if len(ranks_hist.values()) < 5:
            if len(ranks_hist.values()) == 2:
                if max(list(ranks_hist.values())) == 4:
                    return 7
    #                 print('7: Four of a kind; four equal ranks within five cards')
                else:
                    return 6
    #                 print('6 Full house; pair + different rank three of a kind')
            elif len(ranks_hist.values()) == 3:
                if max(list(ranks_hist.values())) == 3:
                    return 3
    #                 print('3: Three of a kind; three equal ranks within five cards')
                else:
                    return 2
    #                 print('2: Two pairs; two pairs of equal ranks within five cards')
            else:
                return 1
    #             print('1: One pair; one pair of equal ranks within five cards')
        else:
            if len(suits_hist.values()) == 1:
                if max(list(ranks_hist.keys())) -  min(list(ranks_hist.keys())) == 4:
                    if max(list(ranks_hist.keys())) == 13:
                        return 9
    #                     print('9: Royal flush; {Ace, King, Queen, Jack, Ten} + flush')
                    else:
                        return 8
    #                     print('8: Straight flush; straight + flush')
                else:
                    return 5
    #                 print('5: Flush; five cards with the same suit')
            elif max(list(ranks_hist.keys())) -  min(list(ranks_hist.keys())) == 4:
                return 4    
    #             print('4: Straight; five cards, sequentially ranked with no gaps')
            else:
                return 0
                print('0: Nothing in hand; not a recognized poker hand ')
                
    hand = np.array(hand).reshape(5,2)
    res1 = f(hand)
    
    hand[:,1] = list(map(lambda x: 13 if (x == 1) else x-1, hand[:,1]))
    res2 = f(hand)
    
    if res1 == 9 and res2 == 8:
        return res2
    else:
        return max(res1, res2)

#### Encode cards

In [45]:
def card_encode(s, c):
    cards=["A","2","3","4","5","6","7","8","9","10","J","Q","K"]
    suits="♥♦♣♠"
    return cards[c - 1] + suits[s - 1]

def hand_encode(hand):
    return ', '.join(map(lambda x: card_encode(hand[x*2], hand[x*2+1]),range(5)))

print(hand_encode([4,9,2,1,2,2,4,7,2,8]))
print(hand_encode([2,9,2,4,3,6,1,9,4,9]))

9♠, A♦, 2♦, 7♠, 8♦
9♦, 4♦, 6♣, 9♥, 9♠


### Solutions

1. Neural network
2. Random forest

Ousiders:
 - Genetic algorithm
 - Decision tree

### Neural Network

In [46]:
def fit(model, data, labels, columns_to_shuffle, iterations, epochs, batch_size):
    for i in range(iterations):
        print ('-----------------------Iteration:' + str(i+1) +  '-----------------------')
        model.fit(np.array(data), np.array(labels), epochs=epochs, batch_size=batch_size)
        data = shuffle_columns(data, columns_to_shuffle)
    return model

In [78]:
from time import time
from keras.callbacks import TensorBoard

model = keras.Sequential()
#input
model.add(keras.layers.Dense(64, activation='relu', input_shape=(25,)))
#hidden
model.add(keras.layers.Dense(32, activation='relu'))
# Add a softmax layer with 10 output units:
model.add(keras.layers.Dense(10, activation='softmax'))

model.compile(optimizer=tf.train.AdamOptimizer(0.0005),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

tensorboard = TensorBoard(log_dir="logs/{}".format(time()))

model.fit(np.array(data_normalized), np.array(labels_vect), epochs=300, batch_size=512)

Epoch 1/300

Epoch 2/300

Epoch 3/300

Epoch 4/300

Epoch 5/300

Epoch 6/300

Epoch 7/300

Epoch 8/300

Epoch 9/300

Epoch 10/300

Epoch 11/300

Epoch 12/300

Epoch 13/300

Epoch 14/300

Epoch 15/300

Epoch 16/300

Epoch 17/300

Epoch 18/300

Epoch 19/300

Epoch 20/300

Epoch 21/300

Epoch 22/300

Epoch 23/300

Epoch 24/300

Epoch 25/300

Epoch 26/300

Epoch 27/300

Epoch 28/300

Epoch 29/300

Epoch 30/300

Epoch 31/300

Epoch 32/300

Epoch 33/300

Epoch 34/300

Epoch 35/300

Epoch 36/300

Epoch 37/300

Epoch 38/300

Epoch 39/300

Epoch 40/300

Epoch 41/300

Epoch 42/300

Epoch 43/300

Epoch 44/300

Epoch 45/300

Epoch 46/300

Epoch 47/300

Epoch 48/300

Epoch 49/300

Epoch 50/300

Epoch 51/300

Epoch 52/300

Epoch 53/300

Epoch 54/300

Epoch 55/300

Epoch 56/300



Epoch 57/300

Epoch 58/300

Epoch 59/300

Epoch 60/300

Epoch 61/300

Epoch 62/300

Epoch 63/300

Epoch 64/300

Epoch 65/300

Epoch 66/300

Epoch 67/300

Epoch 68/300

Epoch 69/300

Epoch 70/300

Epoch 71/300

Epoch 72/300

Epoch 73/300

Epoch 74/300

Epoch 75/300

Epoch 76/300

Epoch 77/300

Epoch 78/300

Epoch 79/300

Epoch 80/300

Epoch 81/300

Epoch 82/300

Epoch 83/300

Epoch 84/300

Epoch 85/300

Epoch 86/300

Epoch 87/300

Epoch 88/300

Epoch 89/300

Epoch 90/300

Epoch 91/300

Epoch 92/300

Epoch 93/300

Epoch 94/300

Epoch 95/300

Epoch 96/300

Epoch 97/300

Epoch 98/300

Epoch 99/300

Epoch 100/300

Epoch 101/300

Epoch 102/300

Epoch 103/300

Epoch 104/300

Epoch 105/300

Epoch 106/300

Epoch 107/300

Epoch 108/300

Epoch 109/300

Epoch 110/300

Epoch 111/300



Epoch 112/300

Epoch 113/300

Epoch 114/300

Epoch 115/300

Epoch 116/300

Epoch 117/300

Epoch 118/300

Epoch 119/300

Epoch 120/300

Epoch 121/300

Epoch 122/300

Epoch 123/300

Epoch 124/300

Epoch 125/300

Epoch 126/300

Epoch 127/300

Epoch 128/300

Epoch 129/300

Epoch 130/300

Epoch 131/300

Epoch 132/300

Epoch 133/300

Epoch 134/300

Epoch 135/300

Epoch 136/300

Epoch 137/300

Epoch 138/300

Epoch 139/300

Epoch 140/300

Epoch 141/300

Epoch 142/300

Epoch 143/300

Epoch 144/300

Epoch 145/300

Epoch 146/300

Epoch 147/300

Epoch 148/300

Epoch 149/300

Epoch 150/300

Epoch 151/300

Epoch 152/300

Epoch 153/300

Epoch 154/300

Epoch 155/300

Epoch 156/300

Epoch 157/300

Epoch 158/300

Epoch 159/300

Epoch 160/300

Epoch 161/300

Epoch 162/300

Epoch 163/300

Epoch 164/300

Epoch 165/300

Epoch 166/300



Epoch 167/300

Epoch 168/300

Epoch 169/300

Epoch 170/300

Epoch 171/300

Epoch 172/300

Epoch 173/300

Epoch 174/300

Epoch 175/300

Epoch 176/300

Epoch 177/300

Epoch 178/300

Epoch 179/300

Epoch 180/300

Epoch 181/300

Epoch 182/300

Epoch 183/300

Epoch 184/300

Epoch 185/300

Epoch 186/300

Epoch 187/300

Epoch 188/300

Epoch 189/300

Epoch 190/300

Epoch 191/300

Epoch 192/300

Epoch 193/300

Epoch 194/300

Epoch 195/300

Epoch 196/300

Epoch 197/300

Epoch 198/300

Epoch 199/300

Epoch 200/300

Epoch 201/300

Epoch 202/300

Epoch 203/300

Epoch 204/300

Epoch 205/300

Epoch 206/300

Epoch 207/300

Epoch 208/300

Epoch 209/300

Epoch 210/300

Epoch 211/300

Epoch 212/300

Epoch 213/300

Epoch 214/300

Epoch 215/300

Epoch 216/300

Epoch 217/300

Epoch 218/300

Epoch 219/300

Epoch 220/300



Epoch 221/300

Epoch 222/300

Epoch 223/300

Epoch 224/300

Epoch 225/300

Epoch 226/300

Epoch 227/300

Epoch 228/300

Epoch 229/300

Epoch 230/300

Epoch 231/300

Epoch 232/300

Epoch 233/300

Epoch 234/300

Epoch 235/300

Epoch 236/300

Epoch 237/300

Epoch 238/300

Epoch 239/300

Epoch 240/300

Epoch 241/300

Epoch 242/300

Epoch 243/300

Epoch 244/300

Epoch 245/300

Epoch 246/300

Epoch 247/300

Epoch 248/300

Epoch 249/300

Epoch 250/300

Epoch 251/300

Epoch 252/300

Epoch 253/300

Epoch 254/300

Epoch 255/300

Epoch 256/300

Epoch 257/300

Epoch 258/300

Epoch 259/300

Epoch 260/300

Epoch 261/300

Epoch 262/300

Epoch 263/300

Epoch 264/300

Epoch 265/300

Epoch 266/300

Epoch 267/300

Epoch 268/300

Epoch 269/300

Epoch 270/300

Epoch 271/300

Epoch 272/300

Epoch 273/300

Epoch 274/300



Epoch 275/300

Epoch 276/300

Epoch 277/300

Epoch 278/300

Epoch 279/300

Epoch 280/300

Epoch 281/300

Epoch 282/300

Epoch 283/300

Epoch 284/300

Epoch 285/300

Epoch 286/300

Epoch 287/300

Epoch 288/300

Epoch 289/300

Epoch 290/300

Epoch 291/300

Epoch 292/300

Epoch 293/300

Epoch 294/300

Epoch 295/300

Epoch 296/300

Epoch 297/300

Epoch 298/300

Epoch 299/300

Epoch 300/300



<tensorflow.python.keras._impl.keras.callbacks.History at 0x7fec436bc438>

In [48]:
y_test = test_dataset.drop(['id'], axis=1).apply(calc_hand_label, axis=1)

In [73]:
model.save('./model-sort-245')



### Predictions

In [50]:
test_dataset_noid = test_dataset.drop(['id'], axis=1)

In [51]:
test_sample = test_dataset_noid
test_data_sample_encoded = normalize(
    sort(
        data_hotencode(test_sample, columns_to_hotencode, encoder),
        columns_to_sort,
    ), 
    columns_to_normalize, scaler
)

<bound method _cs_matrix.toarray of <1000000x20 sparse matrix of type '<class 'numpy.float64'>'
	with 5000000 stored elements in Compressed Sparse Row format>>


In [80]:
model = keras.models.load_model('./model-sort-245')
model.compile(optimizer=tf.train.AdamOptimizer(0.0005),
              loss='categorical_crossentropy',
              metrics=['accuracy'])



In [6]:
test_hand = [1,9,1,10,1,11,1,12,1,1]
print(hand_encode(test_hand))
prediction = model.predict(np.array(normalize(hand_hotencode(test_hand, columns_to_hotencode, encoder), columns_to_normalize, scaler)))
print(prediction[0])
plt.plot(prediction[0])

9♥, 10♥, J♥, Q♥, A♥


NameError: name 'model' is not defined

In [81]:
test_data_sample_predicted_10 = model.predict(np.array(test_data_sample_encoded))
test_data_sample_predicted = np.array(list(map(np.argmax, test_data_sample_predicted_10)))
test_data_sample_labels = y_test
df = pd.DataFrame({'predicted': test_data_sample_predicted, 'real': test_data_sample_labels})
df = pd.concat([test_sample, df], axis=1)
wrong_answers = df[df['predicted'] != df['real']]
print(wrong_answers.shape)
wrong_answers[0:10]

(245, 12)


Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5,predicted,real
5833,3,8,3,13,3,1,3,10,3,12,0,5
9256,2,5,1,2,3,3,1,1,1,4,0,4
9503,1,4,4,1,4,2,3,5,3,3,0,4
13082,1,5,2,2,4,3,2,1,3,4,0,4
14657,4,1,1,5,3,4,3,3,1,2,0,4
15467,2,1,3,1,1,1,4,1,4,12,3,7
20151,3,4,3,1,3,3,2,5,2,2,0,4
24421,1,4,1,2,4,3,4,1,1,5,0,4
27574,1,6,3,2,1,3,1,4,1,5,8,4
28339,1,3,3,5,4,2,2,1,3,4,0,4


In [57]:
wrong_answers[0:10].drop(['predicted', 'real'], axis=1).apply(lambda h: hand_encode(np.array(h)), axis=1)

3902     4♠, 3♦, 2♣, A♣, 5♥
5775     K♥, 3♦, 5♦, 2♦, 4♦
9503     4♥, A♠, 2♠, 5♣, 3♣
13082    5♥, 2♦, 3♠, A♦, 4♣
14327    2♠, 3♦, A♥, 5♦, 4♣
14357    5♣, A♠, 3♠, 4♠, 2♣
14657    A♠, 5♥, 4♣, 3♣, 2♥
15467    A♦, A♣, A♥, A♠, Q♠
20151    4♣, A♣, 3♣, 5♦, 2♦
21815    2♠, 3♥, A♦, 5♣, 4♦
dtype: object