## Data Description

You are provided with 25,010 poker hands in train.csv and 1,000,000 in test.csv. Each hand consists of five cards with a given suit and rank, drawn from a standard deck of 52. Suits and ranks are represented as ordinal categories:

```
S1 “Suit of card #1”
Ordinal (1-4) representing {Hearts, Spades, Diamonds, Clubs}
C1 “Rank of card #1”
Numerical (1-13) representing (Ace, 2, 3, ... , Queen, King)

...

S5 “Suit of card #5”
C5 “Rank of card #5”
```

Each row in the training set has the accompanying class label for the poker hand it comprises. The hands are omitted from the test set and must be predicted by participants. Hands are classified into the following ordinal categories:


```
0: Nothing in hand; not a recognized poker hand 
1: One pair; one pair of equal ranks within five cards
2: Two pairs; two pairs of equal ranks within five cards
3: Three of a kind; three equal ranks within five cards
4: Straight; five cards, sequentially ranked with no gaps
5: Flush; five cards with the same suit
6: Full house; pair + different rank three of a kind
7: Four of a kind; four equal ranks within five cards
8: Straight flush; straight + flush
9: Royal flush; {Ace, King, Queen, Jack, Ten} + flush
```
Note that the Straight flush and Royal flush hands are not representative of
the true domain because they have been over-sampled. The straight flush
is 14.43 times more likely to occur in the training set, while the royal flush is 129.82 times more likely.

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from functools import reduce

## Train data

In [2]:
suits = ['S1','S2','S3','S4','S5']
cards = ['C1','C2','C3','C4','C5']

In [3]:
train_dataset = pd.read_csv('train.csv', delimiter=',')

In [4]:
train_dataset.head()

Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5,hand
0,4,9,2,1,2,2,4,7,2,8,0
1,1,4,3,6,1,12,3,11,2,7,0
2,1,11,4,1,3,7,4,11,2,1,2
3,2,9,2,4,3,6,1,9,4,9,3
4,1,8,2,4,2,11,2,2,2,1,0


#### Binarization

In [14]:
def binarize(data, column, non_zero_class):
    data = data.copy()
    data[column] = (data[column] == non_zero_class) * 1
    return data

In [15]:
straights_train_dataset = binarize(train_dataset, 'hand', 4)

In [16]:
straights_train_dataset.head()

Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5,hand
0,4,9,2,1,2,2,4,7,2,8,0
1,1,4,3,6,1,12,3,11,2,7,0
2,1,11,4,1,3,7,4,11,2,1,0
3,2,9,2,4,3,6,1,9,4,9,0
4,1,8,2,4,2,11,2,2,2,1,0


#### Augmentation

In [21]:
def shuffle_columns(data, columns):
    shuffled = data[columns].reindex(np.random.permutation(data[columns].columns), axis=1)
    return pd.concat([shuffled, data.drop(columns, axis=1)], axis=1)

def repeat(data, times):
    repeated = data.copy()
    for i in range(times - 1):
        repeated = pd.concat([shuffle_columns(repeated, cards), data], axis=0)
    return repeated

def augment(data):
    data_aug = pd.concat([data[cards], data[suits], data['hand']], axis=1)
    counts = data['hand'].value_counts()
    times = counts[0] // counts[1]
    print(times)
    
    repeated = repeat(data[data['hand'] == 1], times)
    data_aug = pd.concat([data_aug, repeated], axis=0)
    data_aug.index = list(range(data_aug.shape[0]))
    
    return data_aug

In [22]:
straights_train_dataset_aug = augment(straights_train_dataset)

267


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


In [23]:
straights_train_dataset_aug.shape

(49841, 11)

#### One hot encoding

In [26]:
data = straights_train_dataset_aug.drop('hand', axis=1)
train_labels = straights_train_dataset_aug['hand']

In [27]:
def data_hotencode(data, columns, encoder, mode='transform'):
    encoded = None
    if (mode == 'fit'):
        encoded = encoder.fit_transform(data[columns])
    elif (mode == 'transform'):
        encoded = encoder.transform(data[columns])
    return pd.concat([data.drop(columns, axis=1), pd.DataFrame(encoded.toarray())], axis=1)

In [28]:
encoder = OneHotEncoder()
data_encoded = data_hotencode(data, suits, encoder, 'fit')
print(data_encoded.shape)

data_encoded.head()

(49841, 25)


Unnamed: 0,C1,C2,C3,C4,C5,0,1,2,3,4,...,10,11,12,13,14,15,16,17,18,19
0,9,1,2,7,8,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,4,6,12,11,7,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,11,1,7,11,1,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,9,4,6,9,9,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,8,4,11,2,1,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


#### Normalization

In [30]:
def normalize(data, columns, mean, std):
    data_normalized = (data[columns] - mean) / std
    to_return = pd.concat([data_normalized, data.drop(columns, axis=1)], axis=1)
    to_return.columns = list(range(to_return.shape[1]))
    return to_return

In [31]:
mean = data_encoded[cards].mean().mean()
std = pd.Series(data_encoded[cards].values.flatten()).std()
data_normalized = normalize(data_encoded, cards, mean, std)
data_normalized.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,0.478215,-1.77551,-1.493795,-0.085216,0.1965,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,-0.930363,-0.366932,1.323362,1.041647,-0.085216,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,1.041647,-1.77551,-0.085216,1.041647,-1.77551,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,0.478215,-0.930363,-0.366932,0.478215,0.478215,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.1965,-0.930363,1.041647,-1.493795,-1.77551,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


In [32]:
data_normalized.shape

(49841, 25)

#### Test function

In [33]:
def calc_hand_label(hand):    
    def f(hand):
        suits_hist = reduce(lambda d,x: {**d, **{x: (d.get(x, 0) + 1)}}, hand[:,0], {})
        ranks_hist = reduce(lambda d,x: {**d, **{x: (d.get(x, 0) + 1)}}, hand[:,1], {})
        
        if len(ranks_hist.values()) < 5:
            if len(ranks_hist.values()) == 2:
                if max(list(ranks_hist.values())) == 4:
                    return 7
    #                 print('7: Four of a kind; four equal ranks within five cards')
                else:
                    return 6
    #                 print('6 Full house; pair + different rank three of a kind')
            elif len(ranks_hist.values()) == 3:
                if max(list(ranks_hist.values())) == 3:
                    return 3
    #                 print('3: Three of a kind; three equal ranks within five cards')
                else:
                    return 2
    #                 print('2: Two pairs; two pairs of equal ranks within five cards')
            else:
                return 1
    #             print('1: One pair; one pair of equal ranks within five cards')
        else:
            if len(suits_hist.values()) == 1:
                if max(list(ranks_hist.keys())) -  min(list(ranks_hist.keys())) == 4:
                    if max(list(ranks_hist.keys())) == 13:
                        return 9
    #                     print('9: Royal flush; {Ace, King, Queen, Jack, Ten} + flush')
                    else:
                        return 8
    #                     print('8: Straight flush; straight + flush')
                else:
                    return 5
    #                 print('5: Flush; five cards with the same suit')
            elif max(list(ranks_hist.keys())) -  min(list(ranks_hist.keys())) == 4:
                return 4    
    #             print('4: Straight; five cards, sequentially ranked with no gaps')
            else:
                return 0
                print('0: Nothing in hand; not a recognized poker hand ')
                
    hand = np.array(hand).reshape(5,2)
    res1 = f(hand)
    
    hand[:,1] = list(map(lambda x: 13 if (x == 1) else x-1, hand[:,1]))
    res2 = f(hand)
    
    if res1 == 9 and res2 == 8:
        return res2
    else:
        return max(res1, res2)

#### Encode cards

In [34]:
def card_encode(s, c):
    cards=["A","2","3","4","5","6","7","8","9","10","J","Q","K"]
    suits="♥♦♣♠"
    return cards[c - 1] + suits[s - 1]

def hand_encode(hand):
    return ', '.join(map(lambda x: card_encode(hand[x*2], hand[x*2+1]),range(5)))

print(hand_encode([4,9,2,1,2,2,4,7,2,8]))
print(hand_encode([2,9,2,4,3,6,1,9,4,9]))

9♠, A♦, 2♦, 7♠, 8♦
9♦, 4♦, 6♣, 9♥, 9♠


## Neural Network

In [35]:
from time import time
from keras.callbacks import TensorBoard

model = keras.Sequential()

#input
model.add(keras.layers.Dense(100, activation='relu', input_shape=(25,)))
#model.add(keras.layers.Dropout(0.1))

model.add(keras.layers.Dense(200, activation='relu'))
#model.add(keras.layers.Dropout(0.05))
model.add(keras.layers.Dense(50, activation='relu'))
model.add(keras.layers.Dense(25, activation='relu'))

# Add a softmax layer with 10 output units:
model.add(keras.layers.Dense(1, activation='sigmoid'))

model.compile(optimizer=tf.train.AdamOptimizer(0.0005),
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(np.array(data_normalized), np.array(train_labels), epochs=100, batch_size=512)

Instructions for updating:
keep_dims is deprecated, use keepdims instead


Using TensorFlow backend.


Epoch 1/100

Epoch 2/100

Epoch 3/100

Epoch 4/100

Epoch 5/100

Epoch 6/100

Epoch 7/100

Epoch 8/100

Epoch 9/100

Epoch 10/100

Epoch 11/100

Epoch 12/100

Epoch 13/100

Epoch 14/100

Epoch 15/100

Epoch 16/100

Epoch 17/100

Epoch 18/100

Epoch 19/100

Epoch 20/100

Epoch 21/100

Epoch 22/100

Epoch 23/100

Epoch 24/100

Epoch 25/100

Epoch 26/100

Epoch 27/100

Epoch 28/100

Epoch 29/100

Epoch 30/100

Epoch 31/100

Epoch 32/100

Epoch 33/100

Epoch 34/100

Epoch 35/100

Epoch 36/100

Epoch 37/100

Epoch 38/100

Epoch 39/100

Epoch 40/100

Epoch 41/100

Epoch 42/100

Epoch 43/100

Epoch 44/100

Epoch 45/100

Epoch 46/100

Epoch 47/100

Epoch 48/100

Epoch 49/100

Epoch 50/100

Epoch 51/100

Epoch 52/100

Epoch 53/100

Epoch 54/100

Epoch 55/100



Epoch 56/100

Epoch 57/100

Epoch 58/100

Epoch 59/100

Epoch 60/100

Epoch 61/100

Epoch 62/100

Epoch 63/100

Epoch 64/100

Epoch 65/100

Epoch 66/100

Epoch 67/100

Epoch 68/100

Epoch 69/100

Epoch 70/100

Epoch 71/100

Epoch 72/100

Epoch 73/100

Epoch 74/100

Epoch 75/100

Epoch 76/100

Epoch 77/100

Epoch 78/100

Epoch 79/100

Epoch 80/100

Epoch 81/100

Epoch 82/100

Epoch 83/100

Epoch 84/100

Epoch 85/100

Epoch 86/100

Epoch 87/100

Epoch 88/100

Epoch 89/100

Epoch 90/100

Epoch 91/100

Epoch 92/100

Epoch 93/100

Epoch 94/100

Epoch 95/100

Epoch 96/100

Epoch 97/100

Epoch 98/100

Epoch 99/100

Epoch 100/100



<tensorflow.python.keras._impl.keras.callbacks.History at 0x7f23172203c8>

## Test data

In [36]:
test_dataset = pd.read_csv('test.csv', delimiter=',')
test_data = test_dataset.drop('id', axis=1)

In [37]:
test_labels_full = test_data.apply(calc_hand_label, axis=1)

In [38]:
test_labels_4 = (test_labels_full == 4) * 1
test_labels_4.sum()

3885

In [39]:
test_data_encoded = normalize(
    data_hotencode(test_data, suits, encoder),
    cards,
    mean,
    std,
)

## Predictions

In [40]:
pretictions_prob = model.predict(np.array(test_data_encoded))

In [43]:
pretictions = ((pretictions_prob > 0.5) * 1).reshape(pretictions_prob.shape[0])

In [44]:
predictions_data = pd.concat([
    test_dataset,
    pd.DataFrame({'pretictions_prob': pretictions_prob.reshape(pretictions_prob.shape[0])}),
    pd.DataFrame({'predictions': pretictions}),
    pd.DataFrame({'labels': test_labels_4}),
], axis=1)

In [45]:
wrong_answers = predictions_data[predictions_data['predictions'] != predictions_data['labels']]
wrong_answers.shape

(5573, 14)

In [46]:
wrong_answers['labels'].value_counts()

1    3807
0    1766
Name: labels, dtype: int64

In [47]:
wrong_answers

Unnamed: 0,id,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5,pretictions_prob,predictions,labels
6,7,1,10,3,8,1,4,3,11,3,9,9.970978e-01,1,0
34,35,3,13,3,12,2,10,2,1,1,11,3.610290e-20,0,1
75,76,4,7,2,7,2,6,2,9,1,8,9.963707e-01,1,0
85,86,4,7,2,10,4,9,1,12,1,10,9.691641e-01,1,0
426,427,2,12,1,11,1,7,1,8,4,11,9.750966e-01,1,0
917,918,1,3,4,6,4,7,2,5,2,4,2.925200e-10,0,1
1031,1032,3,1,1,4,1,6,4,2,2,3,8.706117e-01,1,0
1146,1147,4,4,1,3,3,6,1,2,2,5,8.906184e-09,0,1
1183,1184,1,5,1,3,1,2,3,5,3,3,8.590872e-01,1,0
1550,1551,1,8,1,6,2,5,3,9,3,7,7.993282e-08,0,1
