# Kaggle Learn-and-compete `Poker Rule Induction`

## Data Description

You are provided with 25,010 poker hands in train.csv and 1,000,000 in test.csv. Each hand consists of five cards with a given suit and rank, drawn from a standard deck of 52. Suits and ranks are represented as ordinal categories:

```
S1 “Suit of card #1”
Ordinal (1-4) representing {Hearts, Spades, Diamonds, Clubs}
C1 “Rank of card #1”
Numerical (1-13) representing (Ace, 2, 3, ... , Queen, King)

...

S5 “Suit of card #5”
C5 “Rank of card #5”
```

Each row in the training set has the accompanying class label for the poker hand it comprises. The hands are omitted from the test set and must be predicted by participants. Hands are classified into the following ordinal categories:


```
0: Nothing in hand; not a recognized poker hand 
1: One pair; one pair of equal ranks within five cards
2: Two pairs; two pairs of equal ranks within five cards
3: Three of a kind; three equal ranks within five cards
4: Straight; five cards, sequentially ranked with no gaps
5: Flush; five cards with the same suit
6: Full house; pair + different rank three of a kind
7: Four of a kind; four equal ranks within five cards
8: Straight flush; straight + flush
9: Royal flush; {Ace, King, Queen, Jack, Ten} + flush
```
Note that the Straight flush and Royal flush hands are not representative of
the true domain because they have been over-sampled. The straight flush
is 14.43 times more likely to occur in the training set, while the royal flush is 129.82 times more likely.

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn import preprocessing
from functools import reduce
from itertools import permutations

#### Read data

In [2]:
test_dataset = pd.read_csv('test.csv', delimiter=',')

In [3]:
test_dataset

Unnamed: 0,id,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5
0,1,1,10,2,2,3,3,3,8,1,1
1,2,2,13,3,5,3,7,4,6,1,4
2,3,1,3,1,11,2,8,2,1,2,4
3,4,1,6,3,3,4,7,1,8,3,11
4,5,2,10,3,4,1,6,2,12,2,6
5,6,1,4,3,10,2,11,2,6,1,7
6,7,1,10,3,8,1,4,3,11,3,9
7,8,2,11,3,8,1,1,1,11,2,3
8,9,3,4,1,1,1,3,3,5,3,6
9,10,3,12,2,1,1,3,1,2,3,10


In [4]:
train_dataset = pd.read_csv('train.csv', delimiter=',')

In [5]:
train_dataset['hand'].value_counts()

0    12493
1    10599
2     1206
3      513
4       93
5       54
6       36
7        6
9        5
8        5
Name: hand, dtype: int64

##### Data augmentation

In [6]:
cards = ['C1','C2','C3','C4','C5']
suits = ['S1','S2','S3','S4','S5']

def shuffle_columns(data, columns):
    shuffled = data[columns].reindex(np.random.permutation(data[columns].columns), axis=1)
    return pd.concat([shuffled, data.drop(columns, axis=1)], axis=1)

def repeat(data, times):
    repeated = data.copy()
    for i in range(times - 1):
        repeated = pd.concat([shuffle_columns(repeated, cards), data], axis=0)
    return repeated

def data_augmentation(data):
    data_aug = pd.concat([data[cards], data[suits], data['hand']], axis=1)
    counts = data['hand'].value_counts()
    for value, count in enumerate(counts):
        if value == 0:
            continue
        else:
            times = counts[0]//count
            
        if (times == 1):
            continue
            
        repeated = repeat(data[data['hand'] == value], times)
        data_aug = pd.concat([data_aug, repeated], axis=0)
    data_aug.index = list(range(data_aug.shape[0]))
    return data_aug

In [7]:
train_dataset_aug = data_augmentation(train_dataset)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  # This is added back by InteractiveShellApp.init_path()


In [8]:
train_dataset_aug['hand'].value_counts()

2    13266
3    12825
4    12555
6    12528
5    12528
7    12498
9    12495
8    12495
0    12493
1    10599
Name: hand, dtype: int64

In [9]:
train_dataset_aug.shape

(124282, 11)

In [10]:
data = train_dataset_aug[np.concatenate((cards, suits))]
labels = train_dataset_aug[['hand']]

In [11]:
data[0:10]

Unnamed: 0,C1,C2,C3,C4,C5,S1,S2,S3,S4,S5
0,9,1,2,7,8,4,2,2,4,2
1,4,6,12,11,7,1,3,1,3,2
2,11,1,7,11,1,1,4,3,4,2
3,9,4,6,9,9,2,2,3,1,4
4,8,4,11,2,1,1,2,2,2,2
5,5,5,13,3,13,2,1,2,2,3
6,10,6,4,13,5,3,4,1,2,4
7,10,1,13,2,7,4,3,2,4,4
8,2,10,3,4,9,3,4,3,4,1
9,7,8,8,13,12,2,3,4,2,2


In [12]:
labels_vect = tf.keras.utils.to_categorical(labels)

#### One hot encoding

In [13]:
def data_hotencode(data, columns, encoder, mode='transform'):
    encoded = None
    if (mode == 'fit'):
        encoded = encoder.fit_transform(data[columns])
    elif (mode == 'transform'):
        encoded = encoder.transform(data[columns])
    return pd.concat([data.drop(columns, axis=1), pd.DataFrame(encoded.toarray())], axis=1)

def hand_hotencode(hand, columns, encoder):
    df = pd.DataFrame(data=[hand], columns=['S1', 'C1','S2', 'C2','S3', 'C3','S4', 'C4','S5', 'C5'])
    return data_hotencode(df, columns, encoder, 'transform')

In [14]:
encoder = OneHotEncoder()
columns_to_hotencode = ['S1', 'S2', 'S3', 'S4', 'S5']
data_encoded = data_hotencode(data, columns_to_hotencode, encoder, 'fit')
print(data_encoded.shape)

data_encoded.head()

(124282, 25)


Unnamed: 0,C1,C2,C3,C4,C5,0,1,2,3,4,...,10,11,12,13,14,15,16,17,18,19
0,9,1,2,7,8,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,4,6,12,11,7,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,11,1,7,11,1,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,9,4,6,9,9,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,8,4,11,2,1,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


#### Normalization

In [15]:
def normalize(data, columns, mean, std):
    data_normalized = (data[columns] - mean) / std
    to_return = pd.concat([data.drop(columns, axis=1), data_normalized], axis=1)
    to_return.columns = list(range(to_return.shape[1]))
    return to_return

In [16]:
columns_to_normalize = ['C1', 'C2', 'C3', 'C4', 'C5']
mean = data_encoded[columns_to_normalize].mean().mean()
std = pd.Series(data_encoded[columns_to_normalize].values.flatten()).std()
data_normalized = normalize(data_encoded, columns_to_normalize, mean, std)
data_normalized.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,...,1.0,0.0,1.0,0.0,0.0,0.489357,-1.615549,-1.352436,-0.03687,0.226243
1,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,-0.82621,-0.299983,1.278696,1.015583,-0.03687
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,1.015583,-1.615549,-0.03687,1.015583,-1.615549
3,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.489357,-0.82621,-0.299983,0.489357,0.489357
4,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.226243,-0.82621,1.015583,-1.352436,-1.615549


In [17]:
data_normalized.shape

(124282, 25)

#### Test function

In [18]:
def calc_hand_label(hand):    
    def f(hand):
        suits_hist = reduce(lambda d,x: {**d, **{x: (d.get(x, 0) + 1)}}, hand[:,0], {})
        ranks_hist = reduce(lambda d,x: {**d, **{x: (d.get(x, 0) + 1)}}, hand[:,1], {})
        
        if len(ranks_hist.values()) < 5:
            if len(ranks_hist.values()) == 2:
                if max(list(ranks_hist.values())) == 4:
                    return 7
    #                 print('7: Four of a kind; four equal ranks within five cards')
                else:
                    return 6
    #                 print('6 Full house; pair + different rank three of a kind')
            elif len(ranks_hist.values()) == 3:
                if max(list(ranks_hist.values())) == 3:
                    return 3
    #                 print('3: Three of a kind; three equal ranks within five cards')
                else:
                    return 2
    #                 print('2: Two pairs; two pairs of equal ranks within five cards')
            else:
                return 1
    #             print('1: One pair; one pair of equal ranks within five cards')
        else:
            if len(suits_hist.values()) == 1:
                if max(list(ranks_hist.keys())) -  min(list(ranks_hist.keys())) == 4:
                    if max(list(ranks_hist.keys())) == 13:
                        return 9
    #                     print('9: Royal flush; {Ace, King, Queen, Jack, Ten} + flush')
                    else:
                        return 8
    #                     print('8: Straight flush; straight + flush')
                else:
                    return 5
    #                 print('5: Flush; five cards with the same suit')
            elif max(list(ranks_hist.keys())) -  min(list(ranks_hist.keys())) == 4:
                return 4    
    #             print('4: Straight; five cards, sequentially ranked with no gaps')
            else:
                return 0
                print('0: Nothing in hand; not a recognized poker hand ')
                
    hand = np.array(hand).reshape(5,2)
    res1 = f(hand)
    
    hand[:,1] = list(map(lambda x: 13 if (x == 1) else x-1, hand[:,1]))
    res2 = f(hand)
    
    if res1 == 9 and res2 == 8:
        return res2
    else:
        return max(res1, res2)

#### Encode cards

In [19]:
def card_encode(s, c):
    cards=["A","2","3","4","5","6","7","8","9","10","J","Q","K"]
    suits="♥♦♣♠"
    return cards[c - 1] + suits[s - 1]

def hand_encode(hand):
    return ', '.join(map(lambda x: card_encode(hand[x*2], hand[x*2+1]),range(5)))

print(hand_encode([4,9,2,1,2,2,4,7,2,8]))
print(hand_encode([2,9,2,4,3,6,1,9,4,9]))

9♠, A♦, 2♦, 7♠, 8♦
9♦, 4♦, 6♣, 9♥, 9♠


### Solutions

1. Neural network
2. Random forest

Ousiders:
 - Genetic algorithm
 - Decision tree

### Neural Network

In [20]:
from time import time
from keras.callbacks import TensorBoard

model = keras.Sequential()
#input
model.add(keras.layers.Dense(200, activation='relu', input_shape=(25,)))
#model.add(keras.layers.Dropout(0.1))
#hidden
model.add(keras.layers.Dense(400, activation='relu'))
#model.add(keras.layers.Dropout(0.05))
model.add(keras.layers.Dense(200, activation='relu'))
model.add(keras.layers.Dense(50, activation='relu'))
# Add a softmax layer with 10 output units:
model.add(keras.layers.Dense(10, activation='softmax'))

model.compile(optimizer=tf.train.AdamOptimizer(0.0005),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

tensorboard = TensorBoard(log_dir="logs/{}".format(time()))

model.fit(np.array(data_normalized), np.array(labels_vect), epochs=500, batch_size=512)

Using TensorFlow backend.


Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Epoch 1/500

Epoch 2/500

Epoch 3/500

Epoch 4/500

Epoch 5/500

Epoch 6/500

Epoch 7/500

Epoch 8/500

Epoch 9/500

Epoch 10/500

Epoch 11/500

Epoch 12/500

Epoch 13/500

Epoch 14/500

Epoch 15/500

Epoch 16/500

Epoch 17/500

Epoch 18/500

Epoch 19/500

Epoch 20/500

Epoch 21/500

Epoch 22/500

Epoch 23/500

Epoch 24/500

Epoch 25/500

Epoch 26/500

Epoch 27/500

Epoch 28/500

Epoch 29/500

Epoch 30/500

Epoch 31/500

Epoch 32/500

Epoch 33/500

Epoch 34/500

Epoch 35/500

Epoch 36/500

Epoch 37/500

Epoch 38/500

Epoch 39/500

Epoch 40/500

Epoch 41/500

Epoch 42/500

Epoch 43/500

Epoch 44/500

Epoch 45/500

Epoch 46/500

Epoch 47/500

Epoch 48/500

Epoch 49/500

Epoch 50/500

Epoch 51/500



Epoch 52/500

Epoch 53/500

Epoch 54/500

Epoch 55/500

Epoch 56/500

Epoch 57/500

Epoch 58/500

Epoch 59/500

Epoch 60/500

Epoch 61/500

Epoch 62/500

Epoch 63/500

Epoch 64/500

Epoch 65/500

Epoch 66/500

Epoch 67/500

Epoch 68/500

Epoch 69/500

Epoch 70/500

Epoch 71/500

Epoch 72/500

Epoch 73/500

Epoch 74/500

Epoch 75/500

Epoch 76/500

Epoch 77/500

Epoch 78/500

Epoch 79/500

Epoch 80/500

Epoch 81/500

Epoch 82/500

Epoch 83/500

Epoch 84/500

Epoch 85/500

Epoch 86/500

Epoch 87/500

Epoch 88/500

Epoch 89/500

Epoch 90/500

Epoch 91/500

Epoch 92/500

Epoch 93/500

Epoch 94/500

Epoch 95/500

Epoch 96/500

Epoch 97/500

Epoch 98/500

Epoch 99/500

Epoch 100/500

Epoch 101/500

Epoch 102/500

Epoch 103/500

Epoch 104/500

Epoch 105/500



Epoch 106/500

Epoch 107/500

Epoch 108/500

Epoch 109/500

Epoch 110/500

Epoch 111/500

Epoch 112/500

Epoch 113/500

Epoch 114/500

Epoch 115/500

Epoch 116/500

Epoch 117/500

Epoch 118/500

Epoch 119/500

Epoch 120/500

Epoch 121/500

Epoch 122/500

Epoch 123/500

Epoch 124/500

Epoch 125/500

Epoch 126/500

Epoch 127/500

Epoch 128/500

Epoch 129/500

Epoch 130/500

Epoch 131/500

Epoch 132/500

Epoch 133/500

Epoch 134/500

Epoch 135/500

Epoch 136/500

Epoch 137/500

Epoch 138/500

Epoch 139/500

Epoch 140/500

Epoch 141/500

Epoch 142/500

Epoch 143/500

Epoch 144/500

Epoch 145/500

Epoch 146/500

Epoch 147/500

Epoch 148/500

Epoch 149/500

Epoch 150/500

Epoch 151/500

Epoch 152/500

Epoch 153/500

Epoch 154/500

Epoch 155/500

Epoch 156/500

Epoch 157/500

Epoch 158/500



Epoch 159/500

Epoch 160/500

Epoch 161/500

Epoch 162/500

Epoch 163/500

Epoch 164/500

Epoch 165/500

Epoch 166/500

Epoch 167/500

Epoch 168/500

Epoch 169/500

Epoch 170/500

Epoch 171/500

Epoch 172/500

Epoch 173/500

Epoch 174/500

Epoch 175/500

Epoch 176/500

Epoch 177/500

Epoch 178/500

Epoch 179/500

Epoch 180/500

Epoch 181/500

Epoch 182/500

Epoch 183/500

Epoch 184/500

Epoch 185/500

Epoch 186/500

Epoch 187/500

Epoch 188/500

Epoch 189/500

Epoch 190/500

Epoch 191/500

Epoch 192/500

Epoch 193/500

Epoch 194/500

Epoch 195/500

Epoch 196/500

Epoch 197/500

Epoch 198/500

Epoch 199/500

Epoch 200/500

Epoch 201/500

Epoch 202/500

Epoch 203/500

Epoch 204/500

Epoch 205/500

Epoch 206/500

Epoch 207/500

Epoch 208/500

Epoch 209/500

Epoch 210/500

Epoch 211/500

Epoch 212/500



Epoch 213/500

Epoch 214/500

Epoch 215/500

Epoch 216/500

Epoch 217/500

Epoch 218/500

Epoch 219/500

Epoch 220/500

Epoch 221/500

Epoch 222/500

Epoch 223/500

Epoch 224/500

Epoch 225/500

Epoch 226/500

Epoch 227/500

Epoch 228/500

Epoch 229/500

Epoch 230/500

Epoch 231/500

Epoch 232/500

Epoch 233/500

Epoch 234/500

Epoch 235/500

Epoch 236/500

Epoch 237/500

Epoch 238/500

Epoch 239/500

Epoch 240/500

Epoch 241/500

Epoch 242/500

Epoch 243/500

Epoch 244/500

Epoch 245/500

Epoch 246/500

Epoch 247/500

Epoch 248/500

Epoch 249/500

Epoch 250/500

Epoch 251/500

Epoch 252/500

Epoch 253/500

Epoch 254/500

Epoch 255/500

Epoch 256/500

Epoch 257/500

Epoch 258/500

Epoch 259/500

Epoch 260/500

Epoch 261/500

Epoch 262/500

Epoch 263/500

Epoch 264/500

Epoch 265/500



Epoch 266/500

Epoch 267/500

Epoch 268/500

Epoch 269/500

Epoch 270/500

Epoch 271/500

Epoch 272/500

Epoch 273/500

Epoch 274/500

Epoch 275/500

Epoch 276/500

Epoch 277/500

Epoch 278/500

Epoch 279/500

Epoch 280/500

Epoch 281/500

Epoch 282/500

Epoch 283/500

Epoch 284/500

Epoch 285/500

Epoch 286/500

Epoch 287/500

Epoch 288/500

Epoch 289/500

Epoch 290/500

Epoch 291/500

Epoch 292/500

Epoch 293/500

Epoch 294/500

Epoch 295/500

Epoch 296/500

Epoch 297/500

Epoch 298/500

Epoch 299/500

Epoch 300/500

Epoch 301/500

Epoch 302/500

Epoch 303/500

Epoch 304/500

Epoch 305/500

Epoch 306/500

Epoch 307/500

Epoch 308/500

Epoch 309/500

Epoch 310/500

Epoch 311/500

Epoch 312/500

Epoch 313/500

Epoch 314/500

Epoch 315/500

Epoch 316/500

Epoch 317/500

Epoch 318/500



Epoch 319/500

Epoch 320/500

Epoch 321/500

Epoch 322/500

Epoch 323/500

Epoch 324/500

Epoch 325/500

Epoch 326/500

Epoch 327/500

Epoch 328/500

Epoch 329/500

Epoch 330/500

Epoch 331/500

Epoch 332/500

Epoch 333/500

Epoch 334/500

Epoch 335/500

Epoch 336/500

Epoch 337/500

Epoch 338/500

Epoch 339/500

Epoch 340/500

Epoch 341/500

Epoch 342/500

Epoch 343/500

Epoch 344/500

Epoch 345/500

Epoch 346/500

Epoch 347/500

Epoch 348/500

Epoch 349/500

Epoch 350/500

Epoch 351/500

Epoch 352/500

Epoch 353/500

Epoch 354/500

Epoch 355/500

Epoch 356/500

Epoch 357/500

Epoch 358/500

Epoch 359/500

Epoch 360/500

Epoch 361/500

Epoch 362/500

Epoch 363/500

Epoch 364/500

Epoch 365/500

Epoch 366/500

Epoch 367/500

Epoch 368/500

Epoch 369/500

Epoch 370/500

Epoch 371/500



Epoch 372/500

Epoch 373/500

Epoch 374/500

Epoch 375/500

Epoch 376/500

Epoch 377/500

Epoch 378/500

Epoch 379/500

Epoch 380/500

Epoch 381/500

Epoch 382/500

Epoch 383/500

Epoch 384/500

Epoch 385/500

Epoch 386/500

Epoch 387/500

Epoch 388/500

Epoch 389/500

Epoch 390/500

Epoch 391/500

Epoch 392/500

Epoch 393/500

Epoch 394/500

Epoch 395/500

Epoch 396/500

Epoch 397/500

Epoch 398/500

Epoch 399/500

Epoch 400/500

Epoch 401/500

Epoch 402/500

Epoch 403/500

Epoch 404/500

Epoch 405/500

Epoch 406/500

Epoch 407/500

Epoch 408/500

Epoch 409/500

Epoch 410/500

Epoch 411/500

Epoch 412/500

Epoch 413/500

Epoch 414/500

Epoch 415/500

Epoch 416/500

Epoch 417/500

Epoch 418/500

Epoch 419/500

Epoch 420/500

Epoch 421/500

Epoch 422/500

Epoch 423/500

Epoch 424/500



Epoch 425/500

Epoch 426/500

Epoch 427/500

Epoch 428/500

Epoch 429/500

Epoch 430/500

Epoch 431/500

Epoch 432/500

Epoch 433/500

Epoch 434/500

Epoch 435/500

Epoch 436/500

Epoch 437/500

Epoch 438/500

Epoch 439/500

Epoch 440/500

Epoch 441/500

Epoch 442/500

Epoch 443/500

Epoch 444/500

Epoch 445/500

Epoch 446/500

Epoch 447/500

Epoch 448/500

Epoch 449/500

Epoch 450/500

Epoch 451/500

Epoch 452/500

Epoch 453/500

Epoch 454/500

Epoch 455/500

Epoch 456/500

Epoch 457/500

Epoch 458/500

Epoch 459/500

Epoch 460/500

Epoch 461/500

Epoch 462/500

Epoch 463/500

Epoch 464/500

Epoch 465/500

Epoch 466/500

Epoch 467/500

Epoch 468/500

Epoch 469/500

Epoch 470/500

Epoch 471/500

Epoch 472/500

Epoch 473/500

Epoch 474/500

Epoch 475/500

Epoch 476/500

Epoch 477/500



Epoch 478/500

Epoch 479/500

Epoch 480/500

Epoch 481/500

Epoch 482/500

Epoch 483/500

Epoch 484/500

Epoch 485/500

Epoch 486/500

Epoch 487/500

Epoch 488/500

Epoch 489/500

Epoch 490/500

Epoch 491/500

Epoch 492/500

Epoch 493/500

Epoch 494/500

Epoch 495/500

Epoch 496/500

Epoch 497/500

Epoch 498/500

Epoch 499/500

Epoch 500/500



<tensorflow.python.keras._impl.keras.callbacks.History at 0x7fb9444a5dd8>

In [21]:
y_test = test_dataset.drop(['id'], axis=1).apply(calc_hand_label, axis=1)

In [39]:
model.save('./model-sort-166')



### Predictions

In [22]:
test_dataset_noid = test_dataset.drop(['id'], axis=1)

In [23]:
test_sample = pd.concat([test_dataset_noid[cards], test_dataset_noid[suits]], axis=1)
test_sample.head()

Unnamed: 0,C1,C2,C3,C4,C5,S1,S2,S3,S4,S5
0,10,2,3,8,1,1,2,3,3,1
1,13,5,7,6,4,2,3,3,4,1
2,3,11,8,1,4,1,1,2,2,2
3,6,3,7,8,11,1,3,4,1,3
4,10,4,6,12,6,2,3,1,2,2


In [24]:
test_data_sample_encoded = normalize(
    data_hotencode(test_sample, columns_to_hotencode, encoder),
    columns_to_normalize,
    mean,
    std
)

In [1]:
model1 = keras.models.load_model('./model-sort-166')
model1.compile(optimizer=tf.train.AdamOptimizer(0.0005),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

NameError: name 'keras' is not defined

In [25]:
test_data_sample_encoded.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.75247,-1.352436,-1.089323,0.226243,-1.615549
1,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,1.0,0.0,0.0,0.0,1.541809,-0.563096,-0.03687,-0.299983,-0.82621
2,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,-1.089323,1.015583,0.226243,-1.615549,-0.82621
3,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,-0.299983,-1.089323,-0.03687,0.226243,1.015583
4,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.75247,-0.82621,-0.299983,1.278696,-0.299983


In [26]:
test_data_sample_predicted_10 = model.predict(np.array(test_data_sample_encoded))
test_data_sample_predicted = np.array(list(map(np.argmax, test_data_sample_predicted_10)))

In [27]:
#test_data_sample_predicted = np.maximum(test_data_sample_predicted_1, test_data_sample_predicted_2)
#test_data_sample_predicted = test_data_sample_predicted_2

test_data_sample_labels = y_test
df = pd.DataFrame({
    'predicted': test_data_sample_predicted, 
    'real': test_data_sample_labels
})
df = pd.concat([test_sample, df], axis=1)
wrong_answers = df[df['predicted'] != df['real']]
print(wrong_answers.shape)
wrong_answers

(488797, 12)


Unnamed: 0,C1,C2,C3,C4,C5,S1,S2,S3,S4,S5,predicted,real
1,13,5,7,6,4,2,3,3,4,1,1,0
3,6,3,7,8,11,1,3,4,1,3,1,0
4,10,4,6,12,6,2,3,1,2,2,0,1
6,10,8,4,11,9,1,3,1,3,3,1,0
8,4,1,3,5,6,3,1,1,3,3,1,0
9,12,1,3,2,10,3,2,1,1,3,1,0
10,7,1,8,10,11,1,3,4,4,3,1,0
13,7,2,3,4,1,4,4,3,1,1,1,0
14,8,10,11,5,12,4,3,4,3,1,1,0
17,7,6,4,12,6,3,1,1,3,3,0,1


In [73]:
df[df['real'] == 1].shape

(422498, 12)

In [68]:
wrong_answers['real'].value_counts()

1    214857
0    203935
2     44101
3     19954
4      3809
6      1414
5       733
7       230
8        12
9         2
Name: real, dtype: int64

In [48]:
wrong_answers.drop(['predicted', 'real'], axis=1).apply(lambda h: hand_encode(np.array(h)), axis=1)

34        K♣, Q♣, 10♦, A♦, J♥
665        4♣, 5♣, 3♣, A♣, 2♣
3902       4♠, 3♦, 2♣, A♣, 5♥
9503       4♥, A♠, 2♠, 5♣, 3♣
12027     K♦, J♦, 10♦, Q♥, A♥
14357      5♣, A♠, 3♠, 4♠, 2♣
14657      A♠, 5♥, 4♣, 3♣, 2♥
15059     K♦, Q♥, A♥, 10♥, J♠
15877     10♦, Q♥, K♥, A♥, J♠
20151      4♣, A♣, 3♣, 5♦, 2♦
36911     K♦, J♣, A♣, 10♦, Q♥
42427      3♦, A♣, 2♣, 5♥, 4♣
48362     Q♦, J♦, A♠, 10♥, K♦
50832     Q♦, 10♥, A♦, K♥, J♠
51428      A♦, 4♣, 5♠, 3♥, 2♥
57633     J♠, Q♦, K♠, 10♠, A♠
65236     10♥, Q♣, A♦, K♦, J♠
66296     K♠, Q♣, J♥, 10♦, A♥
69065     Q♣, J♥, A♦, 10♦, K♦
70575     9♣, 8♥, K♦, J♠, 10♠
72500      3♠, 5♠, 2♣, A♣, 4♥
78123     10♦, A♥, J♥, K♣, Q♥
85179     Q♦, K♣, A♣, J♦, 10♥
88839     10♣, J♥, Q♦, A♥, K♥
91839      5♣, A♣, 4♠, 3♣, 2♦
95994     10♣, K♥, Q♥, J♠, A♦
96353     J♣, 9♥, 10♦, K♦, 8♠
97192      5♠, 3♣, 4♠, 2♦, A♣
100764     3♣, 5♣, 2♣, 4♥, A♣
103163    K♥, A♣, Q♦, J♣, 10♥
                 ...         
879365    K♣, A♥, 10♥, Q♦, J♦
890194    9♣, J♥, Q♣, 10♣, A♥
893648    