In [1]:
import tensorflow as tf
import pandas as pd

In [2]:
##########################
##  SET UTILS
##########################
def get_suit_info(df_row):
    """Return the size of the largest group of equal values in ``df_row``.

    ``df_row`` is a pandas Series (one row of suit columns when called through
    ``DataFrame.apply(..., axis=1)``). The result is the highest count that
    ``value_counts()`` reports for that row.
    """
    occurrences = df_row.value_counts()
    return max(occurrences)

def get_rank_info(df_row):
    """Summarise one row of values as group sizes plus mean and spread.

    Parameters
    ----------
    df_row : pandas.Series
        The values of one row (the rank columns when called through
        ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    tuple
        ``(same1, same2, mean, std)`` where ``same1`` is the size of the
        largest group of equal values, ``same2`` the size of the second
        largest group (0 when every value in the row is identical, so there is
        no second group), ``mean`` the row mean and ``std`` the row standard
        deviation rounded to two decimals.
    """
    # value_counts() is sorted by descending count, so positions 0 and 1 are
    # the largest and second-largest groups; compute it once and reuse it.
    counts = df_row.value_counts()
    same1 = counts.iloc[0]
    # A row with a single distinct value has no second group to report.
    same2 = counts.iloc[1] if len(counts) > 1 else 0

    mean = df_row.mean()
    std = df_row.std()

    return same1, same2, mean, round(std, 2)

In [3]:
##########################
##  LOAD DATASET
##########################
# Column names assigned to the headerless poker-hand files. Later cells use
# the S* columns as suits, the C* columns as ranks and CLASS as the label.
POKER_COLUMNS = ["S1", "C1", "S2", "C2", "S3", "C3", "S4", "C4", "S5", "C5", "CLASS"]


def load_poker_csv(path):
    """Read one headerless, comma-separated poker-hand file into a DataFrame.

    Both the training and the test file are parsed with exactly the same
    options, so they are kept in one place instead of being copy-pasted.

    Parameters
    ----------
    path : str
        Location of the ``.data`` file.

    Returns
    -------
    pandas.DataFrame
        One row per line of the file, with columns named by ``POKER_COLUMNS``.
    """
    return pd.read_csv(path,
                       delimiter=",", header=None, lineterminator="\n",
                       names=POKER_COLUMNS)


dataset_train = load_poker_csv("../../data/poker-hand-training-true.data")

dataset_test = load_poker_csv("../../data/poker-hand-testing.data")


In [4]:
# Preview the first rows of the raw training table.
dataset_train.head()

Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5,CLASS
0,1,10,1,11,1,13,1,12,1,1,9
1,2,11,2,13,2,10,2,12,2,1,9
2,3,12,3,11,3,13,3,10,3,1,9
3,4,10,4,11,4,1,4,13,4,12,9
4,4,1,4,13,4,12,4,11,4,10,9


In [5]:
##########################
##  TRAIN DATASET
##########################
# Split the training table into three frames: the label, the suit columns
# and the rank columns.
df_class = dataset_train.loc[:, ["CLASS"]]
df_suit = dataset_train.loc[:, ["S1", "S2", "S3", "S4", "S5"]]

# Every rank value 1 is recoded to 14 as part of the same expression.
df_rank = (
    dataset_train
    .loc[:, ["C1", "C2", "C3", "C4", "C5"]]
    .replace(1, 14)
)

In [6]:
# Check the rank columns after the 1 -> 14 recoding.
df_rank.head()

Unnamed: 0,C1,C2,C3,C4,C5
0,10,11,13,12,14
1,11,13,10,12,14
2,12,11,13,10,14
3,10,11,14,13,12
4,14,13,12,11,10


In [7]:
##########################
##  TEST DATASET
##########################
# Split the test table into three frames: the label, the suit columns
# and the rank columns.
df_class_test = dataset_test.loc[:, ["CLASS"]]
df_suit_test = dataset_test.loc[:, ["S1", "S2", "S3", "S4", "S5"]]

# Every rank value 1 is recoded to 14 as part of the same expression.
df_rank_test = (
    dataset_test
    .loc[:, ["C1", "C2", "C3", "C4", "C5"]]
    .replace(1, 14)
)

In [8]:
##################################
##  PRE-PROCESS ::: TRAIN DATASET
##################################
# Feed only the five original card columns to the helpers. Applying them to
# the whole frame makes this cell non-idempotent: on a second run the derived
# columns (SAME_SUIT, SAME_RANK1, SAME_RANK2, MEAN, STD) would be counted and
# averaged as if they were cards.
suit_cards = df_suit[["S1", "S2", "S3", "S4", "S5"]]
rank_cards = df_rank[["C1", "C2", "C3", "C4", "C5"]]

df_suit['SAME_SUIT'] = suit_cards.apply(get_suit_info, axis=1)
# get_rank_info returns a 4-tuple per row; zip(*...) regroups those tuples
# into four sequences, one per new column.
df_rank['SAME_RANK1'], df_rank['SAME_RANK2'], df_rank['MEAN'], df_rank['STD'] = zip(*rank_cards.apply(get_rank_info, axis=1))

In [9]:
# Inspect the suit frame together with the new SAME_SUIT column.
df_suit.head()

Unnamed: 0,S1,S2,S3,S4,S5,SAME_SUIT
0,1,1,1,1,1,5
1,2,2,2,2,2,5
2,3,3,3,3,3,5
3,4,4,4,4,4,5
4,4,4,4,4,4,5


In [10]:
# (rows, columns) of the suit frame after adding SAME_SUIT.
df_suit.shape

(25010, 6)

In [11]:
# (rows, columns) of the raw training table, for comparison with the derived frames.
dataset_train.shape

(25010, 11)

In [12]:
# Inspect the rank frame together with SAME_RANK1, SAME_RANK2, MEAN and STD.
df_rank.head()

Unnamed: 0,C1,C2,C3,C4,C5,SAME_RANK1,SAME_RANK2,MEAN,STD
0,10,11,13,12,14,1,1,12.0,1.58
1,11,13,10,12,14,1,1,12.0,1.58
2,12,11,13,10,14,1,1,12.0,1.58
3,10,11,14,13,12,1,1,12.0,1.58
4,14,13,12,11,10,1,1,12.0,1.58


In [13]:
# Look at a longer slice (up to the first 100 rows) of the engineered rank features.
df_rank.head(100)

Unnamed: 0,C1,C2,C3,C4,C5,SAME_RANK1,SAME_RANK2,MEAN,STD
0,10,11,13,12,14,1,1,12.0,1.58
1,11,13,10,12,14,1,1,12.0,1.58
2,12,11,13,10,14,1,1,12.0,1.58
3,10,11,14,13,12,1,1,12.0,1.58
4,14,13,12,11,10,1,1,12.0,1.58
5,2,4,5,3,6,1,1,4.0,1.58
6,9,12,10,11,13,1,1,11.0,1.58
7,14,2,3,4,5,1,1,5.6,4.83
8,5,6,9,7,8,1,1,7.0,1.58
9,14,4,2,3,5,1,1,5.6,4.83


In [14]:
# Training features: SAME_SUIT from the suit frame followed by the four
# derived rank columns, placed side by side and exported as an array.
x_data = pd.concat(
    [df_suit['SAME_SUIT']]
    + [df_rank[col] for col in ('SAME_RANK1', 'SAME_RANK2', 'MEAN', 'STD')],
    axis=1,
).values
# Training labels as a single-column array.
y_data = df_class.values

In [15]:
# (rows, features) of the training feature array.
x_data.shape

(25010, 5)

In [16]:
# Display the training feature array (numpy abbreviates the repr of large arrays).
x_data

array([[ 5.  ,  1.  ,  1.  , 12.  ,  1.58],
       [ 5.  ,  1.  ,  1.  , 12.  ,  1.58],
       [ 5.  ,  1.  ,  1.  , 12.  ,  1.58],
       ...,
       [ 3.  ,  2.  ,  1.  , 11.  ,  4.24],
       [ 2.  ,  2.  ,  1.  ,  9.2 ,  3.7 ],
       [ 3.  ,  2.  ,  1.  ,  7.2 ,  2.86]])

In [17]:
# Display the training label array (one class id per row).
y_data

array([[9],
       [9],
       [9],
       ...,
       [1],
       [1],
       [1]], dtype=int64)

In [18]:
##################################
##  PRE-PROCESS ::: TEST DATASET
##################################
# Feed only the five original card columns to the helpers. Applying them to
# the whole frame makes this cell non-idempotent: on a second run the derived
# columns (SAME_SUIT, SAME_RANK1, SAME_RANK2, MEAN, STD) would be counted and
# averaged as if they were cards.
suit_cards_test = df_suit_test[["S1", "S2", "S3", "S4", "S5"]]
rank_cards_test = df_rank_test[["C1", "C2", "C3", "C4", "C5"]]

df_suit_test['SAME_SUIT'] = suit_cards_test.apply(get_suit_info, axis=1)
# get_rank_info returns a 4-tuple per row; zip(*...) regroups those tuples
# into four sequences, one per new column.
df_rank_test['SAME_RANK1'], df_rank_test['SAME_RANK2'], df_rank_test['MEAN'], df_rank_test['STD'] = zip(*rank_cards_test.apply(get_rank_info, axis=1))

In [20]:
# Persist the four engineered frames next to the raw data so the row-wise
# feature computation does not have to be repeated.
for _frame, _path in (
    (df_suit, "../../data/df_suit.pkl"),
    (df_rank, "../../data/df_rank.pkl"),
    (df_suit_test, "../../data/df_suit_test.pkl"),
    (df_rank_test, "../../data/df_rank_test.pkl"),
):
    _frame.to_pickle(_path)

In [21]:
# Test features: SAME_SUIT from the suit frame followed by the four derived
# rank columns, placed side by side and exported as an array.
x_test = pd.concat(
    [df_suit_test['SAME_SUIT']]
    + [df_rank_test[col] for col in ('SAME_RANK1', 'SAME_RANK2', 'MEAN', 'STD')],
    axis=1,
).values
# Test labels as a single-column array.
y_test = df_class_test.values

In [22]:
nb_classes = 10  # class ids run 0~9

# Graph inputs: five feature values per row and one integer class id per row.
X = tf.placeholder(dtype=tf.float32, shape=[None, 5])
Y = tf.placeholder(dtype=tf.int32, shape=[None, 1])  # 0~9

# One-hot encoding the [None, 1] labels appends a class axis, so the result
# is rank 3 at this point; it is displayed to make that visible.
Y_one_hot = tf.one_hot(indices=Y, depth=nb_classes)
Y_one_hot

<tf.Tensor 'one_hot:0' shape=(?, 1, 10) dtype=float32>

In [23]:
# Flatten the one-hot labels to two dimensions: one row per example,
# nb_classes columns.
Y_one_hot = tf.reshape(tensor=Y_one_hot, shape=[-1, nb_classes])
Y_one_hot

<tf.Tensor 'Reshape:0' shape=(?, 10) dtype=float32>

In [24]:
# 1. Build the classification model

# Weights use Glorot-normal initialisation; the bias starts at zero.
W = tf.get_variable(name='W',
                    shape=[5, nb_classes],
                    initializer=tf.initializers.glorot_normal())
b = tf.Variable(tf.zeros(shape=[nb_classes]))

# Alternative random-normal initialisation, kept for reference:
# W = tf.Variable(tf.random_normal([5, nb_classes]), name='weight')
# b = tf.Variable(tf.random_normal([nb_classes]), name='bias')

# Classification model: linear scores, then softmax over the classes
logit = tf.matmul(X, W) + b
H = tf.nn.softmax(logits=logit)

Instructions for updating:
Colocations handled automatically by placer.


In [25]:
# 2. Minimise the cost function

# The labels are passed directly so they have the same rank as the logits.
# Wrapping Y_one_hot in a Python list would stack it into a tensor with an
# extra leading axis that no longer matches the logits' shape.
# stop_gradient keeps the labels out of back-propagation.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logit,
        labels=tf.stop_gradient(Y_one_hot)))

# Despite its name, `optimizer` is the training op returned by minimize();
# running it performs one Adam update step.
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

In [26]:
# Predicted class = index of the largest softmax output per row.
prediction = tf.argmax(H, axis=1)
# A row is correct when that index equals the index of the 1 in its one-hot label.
correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, axis=1))
# Accuracy = mean of the correct/incorrect flags cast to floats.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

In [27]:
# 3. Run the training

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # The whole training set is fed on every step (no mini-batches).
    train_feed = {X: x_data, Y: y_data}

    for i in range(10001):
        _, cost_val, acc_val = sess.run([optimizer, cost, accuracy], feed_dict=train_feed)

        # Report progress only on every 2000th step.
        if i % 2000 != 0:
            continue
        print("Step: {:5}\tCost: {:.3f}\tAcc: {:.2%}".format(i, cost_val, acc_val))

#     preds = sess.run(H, {X: x_test})
#     preds_arg = np.argmax(preds, axis=1)  # 1: horizontal, 0: vertical
#     print('acc: ', np.mean(preds_arg == y_test))
#     print(preds_arg)

Step:     0	Cost: 2.759	Acc: 42.38%
Step:  2000	Cost: 0.041	Acc: 99.52%
Step:  4000	Cost: 0.019	Acc: 99.78%
Step:  6000	Cost: 0.013	Acc: 99.93%
Step:  8000	Cost: 0.011	Acc: 99.95%
Step: 10000	Cost: 0.010	Acc: 99.95%


In [None]:
# 4. Prediction
# Let's see if we can predict

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Variable values live inside a session. This is a new session and the
    # initializer above resets W and b, so evaluating right away would score
    # the test set with untrained weights. Fit on the training data first,
    # using the same number of steps as the training cell.
    train_feed = {X: x_data, Y: y_data}
    for _ in range(10001):
        sess.run(optimizer, feed_dict=train_feed)

    pred, acc = sess.run([prediction, accuracy], feed_dict={X: x_test, Y: y_test})

    print("TEST ACCURACY ::: ", acc)
    print("#" * 25, " PREDICT RESULT ", "#" * 25)
    # y_test: (N,1) = flatten => (N, ) matches pred.shape
#     for p, y in zip(pred, y_test.flatten()):
#         if p != int(y):
#             print("[{}] Prediction: {} True Y: {}".format(p == int(y), p, int(y)))