# 동물 종 분류기
- multi-class 분류를 위해 softmax 함수를 사용. 단층 머신러닝으로 구현.
- 16가지의 동물 특성을 변수로 설정
    -  hair / feathers / eggs / milk / airborne / aquatic / predator / toothed / backbone / breathes / venomous / fins / tail / domestic / catsize : Boolean
    - legs : set of values {0,2,4,5,6,8}
- 라벨 : {0,1,2,3,4,5,6} : 특성에 따라 7종으로 나눔
- 데이터 출처 : https://archive.ics.uci.edu/ml/machine-learning-databases/zoo/


In [3]:
import tensorflow as tf
import numpy as np
# Seed TensorFlow's random ops so that repeated runs are reproducible.
tf.set_random_seed(777)

- 데이터 로드, 변수 데이터 지정 및 라벨 지정

In [1]:
import os
# NOTE(review): hard-coded, machine-specific absolute path. The relative
# 'animals.csv' load in this notebook presumably relies on this working
# directory -- adjust it for your environment.
os.chdir('/Users/yongwan89/PycharmProjects/dataset')

In [2]:
# Read the comma-separated table as float32. Every column except the last
# is used as a feature; the last column is kept 2-D as the label column.
xy = np.loadtxt('animals.csv', delimiter=',', dtype=np.float32)
x_data, y_data = xy[:, :-1], xy[:, -1:]

print(x_data.shape, y_data.shape)

(101, 16) (101, 1)


- x_data, y_data -> placeholder

In [11]:
# Number of target classes: the animals are grouped into 7 types.
nb_classes = 7

# Graph inputs; the leading None leaves the batch size unspecified.
X = tf.placeholder(dtype=tf.float32, shape=[None, 16])  # 16 features per animal
Y = tf.placeholder(dtype=tf.int32, shape=[None, 1])  # class index, 0 ~ 6

 - Y를 one_hot으로 만든다. 0~6의 숫자를 텐서로 풀어내는 것.
    - ex) [3] = [[0,0,0,1,0,0,0]]

In [12]:
# Expand each integer label into a one-hot vector of length nb_classes,
# e.g. 3 -> [0, 0, 0, 1, 0, 0, 0]. tf.one_hot appends a new axis to its
# input, so the result is flattened back to [-1, nb_classes] by the reshape.
Y_one_hot = tf.reshape(tf.one_hot(Y, depth=nb_classes), shape=[-1, nb_classes])

- 머신 러닝 모델 정의 
    - 단층
    - activation function : softmax function
    - cost function : cross entropy
    - optimizer : GradientDescentOptimizer / learning rate = 0.1

In [13]:
#단층
# Single-layer softmax classifier: 16 input features -> nb_classes scores.
W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')

# Raw class scores (logits) and their softmax-normalised probabilities.
logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)  # softmax = exp(logits) / reduce_sum(exp(logits), dim)

# Cross-entropy cost averaged over the batch. The labels are passed as the
# [batch, nb_classes] tensor itself; wrapping them in a list would add a
# spurious leading dimension that no longer matches the logits' shape.
# stop_gradient keeps backpropagation from flowing into the labels.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=tf.stop_gradient(Y_one_hot)))

# Plain gradient descent with learning rate 0.1.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

- 예측 및 정확도 측정 프로세스

In [14]:
# Predicted class = index of the largest softmax probability in each row.
prediction = tf.argmax(hypothesis, axis=1)
# Compare against the true class index recovered from the one-hot labels.
correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, axis=1))
# Cast the matches to 0.0 / 1.0; their mean is the fraction classified correctly.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))
print(
    "prediction:", prediction,
    "correct_prediction:", correct_prediction,
    "accuracy:", accuracy,
)

prediction: Tensor("ArgMax:0", shape=(?,), dtype=int64) correct_prediction: Tensor("Equal:0", shape=(?,), dtype=bool) accuracy: Tensor("Mean_1:0", shape=(), dtype=float32)


- 머신 훈련 및 평가

In [15]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # The whole dataset is fed on every step (full-batch training).
    train_feed = {X: x_data, Y: y_data}
    for step in range(2001):
        _, cost_val, acc_val = sess.run([optimizer, cost, accuracy], feed_dict=train_feed)

        # Report progress only on every 100th step.
        if step % 100 != 0:
            continue
        print("Step: {:5}\tCost: {:.3f}\tAcc: {:.2%}".format(step, cost_val, acc_val))

    # Run the trained model on the same inputs and compare each predicted
    # class with its true label.
    pred = sess.run(prediction, feed_dict={X: x_data})
    # ravel turns the (N, 1) label column into a flat sequence so it can be
    # paired with the predictions element by element.
    for guess, truth in zip(pred, y_data.ravel()):
        label = int(truth)
        print("[{}] Prediction: {} True Y: {}".format(guess == label, guess, label))

Step:     0	Cost: 5.838	Acc: 10.89%
Step:   100	Cost: 0.510	Acc: 88.12%
Step:   200	Cost: 0.344	Acc: 90.10%
Step:   300	Cost: 0.265	Acc: 91.09%
Step:   400	Cost: 0.216	Acc: 94.06%
Step:   500	Cost: 0.182	Acc: 95.05%
Step:   600	Cost: 0.157	Acc: 97.03%
Step:   700	Cost: 0.138	Acc: 98.02%
Step:   800	Cost: 0.123	Acc: 98.02%
Step:   900	Cost: 0.110	Acc: 98.02%
Step:  1000	Cost: 0.100	Acc: 100.00%
Step:  1100	Cost: 0.091	Acc: 100.00%
Step:  1200	Cost: 0.084	Acc: 100.00%
Step:  1300	Cost: 0.078	Acc: 100.00%
Step:  1400	Cost: 0.073	Acc: 100.00%
Step:  1500	Cost: 0.068	Acc: 100.00%
Step:  1600	Cost: 0.064	Acc: 100.00%
Step:  1700	Cost: 0.060	Acc: 100.00%
Step:  1800	Cost: 0.057	Acc: 100.00%
Step:  1900	Cost: 0.054	Acc: 100.00%
Step:  2000	Cost: 0.051	Acc: 100.00%
[True] Prediction: 0 True Y: 0
[True] Prediction: 0 True Y: 0
[True] Prediction: 3 True Y: 3
[True] Prediction: 0 True Y: 0
[True] Prediction: 0 True Y: 0
[True] Prediction: 0 True Y: 0
[True] Prediction: 0 True Y: 0
[True] Predictio