<a href="https://colab.research.google.com/github/devyulbae/AIClass/blob/main/Q3_Zoo_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.__version__

'2.15.0'

In [2]:
# Mount Google Drive at /content/drive (Colab-only API); the dataset is later
# read from a path under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# ZOO classification

### Data list

1. 동물 이름  animal name:     (deleted)
2. 털  hair     Boolean
3. 깃털  feathers     Boolean
4. 알  eggs     Boolean
5. 우유 milk     Boolean
6. 날 수 있는지  airborne     Boolean
7. 수중 생물  aquatic      Boolean
8. 포식자  predator     Boolean
9. 이빨이 있는지 toothed      Boolean
10. 척추 동물  backbone     Boolean
11. 호흡 방법  breathes     Boolean
12. 독  venomous     Boolean
13. 지느러미  fins     Boolean
14. 다리  legs     Numeric (set of values: {0,2,4,5,6,8})
15. 꼬리  tail     Boolean
16. 사육 가능한 지 domestic     Boolean
17. 고양이 크기인지 catsize      Boolean
18. 동물 타입 type     Numeric (integer values in range [0,6])

In [3]:
# Read the zoo CSV as an int32 matrix. The last column is the class label
# (animal type); every other column is a feature.
xy = np.loadtxt('/content/drive/MyDrive/datas/data-04-zoo.csv',
                delimiter=',',
                dtype=np.int32)

# Hold out the final 10 rows as the test set; all rows before them are used
# for training. Indexing the label column with the list [-1] keeps it 2-D,
# i.e. shape (N, 1) rather than (N,).
x_train, y_train = xy[:-10, :-1], xy[:-10, [-1]]
x_test, y_test = xy[-10:, :-1], xy[-10:, [-1]]

# The features are cast from int32 to float32 for the matmul in the model.
x_train = tf.cast(x_train, tf.float32)
x_test = tf.cast(x_test, tf.float32)

nb_classes = 7  # labels take the values 0 ~ 6

# One-hot encoding, illustrated for 3 classes [0, 1, 2]:
# labels : 0, 0, 1, 2, 0, 1, 2 ....
# 1 => [0, 1, 0]
# 0 => [1, 0, 0]
# 2 => [0, 0, 1]

print(y_train[15])  # integer label of sample 15 before encoding
# one_hot on (N, 1) labels yields (N, 1, nb_classes); the reshape drops the
# middle axis so the result is (N, nb_classes).
y_train = tf.reshape(tf.one_hot(y_train, nb_classes), [-1, nb_classes])
print(y_train[15])  # the same sample after encoding

y_test = tf.reshape(tf.one_hot(y_test, nb_classes), [-1, nb_classes])

# Sanity-check shapes and dtypes of both splits.
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

print(x_train.dtype, y_train.dtype)
print(x_test.dtype, y_test.dtype)


[6]
tf.Tensor([0. 0. 0. 0. 0. 0. 1.], shape=(7,), dtype=float32)
(91, 16) (91, 7)
(10, 16) (10, 7)
<dtype: 'float32'> <dtype: 'float32'>
<dtype: 'float32'> <dtype: 'float32'>


In [4]:
# Fix TensorFlow's global random seed so the randomly initialised W and b
# (and therefore the whole training run) are the same on every
# Restart & Run All.
tf.random.set_seed(777)

# Full-batch training: a single batch that contains every training example.
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))

# Derive the parameter shapes from the data instead of hard-coding 16 and 7,
# so the cell keeps working if the feature or class count changes.
num_features = x_train.shape[-1]
num_classes = y_train.shape[-1]

# Parameters of the linear layer: logits = features @ W + b.
W = tf.Variable(tf.random.normal([num_features, num_classes]), name='weight')
b = tf.Variable(tf.random.normal([num_classes]), name='bias')

print(W.shape, b.shape)

(16, 7) (7,)


# 가설 설정

* 주어진 동물의 데이터들로 분류하는 가설 모델을 생성한다

## $$ y_k = \frac{\exp(H(x_k))}{\sum_{i=1}^{n}\exp(H(x_i))}  $$

In [None]:
def lr_softmax(features): # hypothesis_softmax
    """Softmax-regression hypothesis: class probabilities for each input row.

    Args:
        features: float tensor whose last dimension matches the first
            dimension of the global weight matrix ``W``.

    Returns:
        ``softmax(features @ W + b)`` computed with the current values of the
        global variables ``W`` and ``b``.
    """
    logits = tf.matmul(features, W) + b
    return tf.nn.softmax(logits)

# Print the hypothesis for the whole training set using the current W and b.
print(lr_softmax(x_train))

## Loss Function

$$
\begin{align}
cost(H(x), y) & = -\sum_{i=1}^{n} y_i \log(H(x)_i)
\end{align}
$$

In [6]:
def loss_fn(hypothesis, labels):
  """Categorical cross-entropy between predicted probabilities and one-hot labels.

  Args:
    hypothesis: predicted class probabilities (the notebook passes the
      output of ``lr_softmax``).
    labels: one-hot targets with the same shape as ``hypothesis``.

  Returns:
    The result of ``tf.keras.losses.categorical_crossentropy``: one loss
    value per example. No averaging is done here; the training loop applies
    ``tf.reduce_mean`` when it reports the loss.
  """
  # Alternatives that are not used here:
  #   tf.keras.losses.binary_crossentropy(labels, hypothesis)  # CE loss for binary classification
  #   -tf.reduce_mean(labels * tf.math.log(hypothesis))
  return tf.keras.losses.categorical_crossentropy(labels, hypothesis)

# Gradient-descent optimizer from the tf.compat.v1 API with a fixed learning rate of 1e-2.
optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=1e-2)

In [7]:
epochs = 5001

# Gradient-descent training. `dataset` yields a single batch holding the whole
# training set, so every step is one full-batch update of W and b.
for step in range(epochs):
  for features, labels in dataset:
    # Only the forward pass needs to be recorded. Computing and applying the
    # gradients inside the `with` block would make the tape trace those ops
    # too, which costs time and memory for no benefit.
    with tf.GradientTape() as tape:
      loss_value = loss_fn(lr_softmax(features), labels)

    # `loss_value` holds one loss per example; for a non-scalar target
    # tape.gradient differentiates the sum of its elements.
    grads = tape.gradient(loss_value, [W, b])
    optimizer.apply_gradients(grads_and_vars=zip(grads, [W, b]))

    # Every 100 steps, report the mean loss measured after this step's update.
    if step % 100 == 0:
      print("Iter: {}, Loss: {:.4f}".format(step, tf.reduce_mean(loss_fn(lr_softmax(features), labels))))


Iter: 0, Loss: 2.1265
Iter: 100, Loss: 0.1384
Iter: 200, Loss: 0.0703
Iter: 300, Loss: 0.0447
Iter: 400, Loss: 0.0323
Iter: 500, Loss: 0.0253
Iter: 600, Loss: 0.0208
Iter: 700, Loss: 0.0177
Iter: 800, Loss: 0.0154
Iter: 900, Loss: 0.0137
Iter: 1000, Loss: 0.0123
Iter: 1100, Loss: 0.0111
Iter: 1200, Loss: 0.0102
Iter: 1300, Loss: 0.0094
Iter: 1400, Loss: 0.0087
Iter: 1500, Loss: 0.0081
Iter: 1600, Loss: 0.0076
Iter: 1700, Loss: 0.0072
Iter: 1800, Loss: 0.0068
Iter: 1900, Loss: 0.0064
Iter: 2000, Loss: 0.0061
Iter: 2100, Loss: 0.0058
Iter: 2200, Loss: 0.0055
Iter: 2300, Loss: 0.0053
Iter: 2400, Loss: 0.0051
Iter: 2500, Loss: 0.0049
Iter: 2600, Loss: 0.0047
Iter: 2700, Loss: 0.0045
Iter: 2800, Loss: 0.0044
Iter: 2900, Loss: 0.0042
Iter: 3000, Loss: 0.0041
Iter: 3100, Loss: 0.0039
Iter: 3200, Loss: 0.0038
Iter: 3300, Loss: 0.0037
Iter: 3400, Loss: 0.0036
Iter: 3500, Loss: 0.0035
Iter: 3600, Loss: 0.0034
Iter: 3700, Loss: 0.0033
Iter: 3800, Loss: 0.0032
Iter: 3900, Loss: 0.0031
Iter: 4000, 

In [8]:
def accuracy_fn(hypothesis, labels):
    """Fraction of rows whose predicted class equals the labelled class.

    The class index of each row is taken as the arg-max along axis 1, for
    both the predictions and the one-hot labels. As a side effect the two
    index vectors are printed (predictions first, then labels).

    Args:
        hypothesis: 2-D tensor of per-class scores/probabilities.
        labels: 2-D one-hot tensor with the same shape as ``hypothesis``.

    Returns:
        Scalar float32 tensor: the mean of the element-wise equality between
        the predicted and the labelled class indices.
    """
    predicted = tf.cast(tf.argmax(hypothesis, axis=1), tf.float32)
    print(predicted)

    expected = tf.cast(tf.argmax(labels, axis=1), tf.float32)
    print(expected)

    is_correct = tf.equal(predicted, expected)
    return tf.reduce_mean(tf.cast(is_correct, tf.float32))

In [9]:
# Score the held-out test rows with the current W and b.
test_probs = lr_softmax(x_test)
test_acc = accuracy_fn(test_probs, y_test)
print("Testset Accuracy: {:.4f}".format(test_acc))

tf.Tensor([4. 3. 0. 0. 1. 0. 5. 0. 6. 1.], shape=(10,), dtype=float32)
tf.Tensor([2. 3. 0. 0. 1. 0. 5. 0. 6. 1.], shape=(10,), dtype=float32)
Testset Accuracy: 0.9000
