# 04. Softmax Classification

In [1]:
import tensorflow as tf
import numpy as np

tf.__version__

'2.5.0'

In [2]:
# Fix TensorFlow's global seed so the random weight initialisation is repeatable.
tf.random.set_seed(seed=1234)

## Data 확인 및 가중치 설정

In [3]:
# Toy training set: 8 samples with 4 features each, stored as float32.
x_data = np.array(
    [[1, 2, 1, 1],
     [2, 1, 3, 2],
     [3, 1, 3, 4],
     [4, 1, 5, 5],
     [1, 7, 5, 5],
     [1, 2, 5, 6],
     [1, 6, 6, 6],
     [1, 7, 7, 7]],
    dtype=np.float32,
)

# Class index of each sample. Indexing the 3x3 identity matrix with these ids
# produces the one-hot rows [0, 0, 1], [0, 1, 0] and [1, 0, 0] as float32.
class_ids = [2, 2, 2, 1, 1, 1, 0, 0]
y_data = np.eye(3, dtype=np.float32)[class_ids]

In [4]:
nb_classes = 3   # number of classes

# Show the shape of the inputs, then the shape of the one-hot labels.
print(x_data.shape, y_data.shape, sep='\n')

(8, 4)
(8, 3)


In [5]:
# Weight and bias setting.
# W needs one row per input feature and one column per class, so the row count
# is read from x_data instead of being hard-coded.
W = tf.Variable(tf.random.normal((x_data.shape[1], nb_classes)), name='weight')
b = tf.Variable(tf.random.normal((nb_classes,)), name='bias')
variables = [W, b]

print(W,b)

<tf.Variable 'weight:0' shape=(4, 3) dtype=float32, numpy=
array([[ 0.8369314 , -0.7342977 ,  1.0402943 ],
       [ 0.04035992, -0.7218659 ,  1.0794858 ],
       [ 0.9032698 , -0.73601735, -0.36105633],
       [-0.60787624,  0.07614239, -0.7211218 ]], dtype=float32)> <tf.Variable 'bias:0' shape=(3,) dtype=float32, numpy=array([1.1468066 , 0.96459925, 0.87830144], dtype=float32)>


## Modeling

In [6]:
def hypothesis(X):
    """Return softmax class probabilities for the input rows X.

    The scores X @ W + b are computed with the module-level W and b and then
    passed through tf.nn.softmax, i.e.
    softmax = exp(logits) / reduce_sum(exp(logits), dim).
    """
    scores = tf.matmul(X, W) + b
    return tf.nn.softmax(scores)

print(hypothesis(x_data))

tf.Tensor(
[[3.4452534e-01 4.9978672e-03 6.5047675e-01]
 [9.4447958e-01 4.5562655e-04 5.5064805e-02]
 [9.4574940e-01 3.7234681e-04 5.3878248e-02]
 [9.9504590e-01 6.0790699e-06 4.9480950e-03]
 [4.2045984e-01 2.9557280e-06 5.7953721e-01]
 [9.9260592e-01 6.2507798e-04 6.7689032e-03]
 [8.9049131e-01 5.1609854e-06 1.0950344e-01]
 [9.1940206e-01 9.5655480e-07 8.0596931e-02]], shape=(8, 3), dtype=float32)


## Cost function

In [7]:
def cost_fn(X, Y):
    """Return the mean cross-entropy of hypothesis(X) against the label rows Y."""
    # hypothesis() already applies softmax, so these are probabilities, not logits.
    probs = hypothesis(X)
    # Sum over the class axis gives one cross-entropy value per sample.
    per_sample = -tf.reduce_sum(Y * tf.math.log(probs), axis=1)
    return tf.reduce_mean(per_sample)

print('cost_mean:  ', cost_fn(x_data, y_data))

cost_mean:   tf.Tensor(4.8212996, shape=(), dtype=float32)


### GradientTape 기능

In [8]:
x = tf.constant(3.0)
# x is a constant, not a variable, so the tape is explicitly asked to watch it.
with tf.GradientTape() as g:
    g.watch(x)
    y = tf.square(x)  # x^2
dy_dx = g.gradient(y, x)  # d(x^2)/dx = 2x, which is 6.0 at x = 3
print(dy_dx)

tf.Tensor(6.0, shape=(), dtype=float32)


In [9]:
def grad_fn(X, Y):
    """Return the gradient of cost_fn(X, Y) for every entry of `variables`."""
    with tf.GradientTape() as tape:
        loss = cost_fn(X, Y)
    # Only the forward pass has to run under the tape; differentiate afterwards.
    return tape.gradient(loss, variables)

print(grad_fn(x_data, y_data))    # gradient for each element of the weights and bias

[<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[ 1.2842344 , -0.74903935, -0.535195  ],
       [ 0.9101898 , -1.2484827 ,  0.33829278],
       [ 2.1043162 , -1.8736638 , -0.23065293],
       [ 2.2285507 , -1.998596  , -0.22995517]], dtype=float32)>, <tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 0.5565949 , -0.37419173, -0.18240319], dtype=float32)>]


In [10]:
# `variables` is the list [W, b] built in the weight-setup cell; print its current values.
print(variables)

[<tf.Variable 'weight:0' shape=(4, 3) dtype=float32, numpy=
array([[ 0.8369314 , -0.7342977 ,  1.0402943 ],
       [ 0.04035992, -0.7218659 ,  1.0794858 ],
       [ 0.9032698 , -0.73601735, -0.36105633],
       [-0.60787624,  0.07614239, -0.7211218 ]], dtype=float32)>, <tf.Variable 'bias:0' shape=(3,) dtype=float32, numpy=array([1.1468066 , 0.96459925, 0.87830144], dtype=float32)>]


In [11]:
# Demonstrate zip: it pairs each gradient with the variable at the same position.
grads = grad_fn(x_data, y_data)
for grad, var in zip(grads, variables):
    print((grad, var))

(<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[ 1.2842344 , -0.74903935, -0.535195  ],
       [ 0.9101898 , -1.2484827 ,  0.33829278],
       [ 2.1043162 , -1.8736638 , -0.23065293],
       [ 2.2285507 , -1.998596  , -0.22995517]], dtype=float32)>, <tf.Variable 'weight:0' shape=(4, 3) dtype=float32, numpy=
array([[ 0.8369314 , -0.7342977 ,  1.0402943 ],
       [ 0.04035992, -0.7218659 ,  1.0794858 ],
       [ 0.9032698 , -0.73601735, -0.36105633],
       [-0.60787624,  0.07614239, -0.7211218 ]], dtype=float32)>)
(<tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 0.5565949 , -0.37419173, -0.18240319], dtype=float32)>, <tf.Variable 'bias:0' shape=(3,) dtype=float32, numpy=array([1.1468066 , 0.96459925, 0.87830144], dtype=float32)>)


## 학습 및 가중치 최적화

In [12]:
def fit(X, Y, epochs=2000, verbose=100):
    """Train the module-level `variables` with full-batch SGD and log the loss.

    Args:
        X: input rows forwarded to grad_fn / cost_fn.
        Y: label rows matching X.
        epochs: number of update steps.
        verbose: the loss is printed after the first step and after every
            `verbose`-th step; 0 or None turns logging off.
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

    for i in range(epochs):
        grads = grad_fn(X, Y)
        optimizer.apply_gradients(zip(grads, variables))
        step = i + 1
        # Logical `or` instead of bitwise `|`; the truthiness guard prevents a
        # modulo-by-zero when logging is disabled.
        if verbose and (i == 0 or step % verbose == 0):
            print('Loss at epoch %d: %f' %(step, cost_fn(X, Y).numpy()))

fit(x_data, y_data)

Loss at epoch 1: 3.077796
Loss at epoch 100: 0.648126
Loss at epoch 200: 0.583560
Loss at epoch 300: 0.527852
Loss at epoch 400: 0.476864
Loss at epoch 500: 0.428181
Loss at epoch 600: 0.380648
Loss at epoch 700: 0.333812
Loss at epoch 800: 0.288416
Loss at epoch 900: 0.252232
Loss at epoch 1000: 0.236942
Loss at epoch 1100: 0.225131
Loss at epoch 1200: 0.214419
Loss at epoch 1300: 0.204656
Loss at epoch 1400: 0.195719
Loss at epoch 1500: 0.187507
Loss at epoch 1600: 0.179934
Loss at epoch 1700: 0.172930
Loss at epoch 1800: 0.166433
Loss at epoch 1900: 0.160392
Loss at epoch 2000: 0.154759


## Prediction Check

In [13]:
# Same features as the second training row, whose label is [0, 0, 1].
sample_data = np.array([[2, 1, 3, 2]], dtype=np.float32)

a = hypothesis(sample_data)

print(a)
print(tf.argmax(a, axis=1))  # expected index: 2

tf.Tensor([[0.00233654 0.0834768  0.91418666]], shape=(1, 3), dtype=float32)
tf.Tensor([2], shape=(1,), dtype=int64)


In [14]:
# Store the predictions under their own name: rebinding `b` here would replace
# the bias that hypothesis() reads, so re-running this cell would give wrong results.
train_probs = hypothesis(x_data)
print(train_probs)
print(tf.argmax(train_probs, 1))
print(tf.argmax(y_data, 1)) # label indices; the predicted indices above should match

tf.Tensor(
[[2.5278325e-06 9.0602465e-04 9.9909151e-01]
 [2.3365410e-03 8.3476759e-02 9.1418666e-01]
 [8.5252346e-08 1.6341875e-01 8.3658111e-01]
 [2.8292488e-06 8.4959275e-01 1.5040438e-01]
 [2.5089645e-01 7.3689163e-01 1.2211842e-02]
 [1.3153560e-01 8.6846066e-01 3.7197158e-06]
 [7.5783312e-01 2.4215178e-01 1.5085251e-05]
 [9.2092609e-01 7.9073697e-02 2.2452508e-07]], shape=(8, 3), dtype=float32)
tf.Tensor([2 2 2 1 1 1 0 0], shape=(8,), dtype=int64)
tf.Tensor([2 2 2 1 1 1 0 0], shape=(8,), dtype=int64)


## Convert to a Class

In [15]:
class softmax_classifer(tf.keras.Model):
    """Softmax regression model: probabilities = softmax(X @ W + b), trained with SGD."""

    def __init__(self, nb_classes, nb_features=4):
        """Create randomly initialised weight and bias variables.

        Args:
            nb_classes: number of classes (columns of W, length of b).
            nb_features: number of input features (rows of W). Defaults to 4,
                the value that used to be hard-coded.
        """
        super(softmax_classifer, self).__init__()
        self.W = tf.Variable(tf.random.normal((nb_features, nb_classes)), name='weight')
        self.b = tf.Variable(tf.random.normal((nb_classes,)), name='bias')

    def softmax_regression(self, X):
        """Return softmax class probabilities for the input rows X."""
        return tf.nn.softmax(tf.matmul(X, self.W) + self.b)

    def cost_fn(self, X, Y):
        """Return the mean cross-entropy of softmax_regression(X) against Y."""
        # softmax_regression() returns probabilities, not raw logits.
        probs = self.softmax_regression(X)
        cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.math.log(probs), axis=1))
        return cost

    def grad_fn(self, X, Y):
        """Return the gradient of cost_fn(X, Y) for every entry of self.variables."""
        with tf.GradientTape() as tape:
            # Use the batch passed by the caller, not the notebook-level x_data / y_data.
            cost = self.cost_fn(X, Y)
        return tape.gradient(cost, self.variables)

    def fit(self, X, Y, epochs=2000, verbose=500):
        """Train with full-batch SGD and log the loss.

        Args:
            X: input rows.
            Y: label rows matching X.
            epochs: number of update steps.
            verbose: the loss is printed after the first step and after every
                `verbose`-th step; 0 or None turns logging off.
        """
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

        for i in range(epochs):
            grads = self.grad_fn(X, Y)
            optimizer.apply_gradients(zip(grads, self.variables))
            step = i + 1
            # Logical `or` instead of bitwise `|`; the guard avoids `% 0`.
            if verbose and (i == 0 or step % verbose == 0):
                print('Loss at epoch %d: %f' %(step, self.cost_fn(X, Y).numpy()))

model = softmax_classifer(nb_classes)
model.fit(x_data, y_data)

Loss at epoch 1: 2.043091
Loss at epoch 500: 0.462359
Loss at epoch 1000: 0.245531
Loss at epoch 1500: 0.191300
Loss at epoch 2000: 0.157818


## Example : Zoo classifier

In [16]:
# Every column except the last is used as a feature; the last column is the label.
xy = np.loadtxt('zoo.txt', delimiter=',', dtype=np.float32)

x_data, y_data = xy[:, :-1], xy[:, -1]

nb_classes = 7  # 0 ~ 6

In [17]:
y_data.astype(np.int32)   # preview the labels converted to int32 (displayed only; y_data itself is not reassigned)

array([0, 0, 3, 0, 0, 0, 0, 3, 3, 0, 0, 1, 3, 6, 6, 6, 1, 0, 3, 0, 1, 1,
       0, 1, 5, 4, 4, 0, 0, 0, 5, 0, 0, 1, 3, 0, 0, 1, 3, 5, 5, 1, 5, 1,
       0, 0, 6, 0, 0, 0, 0, 5, 4, 6, 0, 0, 1, 1, 1, 1, 3, 3, 2, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 6, 3, 0, 0, 2, 6, 1, 1, 2, 6, 3, 1, 0, 6, 3, 1,
       5, 4, 2, 2, 3, 0, 0, 1, 0, 5, 0, 6, 1], dtype=int32)

In [18]:
# One-hot encode the integer labels: one row of length nb_classes per sample.
Y_one_hot = tf.one_hot(indices=y_data.astype(np.int32), depth=nb_classes)

print(x_data.shape, Y_one_hot.shape)

(101, 16) (101, 7)


In [19]:
# Weight and bias setting.
# The number of weight rows follows the feature count of x_data rather than a
# hard-coded 16, so this cell keeps working if the input columns change.
W = tf.Variable(tf.random.normal((x_data.shape[1], nb_classes)), name='weight')
b = tf.Variable(tf.random.normal((nb_classes,)), name='bias')
variables = [W, b]

# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
def logit_fn(X):
    """Return the raw class scores X @ W + b, before any softmax is applied."""
    weighted = tf.matmul(X, W)
    return weighted + b

def hypothesis(X):
    """Return softmax class probabilities computed from logit_fn(X)."""
    scores = logit_fn(X)
    return tf.nn.softmax(scores)

def cost_fn(X, Y):
    """Return the mean categorical cross-entropy of logit_fn(X) against Y."""
    # from_logits=True: the loss is given raw scores, not softmax outputs.
    per_sample = tf.keras.losses.categorical_crossentropy(
        Y, logit_fn(X), from_logits=True)
    return tf.reduce_mean(per_sample)

def grad_fn(X, Y):
    """Return the gradient of cost_fn(X, Y) for every entry of `variables`."""
    with tf.GradientTape() as tape:
        loss = cost_fn(X, Y)
    # Only the forward pass has to run under the tape; differentiate afterwards.
    return tape.gradient(loss, variables)
    
def prediction(X, Y):
    """Return the fraction of rows where argmax of hypothesis(X) equals argmax of Y."""
    predicted = tf.argmax(hypothesis(X), axis=1)
    expected = tf.argmax(Y, axis=1)
    # Cast the boolean matches to float so their mean is the accuracy.
    hits = tf.cast(tf.equal(predicted, expected), tf.float32)
    return tf.reduce_mean(hits)

In [20]:
def fit(X, Y, epochs=1000, verbose=100):
    """Train the module-level `variables` with full-batch SGD, logging loss and accuracy.

    Args:
        X: input rows forwarded to grad_fn / cost_fn / prediction.
        Y: one-hot label rows matching X.
        epochs: number of update steps.
        verbose: metrics are printed after the first step and after every
            `verbose`-th step; 0 or None turns logging off.
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

    for i in range(epochs):
        grads = grad_fn(X, Y)
        optimizer.apply_gradients(zip(grads, variables))
        step = i + 1
        # Logical `or` instead of bitwise `|`; the truthiness guard prevents a
        # modulo-by-zero when logging is disabled.
        if verbose and (i == 0 or step % verbose == 0):
            acc = prediction(X, Y).numpy()
            loss = cost_fn(X, Y).numpy()
            print('Steps: {} Loss: {}, Acc: {}'.format(step, loss, acc))

fit(x_data, Y_one_hot)

Steps: 1 Loss: 3.553328037261963, Acc: 0.3465346395969391
Steps: 100 Loss: 0.553804337978363, Acc: 0.8316831588745117
Steps: 200 Loss: 0.367810994386673, Acc: 0.9108911156654358
Steps: 300 Loss: 0.28497543931007385, Acc: 0.9405940771102905
Steps: 400 Loss: 0.23434379696846008, Acc: 0.9405940771102905
Steps: 500 Loss: 0.1992104947566986, Acc: 0.9504950642585754
Steps: 600 Loss: 0.17302118241786957, Acc: 0.9504950642585754
Steps: 700 Loss: 0.1526147574186325, Acc: 0.9504950642585754
Steps: 800 Loss: 0.13624872267246246, Acc: 0.9504950642585754
Steps: 900 Loss: 0.12285429239273071, Acc: 0.9504950642585754
Steps: 1000 Loss: 0.11171947419643402, Acc: 0.9801980257034302


- References
  * https://github.com/deeplearningzerotoall/TensorFlow/blob/master/tf_2.x/lab-06-1-softmax_classifier-eager.ipynb
  * https://github.com/deeplearningzerotoall/TensorFlow/blob/master/tf_2.x/lab-06-2-softmax_zoo_classifier-eager.ipynb