# Softmax classification: Multinomial classification
## Softmax
$$ S(y_i) = \frac{e^{y_i}}{\sum_{j}e^{y_j}}$$
- One-Hot Encoding

## Cost function
$$ D(S, L) = -\sum_iL_i\log(S_i) $$
$$ = \sum_iL_i\times-log(y_i) $$



In [10]:
import tensorflow as tf
import numpy as np

x_data = [[1, 2, 1, 1],
          [2, 1, 3, 2],
          [3, 1, 3, 4],
          [4, 1, 5, 5],
          [1, 7, 5, 5],
          [1, 2, 5, 6],
          [1, 6, 6, 6],
          [1, 7, 7, 7]]
y_data = [[0, 0, 1],
          [0, 0, 1],
          [0, 0, 1],
          [0, 1, 0],
          [0, 1, 0],
          [0, 1, 0],
          [1, 0, 0],
          [1, 0, 0]]

#convert into numpy and float format
x_data = np.asarray(x_data, dtype=np.float32)
y_data = np.asarray(y_data, dtype=np.float32)

#num classes
nb_classes = 3

#Weight and bias setting
W = tf.Variable(tf.random.normal((4, nb_classes)), name='weight')
b = tf.Variable(tf.random.normal((nb_classes,)), name='bias')
variables = [W, b]

sample_db = [[8,2,1,4]]
sample_db = np.asarray(sample_db, dtype=np.float32)

# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
def hypothesis(X):
    return tf.nn.softmax(tf.matmul(X, W) + b)

def cost_fn(X, Y):
    logits = hypothesis(X)
    cost = -tf.reduce_sum(Y * tf.math.log(logits), axis=1)
    cost_mean = tf.reduce_mean(cost)
    
    return cost_mean

def grad_fn(X, Y):
    with tf.GradientTape() as tape:
        loss = cost_fn(X, Y)
        grads = tape.gradient(loss, variables)

        return grads

def fit(X, Y, epochs=2000, verbose=100):
    optimizer =  tf.keras.optimizers.SGD(learning_rate=0.1)

    for i in range(epochs):
        grads = grad_fn(X, Y)
        optimizer.apply_gradients(zip(grads, variables))
        if (i==0) | ((i+1)%verbose==0):
            print('Loss at epoch %d: %f' %(i+1, cost_fn(X, Y).numpy()))
            
fit(x_data, y_data)

Loss at epoch 1: 3.216145
Loss at epoch 100: 0.680034
Loss at epoch 200: 0.597494
Loss at epoch 300: 0.541869
Loss at epoch 400: 0.496473
Loss at epoch 500: 0.456460
Loss at epoch 600: 0.419667
Loss at epoch 700: 0.384725
Loss at epoch 800: 0.350517
Loss at epoch 900: 0.316028
Loss at epoch 1000: 0.280555
Loss at epoch 1100: 0.246620
Loss at epoch 1200: 0.229420
Loss at epoch 1300: 0.218568
Loss at epoch 1400: 0.208656
Loss at epoch 1500: 0.199564
Loss at epoch 1600: 0.191197
Loss at epoch 1700: 0.183473
Loss at epoch 1800: 0.176322
Loss at epoch 1900: 0.169685
Loss at epoch 2000: 0.163510


In [12]:
sample_data = [[2,1,3,2]] # answer_label [[0,0,1]]
sample_data = np.asarray(sample_data, dtype=np.float32)

a = hypothesis(sample_data)

print(a)
print(tf.argmax(a, 1)) #index: 2

b = hypothesis(x_data)
print(b)
print(tf.argmax(b, 1))
print(tf.argmax(y_data, 1)) # matches with y_data

tf.Tensor(
[[0.05484341 0.89765733 0.04749919]
 [0.05128267 0.9077265  0.04099078]
 [0.04755265 0.91751    0.03493747]
 [0.02524517 0.96539634 0.00935843]
 [0.03730619 0.9534999  0.00919393]
 [0.02885423 0.96316403 0.00798174]
 [0.08851133 0.89799076 0.01349787]
 [0.12119357 0.8633212  0.01548521]], shape=(8, 3), dtype=float32)
tf.Tensor([1 1 1 1 1 1 1 1], shape=(8,), dtype=int64)
tf.Tensor(
[[4.4573816e-03 2.4084675e-01 7.5469589e-01]
 [5.1282674e-02 9.0772653e-01 4.0990781e-02]
 [2.3469377e-06 9.8353195e-01 1.6465705e-02]
 [2.8394020e-06 9.9984765e-01 1.4958532e-04]
 [6.4425015e-01 3.5574478e-01 5.0468789e-06]
 [3.8023913e-01 6.1976081e-01 4.1970171e-08]
 [9.7157156e-01 2.8428353e-02 8.5547134e-09]
 [9.9465632e-01 5.3436258e-03 1.1282447e-10]], shape=(8, 3), dtype=float32)
tf.Tensor([2 1 1 1 0 1 0 0], shape=(8,), dtype=int64)
tf.Tensor([2 2 2 1 1 1 0 0], shape=(8,), dtype=int64)


In [14]:
import tensorflow as tf
import numpy as np

xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, -1]

nb_classes = 7  # 0 ~ 6

# Make Y data as onehot shape
Y_one_hot = tf.one_hot(y_data.astype(np.int32), nb_classes)

print(x_data.shape, Y_one_hot.shape)

(101, 16) (101, 7)


In [15]:
#Weight and bias setting
W = tf.Variable(tf.random.normal((16, nb_classes)), name='weight')
b = tf.Variable(tf.random.normal((nb_classes,)), name='bias')
variables = [W, b]

# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
def logit_fn(X):
    return tf.matmul(X, W) + b

def hypothesis(X):
    return tf.nn.softmax(logit_fn(X))

def cost_fn(X, Y):
    logits = logit_fn(X)
    cost_i = tf.keras.losses.categorical_crossentropy(y_true=Y, y_pred=logits, 
                                                      from_logits=True)    
    cost = tf.reduce_mean(cost_i)    
    return cost

def grad_fn(X, Y):
    with tf.GradientTape() as tape:
        loss = cost_fn(X, Y)
        grads = tape.gradient(loss, variables)
        return grads
    
def prediction(X, Y):
    pred = tf.argmax(hypothesis(X), 1)
    correct_prediction = tf.equal(pred, tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return accuracy

In [16]:
def fit(X, Y, epochs=1000, verbose=100):
    optimizer =  tf.keras.optimizers.SGD(learning_rate=0.1)

    for i in range(epochs):
        grads = grad_fn(X, Y)
        optimizer.apply_gradients(zip(grads, variables))
        if (i==0) | ((i+1)%verbose==0):
#             print('Loss at epoch %d: %f' %(i+1, cost_fn(X, Y).numpy()))
            acc = prediction(X, Y).numpy()
            loss = cost_fn(X, Y).numpy() 
            print('Steps: {} Loss: {}, Acc: {}'.format(i+1, loss, acc))

fit(x_data, Y_one_hot)

Steps: 1 Loss: 5.017784595489502, Acc: 0.20792078971862793
Steps: 100 Loss: 0.7714686393737793, Acc: 0.8217821717262268
Steps: 200 Loss: 0.47590935230255127, Acc: 0.8712871074676514
Steps: 300 Loss: 0.33353596925735474, Acc: 0.9009901285171509
Steps: 400 Loss: 0.2523742616176605, Acc: 0.9207921028137207
Steps: 500 Loss: 0.20376144349575043, Acc: 0.9306930899620056
Steps: 600 Loss: 0.17214415967464447, Acc: 0.9702970385551453
Steps: 700 Loss: 0.1497756540775299, Acc: 0.9702970385551453
Steps: 800 Loss: 0.13292743265628815, Acc: 0.9801980257034302
Steps: 900 Loss: 0.11966721713542938, Acc: 0.9801980257034302
Steps: 1000 Loss: 0.10889842361211777, Acc: 0.9801980257034302
