# Softmax classification
## TensorFlow

In [1]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
# Toy training set: 8 samples, 4 features each.
x_data = [
    [1, 2, 1, 1],
    [2, 1, 3, 2],
    [3, 1, 3, 4],
    [4, 1, 5, 5],
    [1, 7, 5, 5],
    [1, 2, 5, 6],
    [1, 6, 6, 6],
    [1, 7, 7, 7],
]

# Matching labels, one-hot encoded over 3 classes (one row per sample).
y_data = [
    [0, 0, 1],
    [0, 0, 1],
    [0, 0, 1],
    [0, 1, 0],
    [0, 1, 0],
    [0, 1, 0],
    [1, 0, 0],
    [1, 0, 0],
]

In [6]:
# Number of output classes; matches the width of the one-hot rows in y_data.
nb_classes = 3

# Graph inputs: the batch dimension is left open (None) so any number of
# samples can be fed. X carries 4 features per sample, Y one one-hot row.
X = tf.placeholder('float', shape=[None, 4])
Y = tf.placeholder('float', shape=[None, nb_classes])

# Trainable parameters, randomly initialised.
# Weight shape is [input size, output size]; bias has one entry per class.
W = tf.Variable(tf.random_normal([4, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')

In [7]:
# Linear scores for every class, then softmax to turn them into probabilities:
#   softmax = exp(logits) / reduce_sum(exp(logits), dim)
logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)

# Cross-entropy written out by hand: for each sample sum -Y * log(p) over the
# classes (axis 1), then average over the batch.
cross_entropy = -tf.reduce_sum(Y * tf.log(hypothesis), axis=1)
cost = tf.reduce_mean(cross_entropy)

# Plain gradient descent on the mean cross-entropy.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

In [8]:
# Train the softmax classifier on the toy data, then classify three unseen
# samples: first one at a time, then all together as a single batch.

# tf.argmax -> returns the index of the max element; built once on the graph
# and reused for every test query instead of creating a new op per call.
prediction = tf.argmax(hypothesis, 1)

# Unseen samples used for the sanity check after training.
test_samples = [[1, 11, 7, 9], [1, 3, 4, 3], [1, 1, 0, 1]]

# launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(2001):
        sess.run(optimizer, feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, sess.run(cost, feed_dict={X: x_data, Y: y_data}))

    # testing & one-hot encoding: class probabilities and the winning class,
    # one sample per query. Results live in loop-local names so the notebook
    # globals `b` (the bias Variable) and the builtin `all` are not clobbered.
    for sample in test_samples:
        probs, classes = sess.run([hypothesis, prediction], feed_dict={X: [sample]})
        print(probs, classes)

        print('-' * 10)

    # Same three samples fed as one batch.
    batch_probs, batch_classes = sess.run([hypothesis, prediction], feed_dict={X: test_samples})
    print(batch_probs, batch_classes)

0 1.089675
200 0.5427944
400 0.43479735
600 0.33923328
800 0.25229973
1000 0.22181144
1200 0.20167935
1400 0.1848445
1600 0.1705471
1800 0.15825337
2000 0.14757152
[[2.5224951e-03 9.9746740e-01 1.0052209e-05]] [1]
----------
[[0.89527243 0.09045193 0.01427561]] [0]
----------
[[8.6944016e-09 3.0477278e-04 9.9969530e-01]] [2]
----------
[[2.5224951e-03 9.9746740e-01 1.0052190e-05]
 [8.9527243e-01 9.0451956e-02 1.4275616e-02]
 [8.6944016e-09 3.0477278e-04 9.9969530e-01]] [1 0 2]


### Fancy Softmax Classifier

#### Animal classification
Predicting animal type based on various features

In [9]:
import numpy as np

In [11]:
# Read the zoo dataset as float32; every row is one animal.
xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.float32)

# All columns but the last are inputs; the last column is kept 2-D, shape (N, 1),
# by indexing it with a list.
x_data, y_data = xy[:, :-1], xy[:, [-1]]

In [12]:
nb_classes = 7  # class labels run 0 ~ 6

# Inputs: 16 features per sample; labels arrive as a single integer column.
X = tf.placeholder(tf.float32, shape=[None, 16])
Y = tf.placeholder(tf.int32, shape=[None, 1])  # 0 ~ 6, shape = (?, 1)

# tf.one_hot appends a new axis, giving shape (?, 1, 7); the reshape collapses
# it back to one row per sample, shape (?, 7).
Y_one_hot = tf.reshape(tf.one_hot(Y, nb_classes), [-1, nb_classes])

# Trainable parameters, randomly initialised.
W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')

If the input indices have rank N, the output of `tf.one_hot` has rank N+1:
a new axis is created at dimension `axis` (default: appended at the end).
Here `Y` has shape (?, 1), so the one-hot result has shape (?, 1, 7) and must be reshaped to (?, 7).

#### softmax_cross_entropy_with_logits_v2
`softmax_cross_entropy_with_logits` is deprecated; use `softmax_cross_entropy_with_logits_v2` instead.

In [14]:
# Raw class scores (aka logits) and the probabilities derived from them.
logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)

# Cross-entropy cost/loss. The hand-written form would be
#   cost = tf.reduce_mean(-tf.reduce_sum(Y * log(hypothesis), axis = 1))
# Here the built-in op is used instead: it takes the logits and the one-hot
# labels and returns one loss value per sample.
cost_i = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y_one_hot, logits=logits)
cost = tf.reduce_mean(cost_i)

# Plain gradient descent on the mean loss.
sgd = tf.train.GradientDescentOptimizer(learning_rate=0.1)
optimizer = sgd.minimize(cost)

In [15]:
# Predicted class = index of the largest probability -> value between 0 ~ 6.
prediction = tf.argmax(hypothesis, 1)
# A prediction is correct when it equals the index of the 1 in the one-hot label.
correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, 1))
# Mean of the 0/1 hits = fraction of samples classified correctly.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# The whole dataset is fed at every step (full-batch training).
train_feed = {X: x_data, Y: y_data}

# launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(2000):
        sess.run(optimizer, feed_dict=train_feed)
        if step % 100 != 0:
            continue
        # Report loss and accuracy on the training data every 100 steps.
        loss, acc = sess.run([cost, accuracy], feed_dict=train_feed)
        print('Step: {:5}\t Loss: {:.3f}\tAcc: {:.2%}'.format(step, loss, acc))

    # Let's see if we can predict: compare each prediction with its label.
    pred = sess.run(prediction, feed_dict={X: x_data})
    # y_data is (N, 1); flatten() gives (N,), matching the shape of pred.
    for guess, label in zip(pred, y_data.flatten()):
        actual = int(label)
        print('[{}] Prediction: {} True Y: {}'.format(guess == actual, guess, actual))

Step:     0	 Loss: 7.815	Acc: 5.94%
Step:   100	 Loss: 0.733	Acc: 82.18%
Step:   200	 Loss: 0.466	Acc: 88.12%
Step:   300	 Loss: 0.352	Acc: 91.09%
Step:   400	 Loss: 0.282	Acc: 94.06%
Step:   500	 Loss: 0.233	Acc: 94.06%
Step:   600	 Loss: 0.197	Acc: 94.06%
Step:   700	 Loss: 0.170	Acc: 95.05%
Step:   800	 Loss: 0.148	Acc: 95.05%
Step:   900	 Loss: 0.130	Acc: 99.01%
Step:  1000	 Loss: 0.116	Acc: 99.01%
Step:  1100	 Loss: 0.105	Acc: 99.01%
Step:  1200	 Loss: 0.095	Acc: 99.01%
Step:  1300	 Loss: 0.087	Acc: 100.00%
Step:  1400	 Loss: 0.081	Acc: 100.00%
Step:  1500	 Loss: 0.075	Acc: 100.00%
Step:  1600	 Loss: 0.070	Acc: 100.00%
Step:  1700	 Loss: 0.065	Acc: 100.00%
Step:  1800	 Loss: 0.062	Acc: 100.00%
Step:  1900	 Loss: 0.058	Acc: 100.00%
[True] Prediction: 0 True Y: 0
[True] Prediction: 0 True Y: 0
[True] Prediction: 3 True Y: 3
[True] Prediction: 0 True Y: 0
[True] Prediction: 0 True Y: 0
[True] Prediction: 0 True Y: 0
[True] Prediction: 0 True Y: 0
[True] Prediction: 3 True Y: 3
[True]

## PyTorch

In [18]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [42]:
class LogisticClassificationModel(nn.Module):
    """Single linear layer that maps input features to per-class scores.

    The forward pass returns the raw output of the linear layer (logits);
    no softmax is applied inside the model.
    """

    def __init__(self, input_size, nb_classes):
        """Create the linear layer.

        Args:
            input_size: number of input features per sample.
            nb_classes: number of output classes (one score per class).
        """
        super().__init__()
        self.linear = nn.Linear(input_size, nb_classes)

    def forward(self, x):
        """Return the linear layer's output for the batch ``x``."""
        return self.linear(x)

In [43]:
# Training inputs: 8 samples x 4 features, as a float32 tensor.
x_data = torch.from_numpy(
    np.array(
        [
            [1, 2, 1, 1],
            [2, 1, 3, 2],
            [3, 1, 3, 4],
            [4, 1, 5, 5],
            [1, 7, 5, 5],
            [1, 2, 5, 6],
            [1, 6, 6, 6],
            [1, 7, 7, 7],
        ],
        dtype=np.float32,
    )
)

# Labels are written one-hot, then collapsed to class indices: torch.max along
# dim 1 returns (values, indices) and the indices are the class ids.
y_data = torch.max(
    torch.from_numpy(
        np.array(
            [
                [0, 0, 1],
                [0, 0, 1],
                [0, 0, 1],
                [0, 1, 0],
                [0, 1, 0],
                [0, 1, 0],
                [1, 0, 0],
                [1, 0, 0],
            ]
        )
    ),
    1,
)[1]

# hyper parameters
input_size = 4
nb_classes = 3
num_epochs = 2001
learning_rate = 0.1

In [44]:
model = LogisticClassificationModel(input_size, nb_classes)

# nn.CrossEntropyLoss = Softmax + CrossEntropy (logSoftmax + NLLLoss), so it is
# given raw logits together with class indices:
#   - y_pred (model output) holds logits, not softmax probabilities,
#     one row per sample and one column per class
#   - the target holds one class index per sample (not one-hot),
#     each with 0 <= value < nClasses (0~2 here)
criterion = nn.CrossEntropyLoss()  # (logit, class)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    # Forward pass over the full batch and loss against the class indices.
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)

    # Training accuracy: predicted class is the index of the largest logit.
    predicted = torch.max(y_pred, 1)[1]
    accuracy = (y_data == predicted).float().mean()

    # Clear old gradients, backpropagate, take one SGD step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress every 200 epochs only.
    if epoch % 200 != 0:
        continue
    print(epoch, '\tCost: ', loss.item(), '\tAccuracy: ', accuracy)

0 	Cost:  2.94557523727417 	Accuracy:  tensor(0.3750)
200 	Cost:  0.6071863770484924 	Accuracy:  tensor(0.7500)
400 	Cost:  0.5003668069839478 	Accuracy:  tensor(0.8750)
600 	Cost:  0.4063349664211273 	Accuracy:  tensor(0.8750)
800 	Cost:  0.3148180842399597 	Accuracy:  tensor(0.8750)
1000 	Cost:  0.24175181984901428 	Accuracy:  tensor(1.)
1200 	Cost:  0.21820279955863953 	Accuracy:  tensor(1.)
1400 	Cost:  0.1990385800600052 	Accuracy:  tensor(1.)
1600 	Cost:  0.18286724388599396 	Accuracy:  tensor(1.)
1800 	Cost:  0.16904345154762268 	Accuracy:  tensor(1.)
2000 	Cost:  0.15709702670574188 	Accuracy:  tensor(1.)


In [45]:
# Three unseen samples (4 features each) to classify with the trained model.
test_inputs = np.array([[1, 11, 7, 9], [1, 3, 4, 3], [1, 1, 0, 1]], dtype=np.float32)

# Logits for each sample; the index of the largest logit is the predicted class.
y_pred = model(torch.from_numpy(test_inputs))
predicted = torch.max(y_pred, 1)[1]
predicted

tensor([1, 0, 2])

#### Fancy Softmax Classifier - Animal classification

In [62]:
# Load the zoo dataset as float32; every row is one animal, the last column
# is its class label and the remaining columns are the input features.
xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.float32)

# Features stay float32 (torch.Tensor copies the array into a FloatTensor).
x_data = torch.Tensor(xy[:, :-1])

# Labels are kept as a column, shape (N, 1), and cast to int64:
# nn.CrossEntropyLoss expects integer class indices as its target, and the
# accuracy check compares them with the integer indices from torch.max.
# A float target makes the training cell fail on a fresh kernel.
y_data = torch.from_numpy(xy[:, [-1]]).long()

# hyper parameters
input_size = 16
nb_classes = 7
num_epochs = 2001
learning_rate = 0.1

In [78]:
model = LogisticClassificationModel(input_size, nb_classes)

# Loss takes (logits, class indices); optimisation is plain SGD.
criterion = nn.CrossEntropyLoss()  # (logit, class)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# y_data is stored as a column; view(-1) flattens it to one label per sample.
target = y_data.view(-1)

for epoch in range(num_epochs):
    # Forward pass over the full dataset and loss against the flat labels.
    y_pred = model(x_data)
    loss = criterion(y_pred, target)

    # Training accuracy: predicted class is the index of the largest logit.
    predicted = torch.max(y_pred, 1)[1]
    accuracy = (target == predicted).float().mean()

    # Clear old gradients, backpropagate, take one SGD step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress every 200 epochs only.
    if epoch % 200 != 0:
        continue
    print(epoch, '\tCost: ', loss.item(), '\tAccuracy: ', accuracy)

0 	Cost:  2.4103832244873047 	Accuracy:  tensor(0.0990)
200 	Cost:  0.3244876265525818 	Accuracy:  tensor(0.9406)
400 	Cost:  0.20642788708209991 	Accuracy:  tensor(0.9703)
600 	Cost:  0.15293489396572113 	Accuracy:  tensor(0.9802)
800 	Cost:  0.12182530015707016 	Accuracy:  tensor(1.)
1000 	Cost:  0.10133848339319229 	Accuracy:  tensor(1.)
1200 	Cost:  0.08678511530160904 	Accuracy:  tensor(1.)
1400 	Cost:  0.0759008601307869 	Accuracy:  tensor(1.)
1600 	Cost:  0.06745016574859619 	Accuracy:  tensor(1.)
1800 	Cost:  0.0606982484459877 	Accuracy:  tensor(1.)
2000 	Cost:  0.05518006905913353 	Accuracy:  tensor(1.)


In [79]:
# Walk through the predictions left over from the training loop next to the
# flattened true labels and print, per sample, whether they agree.
for guess, label in zip(predicted, y_data.flatten()):
    actual = int(label)
    print('[{}] Prediction: {} True Y: {}'.format(guess == actual, guess, actual))

[1] Prediction: 0 True Y: 0
[1] Prediction: 0 True Y: 0
[1] Prediction: 3 True Y: 3
[1] Prediction: 0 True Y: 0
[1] Prediction: 0 True Y: 0
[1] Prediction: 0 True Y: 0
[1] Prediction: 0 True Y: 0
[1] Prediction: 3 True Y: 3
[1] Prediction: 3 True Y: 3
[1] Prediction: 0 True Y: 0
[1] Prediction: 0 True Y: 0
[1] Prediction: 1 True Y: 1
[1] Prediction: 3 True Y: 3
[1] Prediction: 6 True Y: 6
[1] Prediction: 6 True Y: 6
[1] Prediction: 6 True Y: 6
[1] Prediction: 1 True Y: 1
[1] Prediction: 0 True Y: 0
[1] Prediction: 3 True Y: 3
[1] Prediction: 0 True Y: 0
[1] Prediction: 1 True Y: 1
[1] Prediction: 1 True Y: 1
[1] Prediction: 0 True Y: 0
[1] Prediction: 1 True Y: 1
[1] Prediction: 5 True Y: 5
[1] Prediction: 4 True Y: 4
[1] Prediction: 4 True Y: 4
[1] Prediction: 0 True Y: 0
[1] Prediction: 0 True Y: 0
[1] Prediction: 0 True Y: 0
[1] Prediction: 5 True Y: 5
[1] Prediction: 0 True Y: 0
[1] Prediction: 0 True Y: 0
[1] Prediction: 1 True Y: 1
[1] Prediction: 3 True Y: 3
[1] Prediction: 0 Tr