In [13]:
import tensorflow as tf

# X_data = tf.Variable([[1], [2], [3]])
X_data = tf.Variable([0, 1, 2, 3])
X = tf.Variable(X_data)
print(X)
X = tf.reshape(X, [-1, 4])
print(X)

# NotFoundError: Could not find valid device for node.
# Node:{{node OneHot}}
# All kernels registered for op OneHot :
#X = tf.cast(X, tf.float32)

X = tf.one_hot(X, 4)  # one hot 
X = tf.cast(X, tf.float32)
print(X)


<tf.Variable 'Variable:0' shape=(4,) dtype=int32, numpy=array([0, 1, 2, 3])>
tf.Tensor([[0 1 2 3]], shape=(1, 4), dtype=int32)
tf.Tensor(
[[[1. 0. 0. 0.]
  [0. 1. 0. 0.]
  [0. 0. 1. 0.]
  [0. 0. 0. 1.]]], shape=(1, 4, 4), dtype=float32)


In [11]:
"""
In this file, we will implement back propagations by hands

We will use the Sigmoid Cross Entropy loss function.
This is equivalent to tf.nn.sigmoid_softmax_with_logits(logits, labels)

[References]

1) Tensorflow Document (tf.nn.sigmoid_softmax_with_logits)
    https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits

2) Neural Net Backprop in one slide! by Sung Kim
    https://docs.google.com/presentation/d/1_ZmtfEjLmhbuM_PqbDYMXXLAqeWN0HwuhcSKnUQZ6MM/edit#slide=id.g1ec1d04b5a_1_83

3) Back Propagation with Tensorflow by Dan Aloni
    http://blog.aloni.org/posts/backprop-with-tensorflow/

4) Yes you should understand backprop by Andrej Karpathy
    https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b#.cockptkn7


[Network Architecture]

Input: x
Layer1: x * W + b
Output layer = σ(Layer1)

Loss_i = - y * log(σ(Layer1)) - (1 - y) * log(1 - σ(Layer1))
Loss = tf.reduce_sum(Loss_i)

We want to compute that

dLoss/dW = ???
dLoss/db = ???

please read "Neural Net Backprop in one slide!" for deriving formulas

"""
import numpy as np
import tensorflow as tf
assert(tf.__version__.find('2') == 0)
tf.random.set_seed(777)  # for reproducibility

# Predicting animal type based on various features
xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.float32)
X_data = xy[:, 0:-1]
N = X_data.shape[0]
y_data = xy[:, [-1]]

# y_data has labels from 0 ~ 6
print("y has one of the following values")
print(np.unique(y_data))

# X_data.shape = (101, 16) => 101 samples, 16 features
# y_data.shape = (101, 1)  => 101 samples, 1 label
print("Shape of X data: ", X_data.shape)
print("Shape of y data: ", y_data.shape)

nb_classes = 7  # 0 ~ 6

X = tf.Variable(tf.ones(X_data.shape))
y = tf.Variable(y_data)  # 0 ~ 6
print("Shape of y variable", y.shape)

target = tf.one_hot(tf.cast(y, tf.int32), nb_classes)  # one hot
target = tf.reshape(target, [-1, nb_classes])
target = tf.cast(target, tf.float32)

W = tf.Variable(tf.random.normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random.normal([nb_classes]), name='bias')


def sigma(x):
    # sigmoid function
    # σ(x) = 1 / (1 + exp(-x))
    return 1. / (1. + tf.exp(-x))


def sigma_prime(x):
    # derivative of the sigmoid function
    # σ'(x) = σ(x) * (1 - σ(x))
    return sigma(x) * (1. - sigma(x))


# Forward propagtion
layer_1 = lambda: tf.matmul(X, W) + b
y_pred = lambda: sigma(layer_1())

# Loss Function (end of forwad propagation)
loss_i = lambda: - target * tf.math.log(y_pred()) - (1. - target) * tf.math.log(1. - y_pred())
loss = lambda: tf.reduce_sum(loss_i())

# Dimension Check
assert y_pred().shape.as_list() == target.shape.as_list()


# Back prop (chain rule)
# How to derive? please read "Neural Net Backprop in one slide!"
d_loss = lambda: (y_pred() - target) / (y_pred() * (1. - y_pred()) + 1e-7)
d_sigma = lambda: sigma_prime(layer_1())
d_layer = lambda: d_loss() * d_sigma()
d_b = d_layer
d_W = lambda: tf.matmul(tf.transpose(X), d_layer())

# Updating network using gradients
learning_rate = 0.01
train_step = [
    lambda: W.assign(W - learning_rate * d_W()),
    lambda: b.assign(b - learning_rate * tf.reduce_sum(d_b())),
]

# Prediction and Accuracy
prediction = lambda: tf.argmax(y_pred(), 1)
acct_mat = lambda: tf.equal(tf.argmax(y_pred(), 1), tf.argmax(target, 1))
acct_res = lambda: tf.reduce_mean(tf.cast(acct_mat(), tf.float32))

# Launch graph
X.assign(X_data)
y.assign(y_data)
for step in range(500):
    train_step[0]()
    train_step[1]()
    if step % 100 == 0:
        step_loss = loss()
        acc = acct_res()
        tf.print("Step: {:5}\t Loss: {:10.5f}\t Acc: {:.2%}" .format(step, step_loss, acc))

# Let's see if we can predict
pred = prediction()
for p, y in zip(pred, y_data):
    msg = "[{}]\t Prediction: {:d}\t True y: {:d}"
    print(msg.format(p == int(y[0]), p, int(y[0])))

"""
Output Example

Step:     0      Loss:  453.74799        Acc: 38.61%
Step:    20      Loss:   95.05664        Acc: 88.12%
Step:    40      Loss:   66.43570        Acc: 93.07%
Step:    60      Loss:   53.09288        Acc: 94.06%
...
Step:   290      Loss:   18.72972        Acc: 100.00%
Step:   300      Loss:   18.24953        Acc: 100.00%
Step:   310      Loss:   17.79592        Acc: 100.00%
...
[True]   Prediction: 0   True y: 0
[True]   Prediction: 0   True y: 0
[True]   Prediction: 3   True y: 3
[True]   Prediction: 0   True y: 0
...
"""


y has one of the following values
[0. 1. 2. 3. 4. 5. 6.]
Shape of X data:  (101, 16)
Shape of y data:  (101, 1)
Shape of y variable (101, 1)
Step:     0	 Loss:  357.72537	 Acc: 58.42%
Step:   100	 Loss:   32.49531	 Acc: 97.03%
Step:   200	 Loss:   21.62311	 Acc: 100.00%
Step:   300	 Loss:   16.68365	 Acc: 100.00%
Step:   400	 Loss:   13.69160	 Acc: 100.00%
[True]	 Prediction: 0	 True y: 0
[True]	 Prediction: 0	 True y: 0
[True]	 Prediction: 3	 True y: 3
[True]	 Prediction: 0	 True y: 0
[True]	 Prediction: 0	 True y: 0
[True]	 Prediction: 0	 True y: 0
[True]	 Prediction: 0	 True y: 0
[True]	 Prediction: 3	 True y: 3
[True]	 Prediction: 3	 True y: 3
[True]	 Prediction: 0	 True y: 0
[True]	 Prediction: 0	 True y: 0
[True]	 Prediction: 1	 True y: 1
[True]	 Prediction: 3	 True y: 3
[True]	 Prediction: 6	 True y: 6
[True]	 Prediction: 6	 True y: 6
[True]	 Prediction: 6	 True y: 6
[True]	 Prediction: 1	 True y: 1
[True]	 Prediction: 0	 True y: 0
[True]	 Prediction: 3	 True y: 3
[True]	 Predic

'\nOutput Example\n\nStep:     0      Loss:  453.74799        Acc: 38.61%\nStep:    20      Loss:   95.05664        Acc: 88.12%\nStep:    40      Loss:   66.43570        Acc: 93.07%\nStep:    60      Loss:   53.09288        Acc: 94.06%\n...\nStep:   290      Loss:   18.72972        Acc: 100.00%\nStep:   300      Loss:   18.24953        Acc: 100.00%\nStep:   310      Loss:   17.79592        Acc: 100.00%\n...\n[True]   Prediction: 0   True y: 0\n[True]   Prediction: 0   True y: 0\n[True]   Prediction: 3   True y: 3\n[True]   Prediction: 0   True y: 0\n...\n'