In [3]:
import tensorflow as tf
import numpy as np

In [126]:
from tensorflow.keras.datasets import mnist

In [127]:
# Load the MNIST train/test images and labels through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [128]:
# Sanity check: the training images arrive as 60000 samples of 28x28 pixels.
x_train.shape

(60000, 28, 28)

In [129]:
# Prepare both image sets for a dense model in one pass: convert to float32,
# flatten every 28x28 image into a 784-element row, and divide by 255 so that
# 8-bit pixel intensities fall in [0, 1].
x_train, x_test = (
    np.reshape(np.array(images, np.float32), [-1, 28 * 28]) / 255.
    for images in (x_train, x_test)
)

In [148]:
# Training input pipeline.
# Shuffle *before* repeating and use a buffer that spans the whole training
# set: a 100-element buffer only reorders samples locally, and shuffling after
# repeat() lets samples from different epochs mix.
# repeat() keeps the stream endless, so consumers must bound it themselves
# (e.g. data.take(n)); batching last keeps every batch at exactly 256 samples.
data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=len(x_train))
    .repeat()
    .batch(256)
)

In [131]:
# Trainable parameters: a [784, 10] weight matrix and a [10] bias vector,
# both initialised to ones.
w = tf.Variable(initial_value=tf.ones(shape=[28 * 28, 10]), name='w')
b = tf.Variable(initial_value=tf.ones(shape=[10]), name='b')

# Model
def logistic_regression(x):
    """Return softmax probabilities of the linear model `x @ w + b`.

    Reads the notebook-level variables `w` and `b`.
    """
    logits = tf.matmul(x, w) + b
    return tf.nn.softmax(logits)

# Loss
def cross_entroy(y_pred, y):
    """Mean cross-entropy between predicted probabilities and integer labels.

    y_pred: per-class probabilities, one row per sample.
    y: integer class labels; one-hot encoded here with depth 10.

    The spelling of the name is kept as-is because other cells call it.
    """
    # Keep probabilities inside [1e-9, 1] so the log below never sees 0.
    clipped = tf.clip_by_value(y_pred, 1e-9, 1)
    targets = tf.one_hot(y, 10)
    # Per-sample loss: negative log-probability of the labelled class.
    per_sample = -tf.reduce_sum(targets * tf.math.log(clipped), 1)
    return tf.reduce_mean(per_sample)

# Metric
def acc(y_pred, y):
    """Return the fraction of rows whose arg-max prediction equals the label."""
    predicted = tf.argmax(y_pred, axis=1)
    # Labels are cast to int64 to match the dtype tf.argmax returns.
    hits = tf.equal(predicted, tf.cast(y, tf.int64))
    return tf.reduce_mean(tf.cast(hits, tf.float32))

# Plain stochastic gradient descent with a learning rate of 0.01.
optimizer = tf.optimizers.SGD(learning_rate=0.01)

In [151]:
# Model parameters for 784 input features (28*28 pixels) and 10 classes:
# the weight matrix starts at ones, the bias vector at zeros.
w = tf.Variable(initial_value=tf.ones(shape=[28 * 28, 10]), name="weight")
b = tf.Variable(initial_value=tf.zeros(shape=[10]), name="bias")

# Softmax (multinomial logistic) regression on top of the linear map Wx + b.
def logistic_regression(x):
    """Map a batch of inputs to per-class probabilities.

    Uses the notebook-level variables `w` and `b`.
    """
    logits = tf.add(tf.matmul(x, w), b)
    # Softmax turns the logits into a probability distribution per row.
    return tf.nn.softmax(logits)

# Cross-entropy loss.
def cross_entropy(y_pred, y_true):
    """Mean cross-entropy between predicted probabilities and integer labels.

    y_pred: per-class probabilities, one row per sample.
    y_true: integer class labels; one-hot encoded here with depth 10.
    """
    targets = tf.one_hot(y_true, depth=10)
    # Bound the probabilities to [1e-9, 1] so log() never receives 0.
    safe_probs = tf.clip_by_value(y_pred, 1e-9, 1.)
    log_likelihood = tf.reduce_sum(targets * tf.math.log(safe_probs), 1)
    return tf.reduce_mean(-log_likelihood)

# Classification accuracy.
def accuracy(y_pred, y_true):
    """Return the share of samples whose highest-scoring class equals the label."""
    predicted_class = tf.argmax(y_pred, 1)
    expected_class = tf.cast(y_true, tf.int64)
    matches = tf.cast(tf.equal(predicted_class, expected_class), tf.float32)
    return tf.reduce_mean(matches)

# SGD optimizer, learning rate 0.01.
optimizer = tf.optimizers.SGD(learning_rate=0.01)

In [152]:
def train_step(x, y):
    """Run one SGD update of `w` and `b` on a single batch.

    Args:
        x: batch of flattened images, passed to `logistic_regression`.
        y: integer class labels for the batch.

    Returns:
        The scalar cross-entropy loss, computed before the update is applied.
    """
    trainable = [w, b]
    # Record the forward pass so the tape can differentiate the loss.
    with tf.GradientTape() as tape:
        y_pred = logistic_regression(x)
        # Use the same `cross_entropy` helper as the progress reporting, not
        # the misspelled duplicate `cross_entroy` from the older cell.
        loss = cross_entropy(y_pred, y)
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))

    return loss

In [153]:
# Run training for the given number of steps.
display_step = 50
training_steps = 300
# `data` is an endless stream, so take() is what bounds the run.
for step, (batch_x, batch_y) in enumerate(data.take(training_steps), 1):
    # Run the optimization to update W and b values.
    train_step(batch_x, batch_y)

    if step % display_step == 0:
        # Re-evaluate the batch just trained on, i.e. with the updated parameters.
        pred = logistic_regression(batch_x)
        batch_loss = cross_entropy(pred, batch_y)
        # Do not bind this result to `acc`: that name is the metric function
        # defined earlier in the notebook and must stay callable.
        batch_acc = accuracy(pred, batch_y)
        print("step: %i, loss: %f, accuracy: %f" % (step, batch_loss, batch_acc))

step: 50, loss: 1.872224, accuracy: 0.703125
step: 100, loss: 1.455467, accuracy: 0.824219
step: 150, loss: 1.376462, accuracy: 0.796875
step: 200, loss: 1.131896, accuracy: 0.863281
step: 250, loss: 1.057566, accuracy: 0.839844
step: 300, loss: 1.004309, accuracy: 0.835938


In [134]:
# Train for 4 passes over the training set, reporting every 50 steps.
# `data` is built with .repeat() and therefore never ends on its own, so each
# pass is bounded explicitly; iterating it directly would never leave the
# inner loop.
steps_per_epoch = -(-len(x_train) // 256)  # ceil(samples / batch size used by `data`)
for epoch in range(4):
    for s, (x_batch, y_batch) in enumerate(data.take(steps_per_epoch)):
        # `loss` is the value computed inside train_step, before the update.
        loss = train_step(x_batch, y_batch)

        if s % 50 == 0:
            print('s',s)
            print('loss', loss)
            # Accuracy on the same batch, measured after the update.
            y_pred = logistic_regression(x_batch)
            accu = accuracy(y_pred, y_batch)
            print('acc:', accu)

s 0
loss tf.Tensor(2.2265162, shape=(), dtype=float32)
acc: tf.Tensor(0.73046875, shape=(), dtype=float32)
s 50
loss tf.Tensor(1.8123469, shape=(), dtype=float32)
acc: tf.Tensor(0.73828125, shape=(), dtype=float32)
s 100
loss tf.Tensor(1.5208645, shape=(), dtype=float32)
acc: tf.Tensor(0.81640625, shape=(), dtype=float32)
s 150
loss tf.Tensor(1.3529414, shape=(), dtype=float32)
acc: tf.Tensor(0.796875, shape=(), dtype=float32)
s 200
loss tf.Tensor(1.1718863, shape=(), dtype=float32)
acc: tf.Tensor(0.80078125, shape=(), dtype=float32)
s 0
loss tf.Tensor(1.0494598, shape=(), dtype=float32)
acc: tf.Tensor(0.828125, shape=(), dtype=float32)
s 50
loss tf.Tensor(1.0671384, shape=(), dtype=float32)
acc: tf.Tensor(0.75390625, shape=(), dtype=float32)
s 100
loss tf.Tensor(0.8809053, shape=(), dtype=float32)
acc: tf.Tensor(0.85546875, shape=(), dtype=float32)
s 150
loss tf.Tensor(0.92793185, shape=(), dtype=float32)
acc: tf.Tensor(0.8125, shape=(), dtype=float32)
s 200
loss tf.Tensor(0.81799257,