In [1]:
import numpy as np
import tensorflow as tf
import time

In [2]:
# --- Dataset description (MNIST) ---
num_features = 28 * 28  # each 28x28 image is flattened to 784 inputs
num_classes = 10        # one class per digit, 0 through 9

# --- Optimisation settings ---
learning_rate = 1e-4    # step size applied to every parameter update
batch_size = 256        # samples per mini-batch
training_steps = 3000   # number of mini-batches consumed by the training loop
display_step = 50       # report batch accuracy every this many steps

In [3]:
from tensorflow.keras.datasets import mnist

# load_data() returns a (train, test) pair, each of which is an (images, labels) pair.
mnist_train, mnist_test = mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
# Shapes of the raw training images and labels, one per line.
print(x_train.shape, y_train.shape, sep="\n")

(60000, 28, 28)
(60000,)


In [5]:
# Per split: cast pixels to float32, flatten every image into a vector of
# num_features values, then rescale intensities from [0, 255] to [0, 1].
# NOTE: x_train / x_test are overwritten, so re-running this cell divides by 255 again.
x_train = x_train.astype(np.float32).reshape(-1, num_features) / 255.
x_test = x_test.astype(np.float32).reshape(-1, num_features) / 255.

In [6]:
# Shapes of the flattened train and test images, one per line.
print(x_train.shape, x_test.shape, sep="\n")

(60000, 784)
(10000, 784)


In [7]:
# Endless stream of training mini-batches: repeat the dataset indefinitely,
# shuffle through a 5000-element buffer, group into batches of batch_size,
# and keep one batch prefetched.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .repeat()
    .shuffle(5000)
    .batch(batch_size)
    .prefetch(1)
)

In [9]:
def accuracy(y_pred, y_true):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        y_pred: 2-D NumPy array of per-class scores, one row per sample.
        y_true: 1-D NumPy array of class indices (cast to int64 before comparing).

    Returns:
        The mean of the per-sample hit indicators as a float32 in [0, 1].
    """
    predicted_labels = np.argmax(y_pred, axis=1)
    true_labels = y_true.astype(np.int64)
    hits = predicted_labels == true_labels
    return np.mean(hits.astype(np.float32))

In [10]:
def regression(x,b,b0):
    """Softmax regression: map inputs to per-class probabilities.

    Computes ``softmax(x @ b + b0)`` along the last axis.

    Args:
        x: Input samples, shape (batch, features); a single 1-D sample of
            shape (features,) is also accepted.
        b: Weight matrix, shape (features, classes).
        b0: Bias vector, shape (classes,).

    Returns:
        NumPy array of probabilities with the same leading shape as ``x`` and
        a trailing axis of size ``classes``; entries along that axis sum to 1.
    """
    logits = np.dot(x, b) + b0
    # Softmax is unchanged by subtracting a per-row constant. Shifting by the
    # row maximum keeps every exponent <= 0, so np.exp cannot overflow to inf
    # (which previously turned the division into inf/inf = nan).
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)

In [19]:
# Multiclass (softmax) logistic regression trained by mini-batch gradient
# ascent on the log-likelihood, vectorized over the whole batch.

# perf_counter is a monotonic, high-resolution clock intended for measuring elapsed time.
start = time.perf_counter()

# Weights (num_features, num_classes) and biases (num_classes,), drawn uniformly from [-1, 1).
b = np.random.uniform(-1, 1, (num_features, num_classes))
b0 = np.random.uniform(-1, 1, num_classes)

for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
    # Convert the eager tensors once so the rest of the step is plain NumPy.
    batch_x_np = batch_x.numpy()
    batch_y_np = batch_y.numpy()

    yy = tf.one_hot(batch_y, depth=num_classes).numpy()  # one-hot targets
    a = regression(batch_x_np, b, b0)                     # predicted probabilities
    residual = yy - a

    # X^T (Y - P) is the sum over the batch of the per-sample outer products
    # x_i (y_i - p_i)^T, computed without materializing the intermediate
    # (batch, num_features, num_classes) tensor.
    # Subtracting sign(b) acts as an L1 penalty (coefficient 1) on the weights;
    # the bias is not penalized.
    db = batch_x_np.T @ residual - np.sign(b)
    db0 = np.sum(residual, axis=0)

    b += learning_rate * db
    b0 += learning_rate * db0

    if step % display_step == 0:
        # Accuracy on the batch that was just used for the update (not a held-out set).
        pred = regression(batch_x_np, b, b0)
        acc = accuracy(pred, batch_y_np)
        print("step : %i, accuracy : %f" % (step, acc*100))

print('execution time: {} sec'.format(time.perf_counter() - start))

step : 50, accuracy : 18.359375
step : 100, accuracy : 30.859375
step : 150, accuracy : 34.765625
step : 200, accuracy : 38.671875
step : 250, accuracy : 49.609375
step : 300, accuracy : 54.687500
step : 350, accuracy : 58.984375
step : 400, accuracy : 53.906250
step : 450, accuracy : 60.937500
step : 500, accuracy : 64.453125
step : 550, accuracy : 67.187500
step : 600, accuracy : 64.453125
step : 650, accuracy : 67.187500
step : 700, accuracy : 70.703125
step : 750, accuracy : 66.796875
step : 800, accuracy : 71.484375
step : 850, accuracy : 66.796875
step : 900, accuracy : 75.390625
step : 950, accuracy : 77.343750
step : 1000, accuracy : 71.484375
step : 1050, accuracy : 74.609375
step : 1100, accuracy : 73.828125
step : 1150, accuracy : 80.078125
step : 1200, accuracy : 78.515625
step : 1250, accuracy : 73.828125
step : 1300, accuracy : 81.250000
step : 1350, accuracy : 75.000000
step : 1400, accuracy : 79.296875
step : 1450, accuracy : 76.562500
step : 1500, accuracy : 86.328125


In [20]:
# Score the held-out test images with the trained parameters and report the
# fraction classified correctly.
pred = regression(x_test, b, b0)
test_accuracy = accuracy(pred, y_test)
print("Test Accuracy: %f" % test_accuracy)

Test Accuracy: 0.854900
