In [1]:
from keras.datasets import mnist

Using TensorFlow backend.


In [2]:
# The data, shuffled and split between train and test sets: the loader
# returns one (images, labels) pair per split.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [3]:
# Flatten each image into a single row vector (28x28 -> 784 for MNIST) and
# cast to float32. The number of samples is read from the array itself and
# -1 lets NumPy infer the flattened length, so no dataset size is hard-coded.
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], -1).astype('float32')

In [4]:
# Normalization: divide every pixel value by 255, in place.
# NOTE: the division mutates the arrays, so re-running this cell alone
# rescales the data a second time.
for split in (X_train, X_test):
    split /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

60000 train samples
10000 test samples


In [5]:
# Display the first 200 training images on a 10 x 20 grid
import matplotlib as mpl
mpl.use('TKAgg')
import matplotlib.pyplot as plt

plt.figure(figsize=(7.195, 3.841), dpi=100)
for i, img in enumerate(X_train[:200], start=1):
    # Subplot positions are 1-based; each flattened row is restored to 28x28.
    plt.subplot(10, 20, i)
    plt.imshow(img.reshape(28, 28), cmap='gray')
    plt.axis('off')
plt.show()

In [6]:
# Shape of a single flattened training example
X_train[0].shape

(784,)

In [7]:
# Logistic regression implementation

In [8]:
from keras.utils import np_utils
K = 10  # number of classes
# Convert the integer class vectors into binary (one-hot) class matrices
# with K columns.
Y_train, Y_test = (
    np_utils.to_categorical(labels, K) for labels in (y_train, y_test)
)

In [70]:
def softmax(X):
    """Row-wise softmax.

    Parameters
    ----------
    X : ndarray of shape (Nb, d)
        One row of scores per example.

    Returns
    -------
    ndarray of shape (Nb, d)
        Each row is non-negative and sums to 1.
    """
    # Subtracting the per-row maximum leaves the result mathematically
    # unchanged (the factor cancels in the ratio) but keeps every exponent
    # <= 0, so np.exp cannot overflow to inf and produce inf/inf = NaN.
    shifted = X - np.max(X, axis=1, keepdims=True)
    E = np.exp(shifted)
    # keepdims keeps the row sums as a column so the division broadcasts
    # across each row.
    return E / np.sum(E, axis=1, keepdims=True)

In [115]:
def forward(X_in, W, b):
    """Forward pass of the linear + softmax model.

    Parameters
    ----------
    X_in : input batch, one example per row.
    W : weight matrix multiplied on the right of X_in.
    b : bias added to the product X_in W (broadcast by NumPy).

    Returns
    -------
    (y_out, S_act) : the softmax of the activations, and the activations
    themselves (X_in W + b).
    """
    linear_part = np.matmul(X_in, W)
    S_act = linear_part + b
    return softmax(S_act), S_act

In [116]:
def backward(X_out, y_in, y_est):
    """Batch-averaged gradients for the weights and the bias.

    Parameters
    ----------
    X_out : ndarray, the batch of inputs (one example per row).
    y_in : target matrix for the batch.
    y_est : model predictions for the batch, same shape as y_in.

    Returns
    -------
    (gradW, gradb) : X_out^T (y_est - y_in) / batch size, and the column-wise
    sum of (y_est - y_in) / batch size. When y_est is a softmax output these
    are the gradients of the batch-averaged cross-entropy.
    """
    residual = y_est - y_in          # prediction error, computed once
    scale = 1 / X_out.shape[0]       # averaging factor: 1 / batch size
    gradW = scale * (X_out.T @ residual)
    gradb = scale * residual.sum(axis=0)
    return gradW, gradb

In [117]:
def update_params(W, b, gradW, gradb, learning_rate):
    """One gradient-descent step.

    Returns the pair (W - learning_rate * gradW, b - learning_rate * gradb).
    New values are returned; the arguments are not modified in place.
    """
    new_W = W - learning_rate * gradW
    new_b = b - learning_rate * gradb
    return new_W, new_b

In [118]:
import numpy as np
# Problem size: N training examples with d features each
N, d = X_train.shape

# Hyper-parameters
numEp = 20  # Number of epochs for gradient descent
eta = 1e-1  # Learning rate
batch_size = 100
# Only full mini-batches are visited; a trailing partial batch would be skipped.
nb_batches = N // batch_size

# Parameters start at zero; the gradient buffers are overwritten at each step.
W = np.zeros((d, K))
b = np.zeros((1, K))
gradW = np.zeros_like(W)
gradb = np.zeros_like(b)

for epoch in range(numEp):
    for ex in range(nb_batches):
        # Rows of the current mini-batch (same order in every epoch)
        rows = slice(ex * batch_size, (ex + 1) * batch_size)
        X_tmp = X_train[rows, :]
        y_tmp = Y_train[rows]

        # FORWARD PASS: predictions for the batch with the current parameters
        y_est, S_act = forward(X_tmp, W, b)

        # BACKWARD PASS:
        # 1) gradients with respect to W and b
        gradW, gradb = backward(X_tmp, y_tmp, y_est)
        # 2) gradient-descent update of W and b
        W, b = update_params(W, b, gradW, gradb, eta)

In [119]:
def accuracy(W, b, images, labels):
    """Percentage of examples whose predicted class matches the label.

    The predicted class is the argmax of each row returned by forward();
    the true class is the argmax of each row of `labels`. The result is the
    mean match rate multiplied by 100.
    """
    probs = forward(images, W, b)[0]  # the activations are not needed here
    hits = probs.argmax(axis=1) == labels.argmax(axis=1)
    return hits.mean() * 100.0

In [120]:
# Accuracy (in percent) of the trained parameters on the test set
accuracy(W, b, images=X_test, labels=Y_test)

92.24