In [1]:
import numpy as np

def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z), element-wise.

    The textbook formula overflows inside ``np.exp(-z)`` for large negative
    ``z`` and emits a RuntimeWarning. This version only ever exponentiates a
    non-positive number, so the intermediate stays in [0, 1] and cannot
    overflow.

    Args:
        z: A real scalar or array-like.

    Returns:
        Values in [0, 1] with the same shape as ``z`` (a NumPy scalar when
        ``z`` is a scalar).
    """
    z = np.asarray(z)
    # exp(-|z|) lies in [0, 1] for every z, so it can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z)   -- the textbook expression, unchanged.
    # z <  0: e^z / (1 + e^z)  -- algebraically identical, overflow-free.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d array into a scalar and is a plain view
    # for arrays with one or more dimensions.
    return result[()]


def prediction(X, w):
    """Forward pass: the matrix product of X and w, squashed by the sigmoid.

    Args:
        X: Input matrix (assumed one example per row -- TODO confirm).
        w: Weight matrix; must be matmul-compatible with X.

    Returns:
        ``sigmoid(np.matmul(X, w))``.
    """
    weighted_sum = np.matmul(X, w)
    return sigmoid(weighted_sum)


def classify(X, w):
    """Pick, for every row of X, the index of the highest-scoring output.

    Args:
        X: Input matrix passed straight to ``prediction``.
        w: Weight matrix passed straight to ``prediction``.

    Returns:
        The argmax along axis 1 of the predictions, reshaped into a single
        column (shape ``(n, 1)``).
    """
    scores = prediction(X, w)
    # reshape(-1, 1) turns the flat argmax result into a column vector.
    return np.argmax(scores, axis=1).reshape(-1, 1)


def loss(X, Y, w):
    """Log loss (binary cross-entropy) summed over outputs, averaged over rows.

    When a prediction saturates to exactly 0 or 1, ``np.log`` returns
    ``-inf``; multiplying that by a zero label yields ``nan`` and poisons the
    whole sum. The arguments of both logarithms are therefore clamped to the
    smallest positive normal float. This leaves every non-saturated value
    untouched and turns a saturated, wrong prediction into a large finite
    penalty instead of ``inf``/``nan``.

    Args:
        X: Input matrix; ``X.shape[0]`` is used as the number of examples.
        Y: Targets, broadcast-compatible with ``prediction(X, w)``.
        w: Weight matrix.

    Returns:
        The total cross-entropy divided by ``X.shape[0]``.
    """
    y_hat = np.asarray(prediction(X, w))
    # Smallest positive normal number of the prediction dtype; log(tiny) is
    # finite, so 0 * log(...) is 0 rather than nan.
    tiny = np.finfo(y_hat.dtype).tiny
    first_term = Y * np.log(np.maximum(y_hat, tiny))
    second_term = (1 - Y) * np.log(np.maximum(1 - y_hat, tiny))
    return -np.sum(first_term + second_term) / X.shape[0]


def gradient(X, Y, w):
    """Gradient of the log loss with respect to w, averaged over rows of X.

    Args:
        X: Input matrix; ``X.shape[0]`` is used as the number of examples.
        Y: Targets, same shape as ``prediction(X, w)``.
        w: Weight matrix.

    Returns:
        ``X.T @ (prediction - Y) / X.shape[0]``, which has the shape of ``w``.
    """
    n_examples = X.shape[0]
    errors = prediction(X, w) - Y
    return np.matmul(X.T, errors) / n_examples


def train(X_train, Y_train, X_test, Y_test, iterations, lr):
    """Fit the weights with plain gradient descent, reporting at every step.

    Args:
        X_train: Training inputs (2-D; columns are the input variables).
        Y_train: Training targets (2-D; columns are the outputs).
        X_test: Test inputs, used only for reporting.
        Y_test: Test labels, used only for reporting.
        iterations: Number of gradient-descent steps.
        lr: Learning rate multiplying each gradient.

    Returns:
        The trained weight matrix of shape
        ``(X_train.shape[1], Y_train.shape[1])``.
    """
    n_inputs = X_train.shape[1]
    n_outputs = Y_train.shape[1]
    weights = np.zeros((n_inputs, n_outputs))
    for step in range(iterations):
        # Report before updating, so step 0 shows the untrained model.
        report(step, X_train, Y_train, X_test, Y_test, weights)
        update = gradient(X_train, Y_train, weights) * lr
        weights -= update
    # One last report for the weights produced by the final update.
    report(iterations, X_train, Y_train, X_test, Y_test, weights)
    return weights

def report(iteration, X_train, Y_train, X_test, Y_test, w):
    """Print the training loss and the test-set accuracy for one iteration.

    Args:
        iteration: Step number shown at the start of the line.
        X_train: Training inputs, used for the loss.
        Y_train: Training targets, used for the loss.
        X_test: Test inputs, used for the accuracy.
        Y_test: Test labels compared element-wise with ``classify``'s output.
        w: Current weight matrix.
    """
    predicted_labels = classify(X_test, w)
    # NOTE(review): classify returns shape (n, 1); this presumably expects
    # Y_test to be a column as well -- a 1-D Y_test would broadcast. Confirm.
    n_correct = np.count_nonzero(predicted_labels == Y_test)
    accuracy_pct = n_correct * 100.0 / Y_test.shape[0]
    training_loss = loss(X_train, Y_train, w)
    print("%d - Loss: %.20f, %.2f%%" % (iteration, training_loss, accuracy_pct))


# Project-local module; this cell reads X_train, Y_train, X_test and Y_test
# from it.
import mnist as data

# Run 200 gradient-descent steps with a learning rate of 1e-5. train() prints
# one progress line per step plus a final one.
w = train(
    data.X_train,
    data.Y_train,
    data.X_test,
    data.Y_test,
    iterations=200,
    lr=1e-5,
)


0 - Loss: 6.93147180559945397249, 9.80%
1 - Loss: 8.43445687508333641347, 68.04%
2 - Loss: 5.51204748892387641490, 68.10%
3 - Loss: 2.95687007359365416903, 68.62%
4 - Loss: 1.89853876570570956339, 73.75%
5 - Loss: 1.75582891552667441637, 81.99%
6 - Loss: 1.67488127292621791220, 81.25%
7 - Loss: 1.62387524342028100044, 82.89%
8 - Loss: 1.56528056897466516517, 82.69%
9 - Loss: 1.52926926510555771799, 83.61%
10 - Loss: 1.48349685001838960119, 83.55%
11 - Loss: 1.45473907235372745816, 84.30%
12 - Loss: 1.41878447814394381687, 84.27%
13 - Loss: 1.39425656696842192872, 84.84%
14 - Loss: 1.36593509106222588123, 84.96%
15 - Loss: 1.34458751883476468336, 85.34%
16 - Loss: 1.32201982320960964579, 85.40%
17 - Loss: 1.30346341841935919881, 85.81%
18 - Loss: 1.28511711376623205538, 85.86%
19 - Loss: 1.26906831515005369404, 86.18%
20 - Loss: 1.25378277537179583234, 86.22%
21 - Loss: 1.23989638166385729434, 86.55%
22 - Loss: 1.22684690399574325426, 86.51%
23 - Loss: 1.21474057573247806729, 86.74%
24 