In [45]:
import numpy as np
import pickle
import math

In [46]:
def softmax(z):
    t = np.exp(z)
    d = np.sum(t, axis = 0)
    return np.divide(t, d)

In [47]:
def initialize():
    w = np.random.rand(28 * 28, 26) * math.sqrt((2/784))
    b = np.zeros([26, 1])
    return w, b

In [48]:
def propagation(x, y, w, b):
    m = x.shape[1]
    
    z = np.dot(w.T, x) + b
    a = softmax(z)
    
    cost = -(np.sum(y * np.log(a) + (1 - y) * np.log(1 - a)))/m
    
    dz = a - y
    dw = np.dot(x, dz.T)/m
    db = np.sum(dz, axis = 1, keepdims = True)/m
    
    grads = {"dw": dw,
            "db": db}
    
    return a, cost, grads

In [49]:
def optimize(w, b, grads, learning_rate):
    
    w = w - (learning_rate * grads["dw"])
    b = b - (learning_rate * grads["db"])
    
    return w, b

In [50]:
def predict(x, y, w, b):
    
    a, _, _ = propagation(x, y, w, b)
    
    acc = ((a * y)/ y.shape[1]).sum() * 100
    
    return acc

In [51]:
def model(trainx, trainy, testx, testy, num_iteration, learning_rate = 0.05, print_cost = True):
    
    w, b = initialize()
    costs = []
    
    for i in range(0, num_iteration+1):
        
        a, cost, grads = propagation(trainx, trainy, w, b)
        w, b = optimize(w, b, grads, learning_rate)
        
        if(i%100==0 and print_cost):
            costs.append(cost)
            print("cost at iteration ", i, ": ", cost)
        
    train_acc = predict(trainx, trainy, w, b)
    test_acc = predict(testx, testy, w, b)
    
    print("training accuracy: ", train_acc)
    print("test accuracy: ", test_acc)
    
    performance = {"w": w,
                  "b": b,
                  "costs": costs,
                  "train_acc": train_acc,
                  "test_acc": test_acc}
    return performance

In [52]:
with open("fulalpha.pickle", "rb") as pickle_in:
    data = pickle.load(pickle_in)

In [53]:
trainx = data["trainx"]/255.0
trainy = data["trainy_c"]
testx = data["testx"]/255.0
testy = data["testy_c"]

In [54]:
permutation_train = np.random.permutation(trainx.shape[1])
permutation_test = np.random.permutation(testx.shape[1])

trainx = trainx[:, permutation_train]
trainy = trainy[:, permutation_train]
testx = testx[:, permutation_test]
testy = testy[:, permutation_test]

In [55]:
performance = model(trainx, trainy, testx, testy, 2000)

cost at iteration  0 :  4.247357853719499
cost at iteration  100 :  2.7091912482378246
cost at iteration  200 :  2.1288249678517612
cost at iteration  300 :  1.8464991827466304
cost at iteration  400 :  1.6784773398215396
cost at iteration  500 :  1.5647194089744378
cost at iteration  600 :  1.4811025059362382
cost at iteration  700 :  1.4161725091048671
cost at iteration  800 :  1.363769742427656
cost at iteration  900 :  1.320260179639836
cost at iteration  1000 :  1.2833435085499125
cost at iteration  1100 :  1.2514817211360536
cost at iteration  1200 :  1.2236007998674878
cost at iteration  1300 :  1.1989238980707537
cost at iteration  1400 :  1.1768726396892655
cost at iteration  1500 :  1.1570059108082247
cost at iteration  1600 :  1.1389803641499687
cost at iteration  1700 :  1.1225240562302035
cost at iteration  1800 :  1.1074183301055502
cost at iteration  1900 :  1.0934850461379382
cost at iteration  2000 :  1.080577381357523
training accuracy:  64.99322619202255
test accurac