In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import warnings#ignore warning messages
warnings.filterwarnings("ignore")

file_data   = "mnist.csv"
handle_file = open(file_data, "r")
data        = handle_file.readlines() #list[10,000]
handle_file.close()

size_row    = 28    # height of the image
size_col    = 28    # width of the image

num_image   = len(data)
count       = 0     # count for the number of images

num_training = 6000
num_testing = 4000

# normalize the values of the input data to be [0, 1]
def normalize(data):
    
    data_normalized = (data - min(data)) / (max(data) - min(data))
    
    return(data_normalized)

# make a matrix each column of which represents an images in a vector form
list_image    = np.empty((size_row * size_col, num_image), dtype=float)    #784*10,000
list_label    = np.empty(num_image, dtype=int) #list[10,000]

for line in data:

    line_data   = line.split(',') #list[1+784]
    label       = line_data[0]
    im_vector   = np.asfarray(line_data[1:]) #list[784]
    im_vector   = normalize(im_vector)

    list_label[count]       = label
    list_image[:, count]    = im_vector

    count += 1
    
count = 0
label_vectors = np.empty((10, num_image), dtype=float)
zero_vector = np.zeros(10, dtype=float)

for l in list_label:
    la_vector    = zero_vector
    la_vector[l] = 1
    
    label_vectors[:, count] = la_vector
    count += 1
    
    
image_training = list_image[:, 0:num_training]         #784*6,000
image_testing  = list_image[:, num_training:num_image] #784*4,000

list_label_training = list_label[0:num_training]         #6,000
list_label_testing  = list_label[num_training:num_image] #4,000

label_training = label_vectors[:, 0:num_training]         #10*6,000
label_testing  = label_vectors[:, num_training:num_image] #10*4,000

bias_training = np.ones((1, num_training), dtype=int)
bias_testing  = np.ones((1, num_testing), dtype=int)

image_training = np.concatenate((bias_training, image_training), axis=0)#785*6,000
image_testing  = np.concatenate((bias_testing, image_testing), axis=0)#785*4,000

In [2]:
def sigmoid(matrix):
    return 1 / (1 + np.exp(-matrix))

def loss(h, label, m):
    return -(np.sum((label*np.log(h))+(1-label)*np.log(1-h)))/m

def forward_prop(u, v, w, x, m):
    #y  = prop_y(u, x, v)
    bias = np.ones((1,m))
    
    y_ = u.dot(x)
    y  = sigmoid(y_)
    y  = np.concatenate((bias, y), axis=0)

    z_ = v.dot(y)
    z  = sigmoid(z_)
    z  = np.concatenate((bias, z), axis=0)
    
    h_ = w.dot(z)
    h  = sigmoid(h_)
    
    return y, z, h

In [3]:
def delta_4(h, label):
    return h-label

def delta_3(d4, w, z):
    diff1 = (w.T).dot(d4)
    diff2 = z * (1 - z)
    return diff1*diff2

def delta_2(d3, v, y):
    bias = np.ones((1, 197))
    v = np.concatenate((bias, v), axis=0)
    diff1 = (v.T).dot(d3)
    diff2 = y * (1 - y)
    return diff1*diff2

def partial_w(d4, z, m):
    return (d4.dot(z.T))/m
    
def partial_v(d3, y, m):
    return (d3.dot(y.T))/m

def partial_u(d2, x, m):
    return (d2.dot(x.T))/m

In [4]:
def predict(h, m):
    
    pred_list = np.empty(m, dtype=int)
    for i in range(m):
        max = 0
        idx = 0
        for j in range(10):
            if max < h[j, i]:
                max = h[j, i]
                idx = j
        pred_list[i] = idx
        
    return pred_list

def getAccuracy(h, label, m):
    
    correct = 0
    count = 0
    pred = predict(h, m)
    #for i in range(m):
    for item in pred:
        comp = label[count]
        if item == comp:
            correct += 1
        count += 1
    accuracy = correct / m
    accuracy_list.append(accuracy)
    
    return accuracy

In [5]:
#assign random values to weights(u, v, w)
u = np.empty((196, 785))
for i in range(785):
    u[:, i] = np.random.normal(0.0, 1, size = 196)
    
v = np.empty((49, 197))
for i in range(197):
    v[:, i] = np.random.normal(0.0, 1, size = 49)
    
w = np.empty((10, 50))
for i in range(50):
    w[:, i] = np.random.normal(0.0, 1, size = 10)
    
learning_rate = 1
t         = 0
iteration = list()
temp_loss = 0 
temp_loss_test = 0
train_loss_list = list()
test_loss_list  = list()

accuracy_list = list()

In [6]:
while True:
    (y, z, h) = forward_prop(u, v, w, image_training, num_training)
    if t>1 and abs(temp_loss - loss(h, label_training, num_training)) <= 0.000001:
        fin_u = u#need?
        fin_v = v
        fin_w = w
        break
    temp_loss = loss(h, label_training, num_training)
    train_loss_list.append(temp_loss)
    iteration.append(t)
    
    (y_test, z_test, h_test) = forward_prop(u, v, w, image_testing, num_testing)
    temp_loss_test = loss(h, label_testing, num_testing)
    test_loss_list.append(t)
    
    d4 = delta_4(h, label_training)
    d3 = delta_3(d4, w, z)
    d2 = delta_2(d3, v, y)
    
    p_w = partial_w(d4, z, num_training)
    temp_p_v = partial_v(d3, y, num_training)
    temp_p_u = partial_u(d2, image_training, num_training)
    
    p_v = temp_p_v[1:50, :]
    p_u = temp_p_u[1:197, :]
        
    w = w - learning_rate * p_w
    v = v - learning_rate * p_v
    u = u - learning_rate * p_u
    
    fin_accuracy = getAccuracy(h, list_label_training, num_training)
    
    print(t, temp_loss, '\n')
    t += 1

0 18.689181960194613 

1 0.4151759451836027 

2 0.2827692890826933 

3 0.271073854201593 

4 0.2634036745169394 

5 0.25756413524660793 

6 0.25273277911664394 

7 0.24852395054920154 

8 0.24473094835903803 

9 0.241232358968753 

10 0.23795183448112667 

11 0.23483872261111083 

12 0.23185791970692296 

13 0.22898419930103947 

14 0.2261988726749479 

15 0.22348773530819846 

16 0.22083975590606356 

17 0.21824621117132872 

18 0.21570009736871198 

19 0.21319571903770979 

20 0.21072839449363417 

21 0.20829424064123322 

22 0.20589001335274712 

23 0.20351298807848311 

24 0.20116087061538554 

25 0.1988317311782443 

26 0.19652395711175594 

27 0.19423622088209258 

28 0.19196746089904335 

29 0.18971687339856055 

30 0.18748391408094806 

31 0.18526830858319707 

32 0.18307007114894622 

33 0.18088953112335404 

34 0.17872736694401298 

35 0.17658464734593557 

36 0.17446287926801737 

37 0.1723640615829853 

38 0.17029074312044368 

39 0.1682460824514199 

40 0.16623390563075177

KeyboardInterrupt: 

In [None]:
plt.plot(iteration1, train_loss_list, c = 'blue')
plt.plot(iteration2, test_loss_list, c = 'red')
plt.legend(['training error', 'testing error'])
plt.xlabel('t : iteration')
plt.ylabel('cost value')
plt.title('1. The loss curve')
plt.grid()
plt.show()