In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import warnings#ignore warning messages
warnings.filterwarnings("ignore")

file_data   = "mnist.csv"
handle_file = open(file_data, "r")
data        = handle_file.readlines() #list[10,000]
handle_file.close()

size_row    = 28    # height of the image
size_col    = 28    # width of the image

num_image   = len(data)
count       = 0     # count for the number of images

num_training = 6000
num_testing = 4000

# normalize the values of the input data to be [0, 1]
def normalize(data):
    
    data_normalized = (data - min(data)) / (max(data) - min(data))
    
    return(data_normalized)

# make a matrix each column of which represents an images in a vector form
list_image    = np.empty((size_row * size_col, num_image), dtype=float)    #784*10,000
list_label    = np.empty(num_image, dtype=int) #list[10,000]

for line in data:

    line_data   = line.split(',') #list[1+784]
    label       = line_data[0]
    im_vector   = np.asfarray(line_data[1:]) #list[784]
    im_vector   = normalize(im_vector)

    list_label[count]       = label
    list_image[:, count]    = im_vector

    count += 1
    
count = 0
label_vectors = np.empty((10, num_image), dtype=float)
zero_vector = np.zeros(10, dtype=float)

for l in list_label:
    la_vector    = zero_vector
    la_vector[l] = 1
    
    label_vectors[:, count] = la_vector
    count += 1
    
    
image_training = list_image[:, 0:num_training]         #784*6,000
image_testing  = list_image[:, num_training:num_image] #784*4,000

list_label_training = list_label[0:num_training]         #6,000
list_label_testing  = list_label[num_training:num_image] #4,000

label_training = label_vectors[:, 0:num_training]         #10*6,000
label_testing  = label_vectors[:, num_training:num_image] #10*4,000

bias_training = np.ones((1, num_training), dtype=int)
bias_testing  = np.ones((1, num_testing), dtype=int)

image_training = np.concatenate((bias_training, image_training), axis=0)#785*6,000
image_testing  = np.concatenate((bias_testing, image_testing), axis=0)#785*4,000

In [2]:
#assign random values to weights(u, v, w)
u = np.empty((196, 785))
for i in range(785):
    u[:, i] = np.random.normal(0.0, 1, size = 196)
    
v = np.empty((49, 197))
for i in range(197):
    v[:, i] = np.random.normal(0.0, 1, size = 49)
    
w = np.empty((10, 50))
for i in range(50):
    w[:, i] = np.random.normal(0.0, 1, size = 10)

In [3]:
def sigmoid(matrix):
    return 1 / (1 + np.exp(-matrix))

In [4]:


def prop_y(u, x, m):
    y_ = u.dot(x)
    y  = sigmoid(y_)
    y  = np.concatenate((bias, y), axis=0)
    return y

def forward_prop(u, v, w, x, m):
    #y  = prop_y(u, x, v)
    bias = np.ones((1,m))
    
    y_ = u.dot(x)
    y  = sigmoid(y_)
    y  = np.concatenate((bias, y), axis=0)

    z_ = v.dot(y)
    z  = sigmoid(z_)
    z  = np.concatenate((bias, z), axis=0)
    
    h_ = w.dot(z)
    h  = sigmoid(h_)
    
    return y, z, h

In [5]:
def loss(h, label, m):
    return -(np.sum((label*np.log(h))+(1-label)*np.log(1-h)))/m

In [6]:
def delta_4(h, label):
    return h-label

def delta_3(d4, w, z):
    diff1 = (w.T).dot(d4)
    diff2 = z * (1 - z)
    return diff1*diff2

def delta_2(d3, v, y):
    bias = np.ones((1, 197))
    v = np.concatenate((bias, v), axis=0)
    diff1 = (v.T).dot(d3)
    diff2 = y * (1 - y)
    return diff1*diff2

def partial_w(d4, z, m):
    return (d4.dot(z.T))/m
    
def partial_v(d3, y, m):
    return (d3.dot(y.T))/m

def partial_u(d2, x, m):
    return (d2.dot(x.T))/m

In [7]:
def predict(h, m):
    
    pred_list = np.empty(m, dtype=int)
    for i in range(m):
        max = 0
        idx = 0
        for j in range(10):
            if max < h[j, i]:
                max = h[j, i]
                idx = j
        pred_list[i] = idx
        
    return pred_list

def getAccuracy(h, label, m):
    
    correct = 0
    count = 0
    pred = predict(h, m)
    #for i in range(m):
    for item in pred:
        comp = label[count]
        if item == comp:
            correct += 1
        count += 1
    accuracy = correct / m
    accuracy_list.append(accuracy)
    
    return accuracy

In [8]:
learning_rate = 0.001
t         = 0
iteration = list()
temp_loss = 0 
loss_list = list()

accuracy_list = list()

In [10]:
while True:
    (y, z, h) = forward_prop(u, v, w, image_training, num_training)
    if t>1 and abs(temp_loss - loss(h, label_training, num_training)) <= 0.000001:
        fin_u = u#need?
        fin_v = v
        fin_w = w
        break
    temp_loss = loss(h, label_training, num_training)
    loss_list.append(temp_loss)
    iteration.append(t)
    
    d4 = delta_4(h, label_training)
    d3 = delta_3(d4, w, z)
    d2 = delta_2(d3, v, y)
    
    p_w = partial_w(d4, z, num_training)
    temp_p_v = partial_v(d3, y, num_training)
    temp_p_u = partial_u(d2, image_training, num_training)
    
    p_v = temp_p_v[1:50, :]
    p_u = temp_p_u[1:197, :]
        
    w = w - learning_rate * p_w
    v = v - learning_rate * p_v
    u = u - learning_rate * p_u
    
    #fin_accuracy = getAccuracy(h, list_label_training, num_training)
    
    print(t, temp_loss, '\n')
    t += 1

  
  


429 nan 

430 nan 

431 nan 

432 nan 

433 nan 

434 nan 

435 nan 

436 nan 

437 nan 

438 nan 

439 nan 

440 nan 

441 nan 

442 nan 

443 nan 

444 nan 

445 nan 

446 nan 

447 nan 

448 nan 

449 nan 

450 nan 

451 nan 

452 nan 

453 nan 

454 nan 

455 nan 

456 nan 

457 nan 

458 nan 

459 nan 

460 nan 

461 nan 

462 nan 

463 nan 

464 nan 

465 nan 

466 nan 

467 nan 

468 nan 

469 nan 

470 nan 

471 nan 

472 nan 

473 nan 

474 nan 

475 nan 

476 nan 

477 nan 

478 nan 

479 nan 

480 nan 

481 nan 

482 nan 

483 nan 

484 nan 

485 nan 

486 nan 

487 nan 

488 nan 

489 nan 

490 nan 

491 nan 

492 nan 

493 nan 

494 nan 

495 nan 

496 nan 

497 nan 

498 nan 

499 nan 

500 nan 

501 nan 

502 nan 

503 nan 

504 nan 

505 nan 

506 nan 

507 nan 

508 nan 

509 nan 

510 nan 

511 nan 

512 nan 

513 nan 

514 nan 

515 nan 

516 nan 

517 nan 

518 nan 

519 nan 

520 nan 

521 nan 

522 nan 

523 nan 

524 nan 

525 nan 

526 nan 

527 nan 

528 nan 



KeyboardInterrupt: 