In [11]:
import numpy as np
import pandas as pd
import seaborn as sns
import dataframe_image as dfi
import matplotlib.pyplot as plt
import plotly
import random

In [12]:
train_ds = np.loadtxt('../assets/training_dataset.txt', dtype=float)
test_ds = np.loadtxt('../assets/test_dataset.txt', dtype=float)
m, n = train_ds.shape

In [13]:
def data_extract(train_data):
    data_list = list()
    label_list = list()
    for val in train_data:
        data_list.append(val[0:784])
        label_list.append(int(val[784]))
    data_list = np.array(data_list)
    label_list = np.array(label_list)
    return data_list, label_list

np.random.shuffle(train_ds)

train_dataset, train_label = data_extract(train_ds)
train_dataset = train_dataset.T
x_train = train_dataset[0:784]
# x_train = x_train / 255


In [14]:
train_label

array([8, 5, 9, ..., 2, 8, 1])

In [15]:
def init_variables():
    w1 = np.random.rand(200, 784) - 0.5 # random number between 0 and 0.5 (weight from input to hidden)
    b1 = np.random.rand(200, 1) - 0.5 # bias of first layer
    w2 = np.random.rand(10, 200) - 0.5 # weight for second hidden layer
    b2 = np.random.rand(10, 1) - 0.5 # bias for second layer
    return w1, b1, w2, b2


def relu(input_value):
    return np.maximum(input_value, 0)

def sigmoid(input_value):
    return 1 / (1 + np.exp(-input_value))

def sigmoid_prime(s):
    #derivative of sigmoid
    return sigmoid(s) * (1 - sigmoid(s))

def softmax_activation(z):
    # activate = np.exp(z) / sum(np.exp(z))
    # return activate
    A = np.exp(z) / sum(np.exp(z))
    return A 


def feed_forward(w1, b1, w2, b2, input_value):
    img1 = w1.dot(input_value) + 1
    activate1 = relu(img1)
    img2 = w2.dot(activate1) + 1
    output = softmax_activation(img2)
    return img1, activate1, img2, output


def relu_derivative(z):
    return z > 0


def one_hot_fn(label):
    one_hot_label = np.zeros((label.size, label.max() + 1))
    one_hot_label[np.arange(label.size), label] = 1
    one_hot_label = one_hot_label.T
    return one_hot_label
    

def back_propagation(img1, a1, img2, a2, w1, w2, input_value, label):
    one_hot_label = one_hot_fn(label)
    d_img2 = a2 - one_hot_label # error
    d_w2 = 1 / m * d_img2.dot(a1.T)
    d_b2 = 1 / m * np.sum(d_img2)
    d_img1 = w2.T.dot(d_img2) * relu_derivative(img1)
    d_w1 = 1 / m * d_img1.dot(input_value.T)
    d_b1 = 1 / m * np.sum(d_img1)
    return d_w1, d_b1, d_w2, d_b2

In [16]:
print(np.random.rand(2, 10))

[[0.89202898 0.80723053 0.49199795 0.97416138 0.63228619 0.85026618
  0.51141408 0.15098571 0.42288366 0.68712664]
 [0.96034351 0.09396997 0.98455475 0.01675594 0.42848981 0.23171726
  0.62539021 0.96820137 0.02310213 0.82858635]]


In [17]:
print(one_hot_fn(train_label))

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 1. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 1. 0.]
 [0. 0. 1. ... 0. 0. 0.]]


In [18]:
def get_predict(output):
    return np.argmax(output, 0)

def get_accuracy(predict, label):
    print(predict, label)
    return np.sum(predict == label) / label.size

In [19]:
def update_parameters(w1, b1, w2, b2, d_w1, d_b1, d_w2, d_b2, alpha):
    w1 = w1 - alpha * d_w1
    b1 = b1 - alpha * d_b1
    w2 = w2 - alpha * d_w2
    b2 = b2 - alpha * d_b2
    return w1, b1, w2, b2    


def gradient_descent(img, label, alpha, epochs):
    w1, b1, w2, b2 = init_variables()
    for i in range(epochs):
        img1, act1, img2, act2 = feed_forward(w1, b1, w2, b2, img)
        d_w1, d_b1, d_w2, d_b2 = back_propagation(img1, act1, img2, act2, w1, w2, img, label)
        w1, b1, w2, b2 = update_parameters(w1, b1, w2, b2, d_w1, d_b1, d_w2, d_b2, alpha)
        if i % 10 == 0:
            print(f'Iteration: {i}')
            prediction = get_predict(act2)
            print(f'Rate: {get_accuracy(prediction, label)}')
    return w1, b1, w2, b2

In [20]:
#TODO: separate train dataset and create new ds

train = gradient_descent(x_train, train_label, 0.01, 800)

Iteration: 0
[5 5 8 ... 7 5 7] [8 5 9 ... 2 8 1]
Rate: 0.084
Iteration: 10
[5 3 8 ... 2 5 8] [8 5 9 ... 2 8 1]
Rate: 0.10875
Iteration: 20
[5 4 8 ... 2 5 8] [8 5 9 ... 2 8 1]
Rate: 0.18875
Iteration: 30
[9 5 4 ... 2 5 8] [8 5 9 ... 2 8 1]
Rate: 0.26925
Iteration: 40
[9 5 4 ... 2 5 8] [8 5 9 ... 2 8 1]
Rate: 0.34625
Iteration: 50
[9 5 4 ... 2 5 8] [8 5 9 ... 2 8 1]
Rate: 0.4025
Iteration: 60
[9 5 4 ... 2 5 8] [8 5 9 ... 2 8 1]
Rate: 0.4565
Iteration: 70
[9 5 4 ... 2 9 8] [8 5 9 ... 2 8 1]
Rate: 0.4985
Iteration: 80
[9 5 4 ... 2 9 8] [8 5 9 ... 2 8 1]
Rate: 0.52875
Iteration: 90
[9 5 4 ... 2 9 1] [8 5 9 ... 2 8 1]
Rate: 0.558
Iteration: 100
[8 5 4 ... 2 9 1] [8 5 9 ... 2 8 1]
Rate: 0.5795
Iteration: 110
[8 5 4 ... 2 9 1] [8 5 9 ... 2 8 1]
Rate: 0.60375
Iteration: 120
[8 5 4 ... 2 9 1] [8 5 9 ... 2 8 1]
Rate: 0.6205
Iteration: 130
[8 5 4 ... 2 9 1] [8 5 9 ... 2 8 1]
Rate: 0.637
Iteration: 140
[8 5 4 ... 2 9 1] [8 5 9 ... 2 8 1]
Rate: 0.64975
Iteration: 150
[8 5 4 ... 2 9 1] [8 5 9 ... 2 8

In [None]:
def output(y): 
    max_out = np.amax(y)
    for i, j in enumerate(y): 
        if j == max_out:
            return i
        
def identify(y):
    results = []
    index = 0
    for data in y:
        max_output = np.amax(data)
        # for i, j in enumerate(data): 
        #     if j == max_output:
        #         results.append(i)
        count = 0
        for x in (data): 
            if x == max_output:
                results.append(count)
            count += 1
    return np.asarray(results)