In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv("toy_dataset.csv")

print(data.head())

   label  left-top  right-top  left-bottom  right-bottom
0      0        50         60           55            65
1      1       120        130          125           135
2      2       200        210          205           215


In [2]:
data = np.array(data)
print(data.shape)
m, n = data.shape

(3, 5)


In [3]:
train_dataset = data.T
train_dataset

array([[  0,   1,   2],
       [ 50, 120, 200],
       [ 60, 130, 210],
       [ 55, 125, 205],
       [ 65, 135, 215]])

In [4]:
Y = train_dataset[0]
X = train_dataset[1:]
X = X / 255.0

In [5]:
X

array([[0.19607843, 0.47058824, 0.78431373],
       [0.23529412, 0.50980392, 0.82352941],
       [0.21568627, 0.49019608, 0.80392157],
       [0.25490196, 0.52941176, 0.84313725]])

In [6]:
X.shape

(4, 3)

In [7]:
Y

array([0, 1, 2])

In [8]:
def init_params():
    w1 = np.random.rand(3, 4) - 0.5
    b1 = np.random.rand(3, 1) - 0.5
    w2 = np.random.rand(3, 3) - 0.5
    b2 = np.random.rand(3, 1) - 0.5
    return w1, b1, w2, b2

In [9]:
def ReLU(Z):
    return np.maximum(Z, 0)


def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A

In [10]:
def forward_prop(w1, b1, w2, b2, X):
    z1 = w1.dot(X)+b1
    a1 = ReLU(z1)
    z2 = w2.dot(a1)+b2
    a2 = softmax(z2)
    return z1, a1, z2, a2

In [11]:
def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y


def deriv_ReLU(Z):
    return Z > 0

In [12]:
# z1, a1, z2, a2: M, A, Z, output
def back_prop(z1, a1, z2, a2, w2, Y, X):
    OneHot_Y = one_hot(Y)
    dZ2 = a2-OneHot_Y
    dW2 = 1/m * dZ2.dot(a1.T)
    db2 = 1/m * np.sum(dZ2)
    dZ1 = w2.T.dot(dZ2)*deriv_ReLU(z1)
    dW1 = 1/m * dZ1.dot(X.T)
    db1 = 1/m * np.sum(dZ1)
    return dW1, db1, dW2, db2

In [13]:
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1-alpha*dW1
    b1 = b1-alpha*db1
    W2 = W2-alpha*dW2
    b2 = b2-alpha*db2
    return W1, b1, W2, b2

In [14]:
def get_predictions(a2):
    return np.argmax(a2, 0)


def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y)/Y.size

In [15]:
def gradient_descent(X, Y, iterations, alpha):
    w1, b1, w2, b2 = init_params()
    for i in range(iterations):
        z1, a1, z2, a2 = forward_prop(w1, b1, w2, b2, X)
        dW1, db1, dW2, db2 = back_prop(z1, a1, z2, a2, w2, Y, X)
        w1, b1, w2, b2 = update_params(
            w1, b1, w2, b2, dW1, db1, dW2, db2, alpha)
        if i % 10 == 0:
            print("iterations: ", i)
            print("accuracy: ", get_accuracy(get_predictions(a2), Y))
    return w1, b1, w2, b2

In [16]:
w1, b1, w2, b2 = gradient_descent(X, Y, 500, 0.1)

iterations:  0
[1 1 1] [0 1 2]
accuracy:  0.3333333333333333
iterations:  10
[1 1 1] [0 1 2]
accuracy:  0.3333333333333333
iterations:  20
[1 1 1] [0 1 2]
accuracy:  0.3333333333333333
iterations:  30
[1 1 1] [0 1 2]
accuracy:  0.3333333333333333
iterations:  40
[1 1 1] [0 1 2]
accuracy:  0.3333333333333333
iterations:  50
[1 1 1] [0 1 2]
accuracy:  0.3333333333333333
iterations:  60
[1 1 2] [0 1 2]
accuracy:  0.6666666666666666
iterations:  70
[1 2 2] [0 1 2]
accuracy:  0.3333333333333333
iterations:  80
[1 2 2] [0 1 2]
accuracy:  0.3333333333333333
iterations:  90
[1 2 2] [0 1 2]
accuracy:  0.3333333333333333
iterations:  100
[1 2 2] [0 1 2]
accuracy:  0.3333333333333333
iterations:  110
[1 2 2] [0 1 2]
accuracy:  0.3333333333333333
iterations:  120
[1 2 2] [0 1 2]
accuracy:  0.3333333333333333
iterations:  130
[1 2 2] [0 1 2]
accuracy:  0.3333333333333333
iterations:  140
[1 2 2] [0 1 2]
accuracy:  0.3333333333333333
iterations:  150
[1 2 2] [0 1 2]
accuracy:  0.3333333333333333
ite

In [17]:
w1, b1, w2, b2

(array([[ 1.25959526,  0.75720921,  1.2168896 ,  1.1209563 ],
        [-0.88903485, -0.68473654, -0.53717486, -0.65051612],
        [-0.48134593, -0.33647967, -0.45232916, -0.79040967]]),
 array([[2.65492485],
        [2.23963294],
        [2.23380643]]),
 array([[-0.83451648,  2.05188063,  0.96329523],
        [ 0.03126201, -0.01478953,  0.0082976 ],
        [ 0.57393393, -1.78293824, -1.23173213]]),
 array([[ 0.18465453],
        [ 0.4927511 ],
        [-0.4247987 ]]))