In [147]:
import sklearn
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.datasets import make_moons
from sklearn.utils import shuffle

In [148]:
n = 10016
batch_size = 32

In [149]:
X, Y = make_moons(n_samples=n, noise=0.1)
X, Y = shuffle(X, Y, random_state=72)

In [150]:
Xs = []
Ys = []
batch_num = int(np.ceil(len(X) / batch_size))

In [151]:
for i in range(batch_num):
    start = i * batch_size
    end = start + batch_size
    Xs.append(X[start:end])
    Ys.append(Y[start:end])

In [152]:
Xs = np.array(Xs).reshape(batch_num, batch_size, 2, 1)
Ys = np.array(Ys).reshape(batch_num, batch_size, 1, 1)
X = X.reshape(n, 2, 1)
Y = Y.reshape(n, 1, 1)

In [153]:
size = [2, 10, 5, 1]

In [154]:
weights = [0]
biases = [0]
for i in range(0, len(size) - 1):
    weights.append(np.random.rand(size[i + 1], size[i]))
    biases.append(np.random.rand(size[i + 1], 1))

In [155]:
weights

[0,
 array([[0.95985306, 0.90385927],
        [0.85077379, 0.04986331],
        [0.09023366, 0.33695476],
        [0.24351624, 0.0765251 ],
        [0.67719296, 0.8629407 ],
        [0.94189542, 0.49765498],
        [0.32925475, 0.70172597],
        [0.42504879, 0.75445369],
        [0.68013049, 0.62906304],
        [0.85376503, 0.39199793]]),
 array([[0.15850054, 0.63840564, 0.45108912, 0.5296283 , 0.86880553,
         0.71742102, 0.49401923, 0.93156636, 0.26121428, 0.6807936 ],
        [0.60249377, 0.40424151, 0.97798688, 0.69652276, 0.1365516 ,
         0.70445447, 0.74000987, 0.92927478, 0.80774946, 0.17789342],
        [0.36817682, 0.08179582, 0.98717719, 0.22574008, 0.89155667,
         0.43203504, 0.89252449, 0.42332459, 0.10404691, 0.5455261 ],
        [0.03421318, 0.47909279, 0.23585459, 0.68315243, 0.68658566,
         0.585004  , 0.74215408, 0.11664904, 0.99043439, 0.85984144],
        [0.96504346, 0.59046735, 0.95790349, 0.37752846, 0.25443665,
         0.76296269, 0.218256

In [156]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [157]:
def L(y_hat, y):
    return 0.5 * (y_hat - y) ** 2

In [158]:
def feed_forward(xi, weights, biases):
    activations = [xi]
    z1 = weights[1] @ xi + biases[1]
    a1 = sigmoid(z1)
    activations.append(a1)
    for i in range(2, len(weights)):
        zn = weights[i] @ activations[-1] + biases[i]
        an = sigmoid(zn)
        activations.append(an)
    return activations

In [159]:
def sigmoid_derivative(y):
    return y * ( 1 - y )

In [160]:
def backprop(weights, biases, xi, y, activations):
    deltas = [0] * len(activations)
    dL_dwK = [0] * len(activations)
    dL_dbK = [0] * len(activations)

    deltas[-1] = ( activations[-1] - y ) * sigmoid_derivative(activations[-1])
    dL_dwK[-1] = deltas[-1] @ activations[-1].T
    dL_dbK[-1] = deltas[-1]

    for i in range(len(deltas)-2, 1, -1):
        deltas[i] = ( weights[i+1].T @ deltas[ i + 1 ] ) * sigmoid_derivative(activations[i])
        dL_dwK[i] = deltas[i] @ activations[ i - 1 ].T
        dL_dbK[i] = deltas[i]

    return dL_dwK, dL_dbK

In [161]:
act = feed_forward(Xs[0][2], weights, biases)
print(act)
bc = backprop(weights, biases, Xs[0][2], Ys[0][2], act)[0]
for w in bc:
    if type(w) is not int: print(w.shape)

[array([[-0.76764797],
       [ 0.8011585 ]]), array([[0.50547582],
       [0.40454156],
       [0.66700947],
       [0.49495943],
       [0.63034775],
       [0.57999124],
       [0.63861442],
       [0.58160648],
       [0.68103212],
       [0.5459224 ]]), array([[0.97859937],
       [0.98949086],
       [0.97822417],
       [0.98052354],
       [0.98297309]]), array([[0.97896418]])]
(5, 10)
(1, 1)


In [162]:
epochs = 20
lr = 0.01
loss = []
pm, pv, t = 0, 0, 1
beta_1 = 0.9
vt1 = [0]*len(weights)
gamma = 0.7

In [163]:
vt1

[0, 0, 0, 0]

In [164]:
def adam(weight_bias_derivative, weight, learning_rate, previous_moment, previous_velocity, t, beta_1=0.8, beta_2=0.95, epsilon=10**-8):
    gt = weight_bias_derivative
    mt = beta_1 * previous_moment + (1 - beta_1) * gt
    vt = beta_2 * previous_velocity + (1 - beta_2) * gt ** 2
    mt_hat = mt/(1 - beta_1**t)
    vt_hat = vt/(1 - beta_2**t)
    ret = weight - (learning_rate*mt_hat)/(np.sqrt(vt_hat)+epsilon)
    return ret, np.linalg.norm(mt), np.linalg.norm(vt), t+1

In [165]:
for i in tqdm(range(epochs)):
    epochloss = 0
    for j in range(batch_num):
        for i, xi in enumerate(Xs[j]):
            activations = feed_forward(xi, weights, biases)
            epochloss += L(activations[-1], Ys[j,i])

            dL_dw, dL_db = backprop(weights, biases, xi, Ys[j,i], activations)

            for i2, w in enumerate(weights):
                w -= lr * dL_dw[i]
                #vt1[i2] = gamma * vt1[i2] + lr * dL_dw[i2]
                #weights[i2] -= vt1[i2]
                #w, pm, pv, t = adam(dL_dw[i], w, lr, pm, pv, t)
            for i2, b in enumerate(biases):
                b -= lr * dL_db[i2]

    loss.append(epochloss / n)

  0%|          | 0/20 [00:00<?, ?it/s]


ValueError: operands could not be broadcast together with shapes (10,2) (5,10) (10,2) 

In [None]:
loss

In [None]:
xcoord = np.array([n for n in range(epochs)]).reshape(20, 1)
ycoord = np.array(loss).reshape(20, 1)
plt.plot(xcoord, ycoord)
#plt.xscale('log')
plt.yscale('log')
plt.savefig('loss1.png')

In [None]:
def evaluate_visually(x, w, b):
    rb = []
    for xi in x:
        predictions = feed_forward(xi, w, b)
        if predictions[-1] <= 0.5: rb.append('red')
        else: rb.append('blue')
    return rb

In [None]:
rb = evaluate_visually(X.reshape(n, 2, 1), weights, biases)

In [None]:
plt.scatter(Xs[:,:,0,:], Xs[:,:,1,:], c=rb)
plt.savefig('prediction1.png')

In [None]:
def evaluate_errors(x, w, b, y):
    rb = []
    for i, xi in enumerate(x):
        p = 0
        predictions = feed_forward(xi, w, b)
        if predictions[-1] <= 0.5: p = 0
        else: p = 1
        if p == Y[i, 0]:
            rb.append('gray')
        else: rb.append('red')
    return rb

In [None]:
br = evaluate_errors(X, weights, biases, Y)

In [None]:
plt.scatter(X[:,0,:], X[:,1,:], c=br)

In [None]:
inc = 0
for c in br:
    if c == 'red': inc += 1
print(f'Training Accuracy {inc} / {n} : {((n-inc)/n)*100:.2f}%')