In [57]:
import numpy as np

In [44]:
def numerical_derivative(f, x):
    delta_x = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    
    while not it.finished:
        idx = it.multi_index        
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + delta_x
        fx1 = f(x) # f(x+delta_x)
        
        x[idx] = tmp_val - delta_x 
        fx2 = f(x) # f(x-delta_x)
        grad[idx] = (fx1 - fx2) / (2*delta_x)
        
        x[idx] = tmp_val 
        it.iternext()   
        
    return grad

def sigmoid(x):
    return 1 / (1+np.exp(-x))

In [45]:
input_data1 = np.array([1, 2])
target_data1 = np.array([1])

In [46]:
input_nodes = 2
hidden_nodes = 3
output_nodes = 1

In [47]:
W2 = np.random.rand(input_nodes, hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)
    
b2 = np.random.rand(hidden_nodes)
b3= np.random.rand(output_nodes)

print("W2 = ", W2, ", W2.shape = ", W2.shape)
print("b2 = ", b2, ", b2.shape = ", b2.shape)
print("W3 = ", W3, ", W3.shape = ", W3.shape)
print("b3 = ", b3, ", b3.shape = ", b3.shape)

W2 =  [[0.9748272  0.22817918 0.44782704]
 [0.29983271 0.46941353 0.19009797]] , W2.shape =  (2, 3)
b2 =  [0.14061273 0.74637708 0.20698104] , b2.shape =  (3,)
W3 =  [[0.48832384]
 [0.09307685]
 [0.37014902]] , W3.shape =  (3, 1)
b3 =  [0.33616153] , b3.shape =  (1,)


In [54]:
def feed_forward(xdata):
    delta = 1e-7   
    print("[feed_forward function]") 
    
    Z2 = np.dot(xdata, W2) + b2    
    print("Z2 = ", Z2)
    
    A2 = sigmoid(Z2)    
    print("A2 = ", A2)
    
    Z3 = np.dot(A2, W3) + b3    
    print("Z3 = ", Z3)
    
    y = A3 = sigmoid(Z3)    
    print("y = ", y, ", A3 = ", A3)
    
    loss = -np.sum( target_data1*np.log(y + delta) + (1-target_data1)*np.log((1 - y)+delta ) )    
    print('current loss val = ', loss)
    
    print("[feed_forward function]\n")
        
    return loss  

In [55]:
loss_val = feed_forward(input_data1)

[feed_forward function]
Z2 =  [1.74895089 1.91976056 1.07364582]
A2 =  [0.85182043 0.87211173 0.74528963]
Z3 =  [1.25414671]
y =  [0.77801685] , A3 =  [0.77801685]
current loss val =  0.25100696616487855
[feed_forward function]



In [56]:
learning_rate = 1e-1

f = lambda x : feed_forward(input_data1)
print('\n=================================================')
print('initial W2 = ', W2)
print('initial b2 = ', b2)
print('initial W3 = ', W3)
print('initial b3 = ', b3)
print('=================================================\n')

W2 -= learning_rate * numerical_derivative( f, W2 )
b2 -= learning_rate * numerical_derivative( f, b2 )

W3 -= learning_rate * numerical_derivative( f, W3 )
b3 -= learning_rate * numerical_derivative( f, b3 )

print('\n=================================================')
print('updated W2 = ', W2)
print('updated b2 = ', b2)
print('updated W3 = ', W3)
print('updated b3 = ', b3)
print('=================================================\n')


initial W2 =  [[0.98098225 0.22934277 0.45485465]
 [0.31214281 0.47174071 0.2041532 ]]
initial b2 =  [0.14368301 0.74693637 0.21048476]
initial W3 =  [[0.52939583]
 [0.13519302]
 [0.40601923]]
initial b3 =  [0.38269118]

[feed_forward function]
Z2 =  [1.74905089 1.91976056 1.07364582]
A2 =  [0.85183305 0.87211173 0.74528963]
Z3 =  [1.2541534]
y =  [0.77801801] , A3 =  [0.77801801]
current loss val =  0.25100548289408864
[feed_forward function]

[feed_forward function]
Z2 =  [1.74885089 1.91976056 1.07364582]
A2 =  [0.85180781 0.87211173 0.74528963]
Z3 =  [1.25414003]
y =  [0.7780157] , A3 =  [0.7780157]
current loss val =  0.2510084495477532
[feed_forward function]

[feed_forward function]
Z2 =  [1.74895089 1.91986056 1.07364582]
A2 =  [0.85182043 0.87212288 0.74528963]
Z3 =  [1.25414822]
y =  [0.77801711] , A3 =  [0.77801711]
current loss val =  0.251006631461069
[feed_forward function]

[feed_forward function]
Z2 =  [1.74895089 1.91966056 1.07364582]
A2 =  [0.85182043 0.87210058 0.7