In [1]:
import numpy as np

In [2]:
nh = 2 # hidden neuron count
X = np.loadtxt('data/mlp_in.txt')
y = np.loadtxt('data/mlp_out.txt')
learning_rate = 5e-3
max_iterations = int(5e5)
show_every = int(max_iterations / 5)

In [3]:
def activation(x): # sigmoid
    return 1 / (1 + np.exp(-x))

def activation_d(x): # sigmoid derivative
    s = activation(x)
    return s * (1 - s)

def cost(y1, y2):
    return (np.linalg.norm(y1 - y2) ** 2) / 2

def cost_d(y1, y2):
    return (y1 - y2) * activation_d(y1)

def mlp_train(X, y, nh, learning_rate=1e-2, max_iterations=int(1e5), show_every=1000):
    ni = 1 if len(X.shape) == 1 else len(X[0]) # input neuron count
    no = 1 if len(y.shape) == 1 else len(y[0]) # output neuron count
    
    hw = np.random.rand(ni, nh) # hidden layer weights
    hb = np.random.randn(1, nh) # hidden layer bias
    
    ow = np.random.rand(nh, no) # output layer weights
    ob = np.random.randn(1, no) # output layer bias
    
    for iteration in range(max_iterations):
        if (iteration % show_every == 0):
            print('iteration', iteration)
        for xi, yi in zip(X, y):
            xi = xi[np.newaxis, :]
            
            ha = np.dot(xi, hw) + hb
            ho = activation(ha)
            
            oa = np.dot(ho, ow) + ob
            oo = activation(oa)
            
            c = cost(oo, yi)
            
            grad_ob = cost_d(oo, yi)
            grad_ow = np.dot(ho.T, grad_ob)
            
            ow -= learning_rate * grad_ow
            ob -= learning_rate * grad_ob
            
            grad_hb = np.dot(grad_ob, ow.T) * (ho * (1 - ho))
            grad_hw = np.dot(xi.T, grad_hb)
            
            hw -= learning_rate * grad_hw
            hb -= learning_rate * grad_hb
            
            if (iteration % show_every == 0):
                print(xi, '->', oo, 'cost', c)
    
    return (hw, ow), (hb, ob)
            
def mlp_predict(x, w, b):
    hw, ow = w
    hb, ob = b
    ha = np.dot(x, hw) + hb
    ho = activation(ha)
    oa = np.dot(ho, ow) + ob
    oo = activation(oa)
    return oo

In [4]:
w, b = mlp_train(X, y, nh, learning_rate, max_iterations, show_every)

print('final:')
for x in X:
    o = mlp_predict(x, w, b)
    print(x, '->', o)

iteration 0
[[0. 0.]] -> [[0.73529909]] cost 0.2703323752982125
[[0. 1.]] -> [[0.75322895]] cost 0.03044797455038804
[[1. 0.]] -> [[0.74037906]] cost 0.03370151629866105
[[1. 1.]] -> [[0.75762876]] cost 0.28700066837091803
iteration 100000
[[0. 0.]] -> [[0.29335753]] cost 0.04302931914125131
[[0. 1.]] -> [[0.55896428]] cost 0.09725625528425884
[[1. 0.]] -> [[0.57330287]] cost 0.09103522169029009
[[1. 1.]] -> [[0.5980108]] cost 0.1788084558572554
iteration 200000
[[0. 0.]] -> [[0.03360377]] cost 0.0005646066517643046
[[0. 1.]] -> [[0.9635055]] cost 0.0006659241761070079
[[1. 0.]] -> [[0.96344531]] cost 0.0006681225532678748
[[1. 1.]] -> [[0.03422717]] cost 0.000585749547101952
iteration 300000
[[0. 0.]] -> [[0.01450236]] cost 0.00010515919056558344
[[0. 1.]] -> [[0.98586714]] cost 9.98688807417933e-05
[[1. 0.]] -> [[0.98585201]] cost 0.00010008276610595416
[[1. 1.]] -> [[0.01225632]] cost 7.510871123782723e-05
iteration 400000
[[0. 0.]] -> [[0.00915563]] cost 4.191276066565313e-05
[[0. 

In [5]:
hw, ow = w
hb, ob = b
print('hidden layer weights:')
print(hw)
print('hidden layer bias:')
print(hb)
print('output layer weights:')
print(ow)
print('output layer bias:')
print(ob)

hidden layer weights:
[[7.04839426 5.22942716]
 [7.05518855 5.23068698]]
hidden layer bias:
[[-3.1884271  -8.00381815]]
output layer weights:
[[ 11.48926528]
 [-12.26235715]]
output layer bias:
[[-5.45422749]]
