In [1]:
# Ref. https://iamtrask.github.io/2015/07/12/basic-python-network/

#### loading numpy

In [2]:
import numpy as np

## one layer

#### defining sigmoid function

In [3]:
def nonlin(x, deriv=False):
    """Sigmoid activation, or its slope expressed through the sigmoid output.

    Parameters
    ----------
    x : scalar or numpy array
        With ``deriv`` falsy: the value(s) to squash with the sigmoid.
        With ``deriv`` truthy: ``x`` must ALREADY be a sigmoid output,
        because the slope is computed as ``x * (1 - x)`` without applying
        the sigmoid again.
    deriv : bool, optional
        When truthy, return the slope form instead of the sigmoid.

    Returns
    -------
    ``1 / (1 + exp(-x))`` when ``deriv`` is falsy, ``x * (1 - x)`` otherwise.
    """
    # Plain truthiness test instead of comparing against True.
    if deriv:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))

#### the input dataset

In [4]:
# One training example per row, three input values each;
# the third column is 1 in every row.
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])

In [5]:
X

array([[0, 0, 1],
       [0, 1, 1],
       [1, 0, 1],
       [1, 1, 1]])

#### the output dataset

In [6]:
# Target column, one row per example (here it matches the first column of X).
y = np.array([[0], [0], [1], [1]])

In [7]:
y

array([[0],
       [0],
       [1],
       [1]])

#### we want the calculations to be deterministic, so we seed the random number generator

In [8]:
# Seed NumPy's global random generator so every run draws the same numbers.
np.random.seed(seed=1)

#### now, initialize the weights randomly in the range [-1, 1)

In [9]:
# Single weight layer (3 inputs -> 1 output): draws from [0, 1) are
# stretched and shifted to [-1, 1).
syn0 = np.random.random(size=(3, 1)) * 2 - 1

In [10]:
syn0

array([[-0.16595599],
       [ 0.44064899],
       [-0.99977125]])

In [11]:
# Train the single weight layer with 1000 full-batch updates.
# The loop variable is `j` (as in the other training loops) rather than
# `iter`, which would shadow the builtin for the rest of the session.
for j in range(1000):

    # forward propagation: the inputs are layer 0, the sigmoid of their
    # weighted sum is layer 1 (the prediction)
    l0 = X
    l1 = nonlin(np.dot(l0, syn0))

    # the error: how far the prediction is from the target
    l1_error = y - l1

    # the delta: error scaled by the sigmoid slope at l1
    # (nonlin(..., True) expects a sigmoid output, which l1 already is)
    l1_delta = l1_error * nonlin(l1, True)

    # update weights in place
    syn0 += np.dot(l0.T, l1_delta)

print("output the training")
# l1 holds the prediction from the final forward pass, i.e. it was computed
# just before the last weight update above.
print(l1)

output the training
[[0.03178421]
 [0.02576499]
 [0.97906682]
 [0.97414645]]


## using two layers

In [12]:
# Same four input rows as before: three values per example.
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])

In [13]:
X

array([[0, 0, 1],
       [0, 1, 1],
       [1, 0, 1],
       [1, 1, 1]])

In [14]:
# Target column: 1 only when exactly one of the first two inputs is 1 (XOR).
y = np.array([[0], [1], [1], [0]])

In [15]:
y

array([[0],
       [1],
       [1],
       [0]])

In [16]:
# Two weight matrices with entries in [-1, 1):
# syn0 maps the 3 inputs to 4 hidden units, syn1 maps those 4 units to 1 output.
# NOTE(review): no re-seed in this cell, so the values depend on how many
# random numbers were drawn earlier in the session.
syn0 = np.random.random(size=(3, 4)) * 2 - 1
syn1 = np.random.random(size=(4, 1)) * 2 - 1

In [17]:
# Compact training loop: 10000 full-batch updates with the sigmoid inlined.
for j in range(10000):
    # forward pass: hidden activations, then the prediction
    l1 = 1 / (1 + np.exp(-X.dot(syn0)))
    l2 = 1 / (1 + np.exp(-l1.dot(syn1)))

    # output delta: prediction error times the sigmoid slope l2 * (1 - l2)
    l2_delta = (y - l2) * (l2 * (1 - l2))
    # hidden delta: output delta sent back through syn1 (read before syn1
    # is modified below), times the hidden sigmoid slope
    l1_delta = np.dot(l2_delta, syn1.T) * (l1 * (1 - l1))

    # in-place weight updates
    syn1 += np.dot(l1.T, l2_delta)
    syn0 += np.dot(X.T, l1_delta)

# l1 / l2 are the activations from the last forward pass of the loop
print('after training', l1, l2, sep='\n')

after training
[[9.98272649e-01 8.10690891e-01 9.92720447e-01 9.48259994e-01]
 [8.99495704e-01 9.40027982e-01 8.56387523e-01 1.79116184e-02]
 [8.71408992e-01 8.19952447e-01 7.88333455e-01 2.01204937e-02]
 [9.49767161e-02 9.43403730e-01 1.40051039e-01 2.04333884e-05]]
[[0.01027924]
 [0.98879901]
 [0.99060372]
 [0.01310224]]


## now, with a three-layer neural network

In [18]:
def nonlin(x, deriv=False):
    """Sigmoid activation, or its slope expressed through the sigmoid output.

    Parameters
    ----------
    x : scalar or numpy array
        With ``deriv`` falsy: the value(s) to squash with the sigmoid.
        With ``deriv`` truthy: ``x`` must ALREADY be a sigmoid output,
        because the slope is computed as ``x * (1 - x)`` without applying
        the sigmoid again.
    deriv : bool, optional
        When truthy, return the slope form instead of the sigmoid.

    Returns
    -------
    ``1 / (1 + exp(-x))`` when ``deriv`` is falsy, ``x * (1 - x)`` otherwise.
    """
    # Plain truthiness test instead of comparing against True.
    if deriv:
        return x * (1 - x)

    return 1 / (1 + np.exp(-x))

In [19]:
# Input matrix: four examples (rows) with three values each.
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])

In [20]:
X

array([[0, 0, 1],
       [0, 1, 1],
       [1, 0, 1],
       [1, 1, 1]])

In [21]:
# Targets written as one row and transposed into a (4, 1) column.
y = np.array([[0, 1, 1, 0]]).T

In [22]:
y

array([[0],
       [1],
       [1],
       [0]])

In [23]:
# Re-seed NumPy's global generator so the weights drawn next are reproducible.
np.random.seed(seed=1)

In [24]:
# Weight matrices with entries in [-1, 1):
# syn0 is 3x4 (inputs -> hidden), syn1 is 4x1 (hidden -> output).
# syn0 is drawn first, so it consumes the first 12 random numbers.
syn0 = np.random.random(size=(3, 4)) * 2 - 1
syn1 = np.random.random(size=(4, 1)) * 2 - 1

In [25]:
syn0

array([[-0.16595599,  0.44064899, -0.99977125, -0.39533485],
       [-0.70648822, -0.81532281, -0.62747958, -0.30887855],
       [-0.20646505,  0.07763347, -0.16161097,  0.370439  ]])

In [26]:
syn1

array([[-0.5910955 ],
       [ 0.75623487],
       [-0.94522481],
       [ 0.34093502]])

In [27]:

# 60000 full-batch training steps; the mean absolute output error is
# printed on every 10000th step, starting at step 0.
for j in range(60000):

    # forward pass through both weight matrices
    l0 = X
    l1 = nonlin(l0.dot(syn0))
    l2 = nonlin(l1.dot(syn1))

    # gap between the targets and the current prediction
    l2_error = y - l2

    if not j % 10000:
        print("Error: " + str(np.abs(l2_error).mean()))

    # output delta: error scaled by the sigmoid slope at l2
    l2_delta = nonlin(l2, deriv=True) * l2_error

    # share of the output delta attributed to each hidden unit
    # (uses syn1 before it is updated below)
    l1_error = np.dot(l2_delta, syn1.T)

    # hidden delta: that share scaled by the sigmoid slope at l1
    l1_delta = nonlin(l1, deriv=True) * l1_error

    # in-place weight updates
    syn1 += np.dot(l1.T, l2_delta)
    syn0 += np.dot(l0.T, l1_delta)

Error: 0.4964100319027255
Error: 0.008584525653247157
Error: 0.0057894598625078085
Error: 0.004629176776769985
Error: 0.0039587652802736475
Error: 0.003510122567861678
