### Forward Propagation without any hidden layer

In [1]:
import numpy as np

In [2]:
# Truth table of the AND gate: every row of X is one input pair and the
# matching row of Y holds the expected output for that pair.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
Y = np.array([[0], [0], [0], [1]])

In [3]:
# Sanity check of the array shapes: X holds one row per input pair, Y one
# target value per row.
X.shape , Y.shape

((4, 2), (4, 1))

###### sigmoid function

In [4]:
def sig(z):
    """Logistic sigmoid, 1 / (1 + exp(-z)), applied element-wise.

    The value is evaluated as exp(-log(1 + exp(-z))) through
    ``np.logaddexp`` so that large-magnitude negative inputs do not
    overflow inside ``np.exp`` (the direct formula emits a RuntimeWarning
    there even though it still ends up at 0).

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s); lists and tuples are accepted as well as
        NumPy arrays.

    Returns
    -------
    NumPy scalar or ndarray
        Sigmoid of ``z`` with the same shape as the input, each value
        lying in [0, 1].
    """
    z = np.asarray(z)
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)),
    # computed without ever forming exp(-z) on its own.
    return np.exp(-np.logaddexp(0.0, -z))

###### derivative of sigmoid function

In [5]:
def derivativeSig(z):
    """Derivative of the logistic sigmoid evaluated at ``z``.

    Uses the identity sig'(z) = sig(z) * (1 - sig(z)). The sigmoid is
    evaluated once and reused rather than being computed twice.

    Parameters
    ----------
    z : scalar or ndarray
        Pre-activation value(s), passed straight to ``sig``.

    Returns
    -------
    Same kind of value that ``sig`` returns for ``z``.
    """
    s = sig(z)
    return s * (1 - s)

In [6]:
# One weight per input, drawn uniformly from [-1, 1): random() yields
# values in [0, 1), which are stretched by 2 and then shifted down by 1.
weights = np.random.random((2, 1)) * 2 - 1
weights

array([[ 0.0312625 ],
       [-0.24819102]])

In [7]:
# Single bias value for the output neuron, drawn uniformly from [-1, 1)
# in the same way as the weights.
bias = np.random.random(1) * 2 - 1
bias

array([0.27281806])

In [8]:
# The "output" of layer 0 is simply the raw input.
output0 = X
# Single output neuron: sigmoid of the weighted input sum plus the bias.
output = sig(output0.dot(weights) + bias)

In [9]:
# Display the sigmoid activations of the untrained single neuron, one row
# per input pair in X.
output

array([[0.5677846 ],
       [0.50615645],
       [0.57543974],
       [0.51396875]])

### Forward Propagation with hidden layer

In [10]:
# Random parameters in [-1, 1) for a network with two inputs, two hidden
# units and one output unit (draw order: wh, bh, wo, bo).
wh = np.random.random((2, 2)) * 2 - 1   # input -> hidden weights
bh = np.random.random((1, 2)) * 2 - 1   # hidden-layer biases
wo = np.random.random((2, 1)) * 2 - 1   # hidden -> output weights
bo = np.random.random((1, 1)) * 2 - 1   # output-layer bias

In [11]:
# Layer 0 passes the input through unchanged.
output0 = X
# Hidden activations: sigmoid of (inputs x hidden weights + hidden biases).
outputHidden = sig(output0.dot(wh) + bh)

In [12]:
# Output activation: sigmoid of (hidden activations x output weights + output bias).
output = sig(outputHidden.dot(wo) + bo)

In [13]:
# Display the untrained two-layer network's output, one row per input pair in X.
output

array([[0.70105584],
       [0.70756646],
       [0.72482236],
       [0.73248872]])

# Neural Network implementation without Hidden Layer

In [14]:
import numpy as np

# Training data: truth table of the AND gate, one input pair per row of X
# and the expected output in the matching row of Y.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
Y = np.array([[0], [0], [0], [1]])

# Step size applied to every parameter update.
lr = 0.1

# Fresh random parameters in [-1, 1); the weights are drawn before the bias.
weights = np.random.random((2, 1)) * 2 - 1
bias = np.random.random(1) * 2 - 1

# Forward pass of the untrained single-neuron network.
output0 = X
output = sig(output0.dot(weights) + bias)

# Dump the data, the starting parameters and the initial predictions.
print(X)
print(Y)
print(bias)
print(weights)
print(output)

[[0 0]
 [0 1]
 [1 0]
 [1 1]]
[[0]
 [0]
 [0]
 [1]]
[-0.52407075]
[[0.91377251]
 [0.07176564]]
[[0.37190085]
 [0.38881285]
 [0.5962109 ]
 [0.61336223]]


In [15]:
# Per-sample factors used to update the output neuron.
# Prediction error of each sample.
first_term = output - Y
# Pre-activation (weighted input sum plus bias) that was fed to the sigmoid.
input_for_last_layer = np.dot(output0, weights) + bias
# Slope of the sigmoid at that pre-activation.
second_term = derivativeSig(input_for_last_layer)
# Element-wise product of the two factors, one value per sample.
first_two = first_term * second_term
# Shape check of both factors and of their product.
first_term.shape, second_term.shape, first_two.shape

((4, 1), (4, 1), (4, 1))

In [16]:
# Weight changes accumulated with explicit loops:
#   changes[i] = sum over samples j of first_two[j] * (input i of sample j)
# The loop bounds and the accumulator size are taken from output0's shape
# instead of being hard-coded to 2 inputs and 4 samples.
changes = np.array([[0.0]] * output0.shape[1])
for i in range(output0.shape[1]):        # one entry per input / weight
    for j in range(output0.shape[0]):    # accumulate over every sample
        changes[i, 0] += first_two[j, 0] * output0[j, i]
changes

array([[0.05184311],
       [0.00070572]])

In [17]:
# Move the weights one step against the accumulated changes.
weights = weights - changes * lr
# The bias acts like a weight whose input is always 1, so its change is the
# plain sum of first_two over the four samples.
bias_change = sum(first_two[j, 0] * 1 for j in range(4))
bias = bias - bias_change * lr

In [18]:
# Train the single-neuron network with 10000 update steps, accumulating the
# parameter changes with explicit Python loops.
output0 = X  # the input never changes, so bind it once outside the loop
for epoch in range(10000):  # named `epoch` so the builtin `iter` is not shadowed
    # Forward pass: the pre-activation is computed once and reused for both
    # the prediction and the sigmoid slope.
    input_for_last_layer = np.dot(output0, weights) + bias
    output = sig(input_for_last_layer)

    # Per-sample error times sigmoid slope.
    first_term = output - Y
    second_term = derivativeSig(input_for_last_layer)
    first_two = first_term * second_term

    # changes[i] = sum over the 4 samples of first_two[j] * input i of sample j.
    changes = np.array([[0.0], [0.0]])
    for i in range(2):
        for j in range(4):
            changes[i][0] += first_two[j][0] * output0[j][i]
    weights = weights - lr * changes

    # The bias behaves like a weight whose input is always 1.
    bias_change = 0.0
    for j in range(4):
        bias_change += first_two[j][0] * 1
    bias = bias - lr * bias_change

# Predictions of the trained network, shown with the learned parameters.
output = sig(np.dot(X, weights) + bias)
weights, bias, output

(array([[5.48372067],
        [5.48372064]]),
 array([-8.31825404]),
 array([[2.43962013e-04],
        [5.54863342e-02],
        [5.54863354e-02],
        [9.33960881e-01]]))

### Optimizing the code using vectorized operations

In [19]:
# Truth table of the OR gate: the output is 1 unless both inputs are 0.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
Y = np.array([[0], [1], [1], [1]])

# Step size applied to every parameter update.
lr = 0.1

# Fresh random parameters in [-1, 1); the weights are drawn before the bias.
weights = np.random.random((2, 1)) * 2 - 1
bias = np.random.random(1) * 2 - 1

# Forward pass of the untrained neuron.
output0 = X
output = sig(output0.dot(weights) + bias)

In [20]:
# Same training procedure as the explicit-loop version, with the per-weight
# and per-sample loops replaced by matrix operations.
output0 = X  # the input never changes, so bind it once outside the loop
for epoch in range(10000):  # named `epoch` so the builtin `iter` is not shadowed
    # Forward pass: the pre-activation is computed once and reused for both
    # the prediction and the sigmoid slope.
    input_for_last_layer = np.dot(output0, weights) + bias
    output = sig(input_for_last_layer)

    # Per-sample error times sigmoid slope.
    first_term = output - Y
    second_term = derivativeSig(input_for_last_layer)
    first_two = first_term * second_term

    # output0.T @ first_two sums first_two[j] * input i of sample j over all
    # samples for every weight i in a single matrix product.
    changes = np.dot(output0.T, first_two)
    weights = weights - lr * changes

    # The bias input is always 1, so its change is the plain sum.
    bias_change = np.sum(first_two)
    bias = bias - lr * bias_change

# Predictions of the trained network, shown with the learned parameters.
output = sig(np.dot(X, weights) + bias)
weights, bias, output

(array([[6.17509873],
        [6.17530702]]),
 array([-2.84139527]),
 array([[0.05512781],
        [0.96557404],
        [0.96556711],
        [0.99992583]]))

# Neural Network implementation with one hidden layer

In [21]:
# Truth table of the XOR gate: the output is 1 only when the two inputs differ.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
Y = np.array([[0], [1], [1], [0]])
X.shape, Y.shape

((4, 2), (4, 1))

In [22]:
# Weights and biases for a network with one hidden layer (two inputs, two
# hidden units, one output unit), all drawn uniformly from [-1, 1).
wh = np.random.random((2, 2)) * 2 - 1   # input -> hidden weights
bh = np.random.random((1, 2)) * 2 - 1   # hidden-layer biases
wo = np.random.random((2, 1)) * 2 - 1   # hidden -> output weights
bo = np.random.random((1, 1)) * 2 - 1   # output-layer bias
# Step size applied to every parameter update.
lr = 0.1

In [23]:
# Forward propagation with one hidden layer. The pre-activations are kept
# in their own variables because the backward pass needs them.
output0 = X
inputHidden = output0.dot(wh) + bh                  # hidden pre-activation
outputHidden = sig(inputHidden)                     # hidden activation
inputForOutputLayer = outputHidden.dot(wo) + bo     # output pre-activation
output = sig(inputForOutputLayer)                   # network prediction
output

array([[0.47686028],
       [0.46796646],
       [0.42690004],
       [0.41943082]])

In [24]:
# Output-layer terms: prediction error, sigmoid slope at the output
# pre-activation, and their element-wise product (one value per sample).
first_term_output_layer = output - Y
second_term_output_layer = derivativeSig(inputForOutputLayer)
first_two_output_layer = second_term_output_layer * first_term_output_layer

In [25]:
# Hidden-layer terms: the output-layer product is pushed back through the
# output weights, then scaled by the sigmoid slope at the hidden pre-activation.
first_term_hidden_layer = first_two_output_layer.dot(wo.T)
second_term_hidden_layer = derivativeSig(inputHidden)
first_two_hidden_layer = second_term_hidden_layer * first_term_hidden_layer

In [26]:
# Changes for the output layer: hidden activations (transposed) times the
# output-layer product for the weights, column sum of that product for the bias.
changes_output = outputHidden.T.dot(first_two_output_layer)
changes_output_bias = first_two_output_layer.sum(axis=0, keepdims=True)

In [27]:
# Changes for the hidden layer: inputs (transposed) times the hidden-layer
# product for the weights, column sums of that product for the biases.
changes_hidden = output0.T.dot(first_two_hidden_layer)
changes_hidden_bias = first_two_hidden_layer.sum(axis=0, keepdims=True)

In [28]:
# One update step for the output layer; re-running this cell applies the
# same changes a second time.
wo = wo - changes_output * lr
bo = bo - changes_output_bias * lr

In [29]:
# One update step for the hidden layer; re-running this cell applies the
# same changes a second time.
wh = wh - changes_hidden * lr
bh = bh - changes_hidden_bias * lr

In [39]:
# Train the one-hidden-layer network with 10000 update steps.
output0 = X  # the input never changes, so bind it once outside the loop
for epoch in range(10000):  # named `epoch` so the builtin `iter` is not shadowed
    # Forward pass; the pre-activations are kept for the backward pass.
    inputHidden = np.dot(output0, wh) + bh
    outputHidden = sig(inputHidden)
    inputForOutputLayer = np.dot(outputHidden, wo) + bo
    output = sig(inputForOutputLayer)

    # Output layer: prediction error times sigmoid slope, per sample.
    first_term_output_layer = output - Y
    second_term_output_layer = derivativeSig(inputForOutputLayer)
    first_two_output_layer = first_term_output_layer * second_term_output_layer

    # Hidden layer: push the output-layer product back through the output
    # weights (still the pre-update wo), then scale by the hidden sigmoid slope.
    first_term_hidden_layer = np.dot(first_two_output_layer, wo.T)
    second_term_hidden_layer = derivativeSig(inputHidden)
    first_two_hidden_layer = first_term_hidden_layer * second_term_hidden_layer

    # Parameter changes for both layers, summed over all samples.
    changes_output = np.dot(outputHidden.T, first_two_output_layer)
    changes_output_bias = np.sum(first_two_output_layer, axis=0, keepdims=True)

    changes_hidden = np.dot(output0.T, first_two_hidden_layer)
    changes_hidden_bias = np.sum(first_two_hidden_layer, axis=0, keepdims=True)

    # Apply the updates only after every change has been computed.
    wo = wo - lr * changes_output
    bo = bo - lr * changes_output_bias

    wh = wh - lr * changes_hidden
    bh = bh - lr * changes_hidden_bias

In [40]:
# Run the forward pass once more with the trained parameters and display
# the predictions together with the learned weights and biases.
output0 = X
inputHidden = output0.dot(wh) + bh
outputHidden = sig(inputHidden)
inputForOutputLayer = outputHidden.dot(wo) + bo
output = sig(inputForOutputLayer)
output, wh, bh, wo, bo

(array([[0.01984259],
        [0.98293593],
        [0.98293498],
        [0.01744344]]),
 array([[6.04018615, 5.07845562],
        [6.0413082 , 5.07895184]]),
 array([[-2.67178874, -7.78863288]]),
 array([[ 9.49428434],
        [-9.85877775]]),
 array([[-4.50968664]]))