In [1]:
import numpy as np
import random 
import math

In [2]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def sigmoidPrime(x):
    return sigmoid(x) * (1 - sigmoid(x))
def relu(x):
    return x * (x > 0)
def reluPrime(x):
    return 1. * (x > 0)

In [3]:
X = np.array(
    [
        [0, 0],
        [0, 1],
        [1, 1],
        [1, 0]
    ]
)
y  = np.array([0, 0, 1, 0])

mu = 0
sigma = 0.1

w1 = np.random.normal(mu, sigma, (4, 2))
w2 = np.random.normal(mu, sigma, (1, 4))

In [4]:
def forward(X):
    a0 = X.reshape((2,1))
    
    z1 = np.dot(w1, a0)
    a1 = relu(z1)
    
    z2 = np.dot(w2, a1)
    a2 = relu(z2)
    
    return a2

def loss(X, y):
    a0 = X.reshape((2,1))
    
    z1 = np.dot(w1, a0)
    a1 = relu(z1)
    
    z2 = np.dot(w2, a1)
    a2 = relu(z2)
    
    delta2 = (a2 - y)
    delta1 = np.dot(w2.T, delta2)
    
    gradients2 = np.dot(delta2 * reluPrime(z2), a1.T)
    gradients1 = np.dot(delta1 * reluPrime(z1), a0.T)
    loss = 0.5 * ((a2 - y)**2)
    
    return loss, (gradients2, gradients1)

def train(X, Y):
    global w2
    global w1
    alpha = 0.25
    
    for iteration in range(500):   
        iterationCost = 0
        w2update = np.zeros_like(w2)
        w1update = np.zeros_like(w1)
        for i in range(len(X)):
            x = X[i]
            y = Y[i]    
            h = forward(x)
            cost, (w2grads, w1grads ) = loss(x, y)          
            iterationCost += cost
            w2update += w2grads
            w1update += w1grads
            
        print(iterationCost)
        w2 = w2 - alpha*w2update
        w1 = w1 - alpha*w1update

In [5]:
train(X,y)

[[0.50004534]]
[[0.48196726]]
[[0.46074397]]
[[0.42993126]]
[[0.38293596]]
[[0.3190378]]
[[0.24895938]]
[[0.1944431]]
[[0.16803806]]
[[0.16018976]]
[[0.15779895]]
[[0.15611341]]
[[0.15484577]]
[[0.15369714]]
[[0.15208908]]
[[0.15081659]]
[[0.14878413]]
[[0.14741014]]
[[0.14483882]]
[[0.14342864]]
[[0.14017586]]
[[0.13883474]]
[[0.13484074]]
[[0.13356597]]
[[0.12946241]]
[[0.1268605]]
[[0.1234014]]
[[0.11936334]]
[[0.11678803]]
[[0.11115983]]
[[0.10974269]]
[[0.10443613]]
[[0.10011173]]
[[0.09703662]]
[[0.09085432]]
[[0.08892789]]
[[0.08405048]]
[[0.07809933]]
[[0.07653475]]
[[0.07054769]]
[[0.06651099]]
[[0.06405485]]
[[0.05842442]]
[[0.05487352]]
[[0.05255648]]
[[0.0474309]]
[[0.04391489]]
[[0.04217885]]
[[0.03767955]]
[[0.03412034]]
[[0.03305476]]
[[0.02924941]]
[[0.02604696]]
[[0.02484357]]
[[0.02284196]]
[[0.01969911]]
[[0.01788325]]
[[0.01690643]]
[[0.01452042]]
[[0.0126567]]
[[0.01208659]]
[[0.0109461]]
[[0.00912863]]
[[0.00814908]]
[[0.00764601]]
[[0.00641095]]
[[0.0054827]]
[[0

In [7]:
print(forward(np.array([0,0])))
print(forward(np.array([0,1])))
print(forward(np.array([1,1])))
print(forward(np.array([1,0])))

[[0.]]
[[2.35916224e-16]]
[[1.]]
[[1.11022302e-16]]
