In [203]:
import numpy as np

class NeuralNetwork:
    def __init__(self, layers, alpha=0.1):
        self.W = []
        self.layers = layers
        self.alpha = alpha
        for i in np.arange(0, len(layers) - 2):
            w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
            self.W.append(w / np.sqrt(layers[i]))
        w = np.random.randn(layers[-2] + 1, layers[-1])
        self.W.append(w / np.sqrt(layers[-2]))

    def __repr__(self):
        return "NeuralNetwork: {}".format(
            "-".join(str(l) for l in self.layers))

    def sigmoid(self, x):
        return 1.0 / (1 + np.exp(-x))

    def sigmoid_deriv(self, x):
        return x * (1 - x)

    def fit(self, X, y, epochs=1000, displayUpdate=100):
        X = np.c_[X, np.ones((X.shape[0]))]

        for epoch in np.arange(0, epochs):

            for (x, target) in zip(X, y):
                self.fit_partial(x, target)
            
            if epoch == 0 or (epoch + 1) % displayUpdate == 0:
                loss = self.calculate_loss(X, y)
                print("[INFO] epoch={}, loss={:.7f}".format(
                    epoch + 1, loss))
    
    def fit_partial(self, x, y):
        A = [np.atleast_2d(x)]
    
        #FORWARD:
        #loop over the layers in the network
        for layer in np.arange(0, len(self.W)):
            #feedforward the activation at the current layer by
            #taking the dot product between the activation and 
            #the weight matrix -- this is called the "net input"
            # to the current layer
            net = A[layer].dot(self.W[layer])

            # computing the "net output" is simply applying our
            # nonlinear activation function to the net input
            out = self.sigmoid(net)

            # once we have the net ouput, add it to our list of
            # activations
            A.append(out)

            # BACKPROPAGATION
            # the first phase of backpropagation is to compute the
            # difference between our prediction and the true target
            # value
            print("A[-1] - y: ", A[-1], y)

            error = A[-1] - y

            # from here, we need to apply the chain rule and build our
            # list of deltas 'D'; the first entry in the deltas is
            # simply error of the output layer time the derivative
            # of our activation function for the output value
            D = [error * self.sigmoid_deriv(A[-1])]

        # once you understand the chain rule it becomes super easy 
        # to implement with a 'for' loop -- simply loop over the
        # layers in reverse order (ignoring the last two since we
        # already have taken them into account)
        for layer in np.arange(len(A) - 2, 0, -1):
            # the delta for the current layer is equal to the delta
            # of the previous layer dotted with the weight matrix
            # of the current layer, followed by multiplying the delta
            # by the derivative of the nonlinear acitvation function
            # for the activations of the current layer
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.sigmoid_deriv(A[layer])
            D.append(delta)

        # since we looped over our layers in reverse order we need to
        # reverse the deltas
        D = D[::-1]

        # WEIGHT UPDATE PHASE
        # loop over the layers
        for layer in np.arange(0, len(self.W)):
            # update our weights by taking the dot product of the layer
            # activatios with heir respective deltas, then ultiplying
            # this value by some small learning rage and adding to our 
            # weight matrix -- this is where the actual "learning" take
            # place
            self.W[layer] += -self.alpha * A[layer].T.dot(D[layer])

    def predict(self, X, addBias=True):
        # initialize the output prediction as the input features -- this
        # value with be (forward) propagated through the network to 
        # obtain the final prediction
        p = np.atleast_2d(X)

        # check to see if the bias column should be added
        if addBias:
            # insert a column of 1's as the last entry in the feature
            # matrix (bias)
            p = np.c_[p, np.ones((p.shape[0]))]

        # loop over our layers in the network
        for layer in np.arange(0, len(self.W)):
            # computing the output prediction is a s simple as taking
            # the dot product between the crrent activation value 'p'
            # and the weight matrix associated with the current layer, 
            # then passing this value through a nonlinear activation 
            # function
            p = self.sigmoid(np.dot(p, self.W[layer]))
        
        # return the predicted value
        return p

    def calculate_loss(self, X, targets):
        # make predictions for the input data points then compute
        # the loss
        targets = np.atleast_2d(targets)
        predictions = self.predict(X, addBias=False)
        loss = 0.5 * np.sum((predictions - targets) ** 2)

        # return the loss
        return loss

In [204]:
from sklearn.preprocessing import LabelBinarizer
# construct the XOR dataset
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[1, 0, 1], [0, 1, 1], [0, 1, 1], [0, 1, 1]])
for element in y:
    var = element
    var = np.array(var)
    print(var)
print(y)

[1 0 1]
[0 1 1]
[0 1 1]
[0 1 1]
[[1 0 1]
 [0 1 1]
 [0 1 1]
 [0 1 1]]


In [205]:
# define our 2-2-1 neural network and train it
nn = NeuralNetwork([2, 2, 1], alpha=0.5)
nn.fit(X, y, epochs=1000)

A[-1] - y:  [[0.27521785 0.87055679 0.44215562]] [1 0 1]
A[-1] - y:  [[0.49906429]] [1 0 1]


ValueError: shapes (1,3) and (1,3) not aligned: 3 (dim 1) != 1 (dim 0)

In [None]:
# now that our network is trained, loop over the XOR data points
for (x, target) in zip(X, y):
	# make a prediction on the data point and display the result
	# to our console
	pred = nn.predict(x)[0][0]
	step = 1 if pred > 0.5 else 0
	print("[INFO] data={}, ground-truth={}, pred={:.4f}, step={}".format(
		x, target[0], pred, step))

[INFO] data=[0 0], ground-truth=0, pred=0.0690, step=0
[INFO] data=[0 1], ground-truth=1, pred=0.9552, step=1
[INFO] data=[1 0], ground-truth=1, pred=0.9712, step=1
[INFO] data=[1 1], ground-truth=1, pred=0.9868, step=1
