In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Training inputs: the four corners of a square centred on the origin,
# one (x1, x2) pair per row.
X = np.array(
    [
        [-0.5, -0.5],
        [0.5, -0.5],
        [-0.5, 0.5],
        [0.5, 0.5],
    ]
)
# XOR-style targets, one per row of X: +0.5 where the two coordinates have
# opposite signs, -0.5 where they have the same sign.
Y = np.array([-0.5, 0.5, 0.5, -0.5])

In [None]:
def initTheta(inputLayerSize, hiddenLayerSize):
    """Draw a random initial weight matrix for one fully connected layer.

    Returns an array of shape (inputLayerSize, hiddenLayerSize) whose entries
    are sampled uniformly from [0, 1) using NumPy's global random state.
    """
    weightShape = (inputLayerSize, hiddenLayerSize)
    return np.random.random_sample(weightShape)

In [None]:
def dTanh(x):
    """Derivative of tanh evaluated at x: sech(x)**2 = 1 / cosh(x)**2.

    x is the *pre-activation* value (the argument that was passed to tanh),
    not the tanh output. Accepts a scalar or an array and works element-wise.
    """
    # cosh overflows to inf for large |x|; 1/inf is 0, which is the correct
    # limit, so the overflow warning is only noise and is silenced here.
    with np.errstate(over="ignore"):
        return (1/np.cosh(x))**2

# Part 1: Backpropagation with plain per-sample gradient descent

In [None]:
def neuralNet(hiddenLayerSize, alpha, lossConverge, maxEpoch=None):
    """Train a one-hidden-layer tanh network on the notebook-level X, Y data.

    Weights are updated after every sample (online gradient descent) and the
    loss over the whole data set is evaluated once per epoch.

    Parameters
    ----------
    hiddenLayerSize : number of hidden neurons (a bias unit is added on top).
    alpha           : learning rate.
    lossConverge    : training stops once the epoch loss is <= this value.
    maxEpoch        : optional cap on the number of epochs; None (default)
                      means train until the loss threshold is reached.

    Returns
    -------
    list of [loss, epoch, outputs] entries, one per epoch, where outputs is
    the (1, n_samples) array of network outputs after that epoch.
    """
    losses = []
    outputSize = 1
    numSamples = X.shape[0]
    # Prepend a column of ones so the first row of thetaInput acts as the bias.
    inputVector = np.c_[np.ones(numSamples), X]
    thetaInput = initTheta(inputVector.shape[1], hiddenLayerSize)
    thetaHidden = initTheta(hiddenLayerSize + 1, outputSize)
    loss = 1
    epoch = 0
    while loss > lossConverge and (maxEpoch is None or epoch < maxEpoch):
        for i in range(numSamples):
            sample = inputVector[i, :]

            # Forward propagation (keep the pre-activations for the backward pass)
            hiddenPre = sample @ thetaInput
            hiddenVector = np.tanh(hiddenPre)
            hiddenVectorBias = np.concatenate([[1], hiddenVector])  # add the bias term
            hiddenVectorBias = hiddenVectorBias.reshape((hiddenLayerSize + 1, 1))
            outputPre = hiddenVectorBias.T @ thetaHidden
            output = np.tanh(outputPre)

            # Backward propagation
            # tanh' must be evaluated at the pre-activation, not at tanh's output.
            deltaOutput = (output - Y[i]) * dTanh(outputPre)
            dtheta1 = deltaOutput * hiddenVectorBias
            # Chain rule through the hidden layer: the output error is weighted
            # by the hidden->output weights (bias row excluded).
            deltaHidden = (
                deltaOutput
                * thetaHidden[1:]
                * dTanh(hiddenPre).reshape((hiddenLayerSize, 1))
            )
            dtheta2 = np.outer(deltaHidden, sample)

            # Both gradients were computed from the pre-update weights.
            thetaHidden = thetaHidden - alpha * dtheta1
            thetaInput = thetaInput - alpha * dtheta2.T

        # Evaluate the loss on the full data set with the updated weights.
        hiddenVector = np.tanh(inputVector @ thetaInput)
        hiddenVectorBias = np.c_[np.ones(numSamples), hiddenVector]  # add the bias term
        outputAll = np.tanh(hiddenVectorBias @ thetaHidden)
        loss = np.sum(0.5 * (outputAll.T - Y) ** 2) / numSamples
        epoch = epoch + 1
        losses.append([loss, epoch, outputAll.T])
    return losses

In [None]:
# Each history row is [loss, epoch, outputs] and the outputs entry is itself an
# array, so the rows are ragged: dtype=object is required (NumPy >= 1.24 raises
# ValueError without it).
neuralNetResult2Hidden = np.asarray(neuralNet(2, 0.01, 0.0000001), dtype=object)
neuralNetResult2Hidden[-1, 2]  # network outputs after the final epoch

In [None]:
# Each history row is [loss, epoch, outputs] and the outputs entry is itself an
# array, so the rows are ragged: dtype=object is required (NumPy >= 1.24 raises
# ValueError without it).
neuralNetResult4Hidden = np.asarray(neuralNet(4, 0.1, 0.05), dtype=object)
neuralNetResult4Hidden[-1, 2]  # network outputs after the final epoch

In [None]:
# Each history row is [loss, epoch, outputs] and the outputs entry is itself an
# array, so the rows are ragged: dtype=object is required (NumPy >= 1.24 raises
# ValueError without it).
neuralNetResult6Hidden = np.asarray(neuralNet(6, 0.1, 0.05), dtype=object)
neuralNetResult6Hidden[-1, 2]  # network outputs after the final epoch

In [None]:
# Loss curves for the three hidden-layer sizes. In each history array column 1
# is the epoch number and column 0 the loss measured after that epoch.
fig, ax = plt.subplots()
for history, label in [
    (neuralNetResult2Hidden, '2 HL Neurons'),
    (neuralNetResult4Hidden, '4 HL Neurons'),
    (neuralNetResult6Hidden, '6 HL Neurons'),
]:
    # The history arrays have object dtype; cast to float before plotting.
    ax.plot(history[:, 1].astype(float), history[:, 0].astype(float), label=label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss (half mean squared error)')
ax.set_title('Training loss by hidden-layer size')
ax.legend();

# Part 2: Backpropagation with a momentum term

In [None]:
def neuralNetMomentum(hiddenLayerSize, alpha, lossConverge, gamma, maxEpoch):
    """Train a one-hidden-layer tanh network on X, Y with a momentum term.

    Weights are updated after every sample. Each step descends along a blend
    of the current gradient (weight gamma) and the previous sample's gradient
    (weight 1 - gamma); the loss over the whole data set is evaluated once per
    epoch.

    Parameters
    ----------
    hiddenLayerSize : number of hidden neurons (a bias unit is added on top).
    alpha           : learning rate.
    lossConverge    : training stops once the epoch loss is <= this value.
    gamma           : weight of the current gradient in the blended step.
    maxEpoch        : hard cap on the number of epochs.

    Returns
    -------
    list of [loss, epoch, outputs] entries, one per epoch, where outputs is
    the (1, n_samples) array of network outputs after that epoch.
    """
    losses = []
    outputSize = 1
    numSamples = X.shape[0]
    # Prepend a column of ones so the first row of thetaInput acts as the bias.
    inputVector = np.c_[np.ones(numSamples), X]
    thetaInput = initTheta(inputVector.shape[1], hiddenLayerSize)
    thetaHidden = initTheta(hiddenLayerSize + 1, outputSize)
    loss = 10
    epoch = 0
    # No gradient history before the first step. The shapes mirror dtheta1 and
    # dtheta2, so they depend on the input width, not on the hidden-layer size.
    dtheta1Old = np.zeros((hiddenLayerSize + 1, outputSize))
    dtheta2Old = np.zeros((hiddenLayerSize, inputVector.shape[1]))
    while (loss > lossConverge) and epoch < maxEpoch:
        for i in range(numSamples):
            sample = inputVector[i, :]

            # Forward propagation (keep the pre-activations for the backward pass)
            hiddenPre = sample @ thetaInput
            hiddenVector = np.tanh(hiddenPre)
            hiddenVectorBias = np.concatenate([[1], hiddenVector])  # add the bias term
            hiddenVectorBias = hiddenVectorBias.reshape((hiddenLayerSize + 1, 1))
            outputPre = hiddenVectorBias.T @ thetaHidden
            output = np.tanh(outputPre)

            # Backward propagation
            # tanh' must be evaluated at the pre-activation, not at tanh's output.
            deltaOutput = (output - Y[i]) * dTanh(outputPre)
            dtheta1 = deltaOutput * hiddenVectorBias
            # Chain rule through the hidden layer: the output error is weighted
            # by the hidden->output weights (bias row excluded).
            deltaHidden = (
                deltaOutput
                * thetaHidden[1:]
                * dTanh(hiddenPre).reshape((hiddenLayerSize, 1))
            )
            dtheta2 = np.outer(deltaHidden, sample)

            # Momentum step: the previous gradient is *subtracted* as well, so
            # the history reinforces the descent direction instead of opposing it.
            thetaHidden = thetaHidden - alpha * (gamma * dtheta1 + (1 - gamma) * dtheta1Old)
            thetaInput = thetaInput - alpha * (gamma * dtheta2 + (1 - gamma) * dtheta2Old).T
            dtheta1Old = dtheta1
            dtheta2Old = dtheta2

        # Evaluate the loss on the full data set with the updated weights.
        hiddenVector = np.tanh(inputVector @ thetaInput)
        hiddenVectorBias = np.c_[np.ones(numSamples), hiddenVector]  # add the bias term
        outputAll = np.tanh(hiddenVectorBias @ thetaHidden)
        loss = np.sum(0.5 * (outputAll.T - Y) ** 2) / numSamples
        epoch = epoch + 1
        losses.append([loss, epoch, outputAll.T])
    return losses

In [30]:
# History rows are [loss, epoch, outputs] with an array in the last slot, so
# the rows are ragged: dtype=object is required (NumPy >= 1.24 raises
# ValueError without it).
result2 = np.asarray(neuralNetMomentum(2, 0.1, 0.00001, 0.9, 100000), dtype=object)

In [29]:
# Network outputs for the four training inputs after the last recorded epoch.
result2[-1][2]

array([[-0.49480688,  0.49637639,  0.49561028, -0.49562983]])

In [None]:
# History rows are [loss, epoch, outputs] with an array in the last slot, so
# the rows are ragged: dtype=object is required (NumPy >= 1.24 raises
# ValueError without it).
result4 = np.asarray(neuralNetMomentum(4, 0.1, 0.00001, 0.9, 10000000), dtype=object)

In [27]:
# Network outputs for the four training inputs after the last recorded epoch.
result4[-1][2]

array([[-0.49449601,  0.49647661,  0.49570156, -0.49579327]])

In [31]:
# result6 is not assigned in any visible cell, so this cell fails with a
# NameError on a fresh kernel. Train the 6-neuron network here with the same
# settings as the 4-neuron run. dtype=object is needed because the history
# rows are ragged.
result6 = np.asarray(neuralNetMomentum(6, 0.1, 0.00001, 0.9, 10000000), dtype=object)
result6[-1, 2]  # network outputs after the last recorded epoch

array([[-0.49444899,  0.49653295,  0.49574771, -0.49582023]])