In [8]:
import numpy as np
N0, N1, NL=2,2,1
dimensions = [N0, N1, NL]
def initialization(dimensions):
    parameters = {}
    L = len(dimensions)
    for h in range(1, L):
        parameters[f'W{h}'] = np.random.randn(dimensions[h], dimensions[h-1]+1)
    return parameters

def forward_propagation(X, parameters):
    X = np.transpose(X)
    activations = {'X0' : X}
    L = len(parameters)
    for h in range(1, L+1):
        Z = np.concatenate((activations['X'+str(h-1)], np.ones((X.shape[0],1))), axis = 0)
        Y = parameters['W'+str(h)].dot(Z)
        activations['X'+str(h)] = 1/(1+np.exp(-Y))
    return activations

def predict(X, parameters):
    L = len(parameters)
    activations = forward_propagation(X, parameters)
    XL = activations['X'+str(L)]
    return XL >= 0.5

def log_loss(y, activations, parameters):
    L = len(parameters)
    activations = forward_propagation(X, parameters)
    train_loss = 1/len(y) * np.sum(-y * (np.log(activations['X'+str(L)]))-(1-y) * (np.log(1-
    activations['X'+str(L)])))
    return train_loss

def backpropagation(y, activations, parameters):
    m = len(y)
    L = len(parameters)
    adjoints = {'A'+str(L) : 1/m * (activations['X'+str(L)] -y)}
    for h in reversed(range(1, L)):
        W = np.delete(parameters['W'+str(h+1)], dimensions[h], axis = 1)
        B = np.transpose(W).dot(adjoints['A'+str(h+1)])
        adjoints['A'+str(h)] = B * activations['X'+str(h)] * (1-activations['X'+str(h)])
    return adjoints

def partial_derivatives(activations, adjoints, parameters):
    L = len(parameters)
    gradients = {}
    for h in reversed(range(1, L+1)):
        Z = np.concatenate((activations['X'+str(h-1)], np.ones((1,len(y)))), axis = 0)
        gradients['dW'+str(h)] = adjoints['A'+str(h)].dot(np.transpose(Z))
    return gradients

def update(parameters, gradients, learning_rate = 0.4):
    L = len(parameters)
    for h in range(1, L+1):
        parameters['W'+str(h)] = parameters['W'+str(h)] - learning_rate * gradients['dW'+str(h)]
    return parameters

from tqdm import tqdm

def neural_network(X, y, learning_rate = 0.5, iter = 2500):
    parameters = initialization(dimensions)
    train_loss = [ ]
    train_acc = [ ]
    for i in tqdm(range(iter)):
        activations = forward_propagation(X, parameters)
        adjoints = backpropagation(y, activations, parameters)
        gradients = partial_derivatives(activations, adjoints, parameters)
        parameters = update(parameters, gradients, learning_rate)
    return parameters

# 
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])

W=neural_network(X, y, learning_rate = 0.5, iter = 2500)
W

W["W1"], W["W2"]

  0%|                                                  | 0/2500 [00:00<?, ?it/s]


ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 4 and the array at index 1 has size 1

In [7]:
X.shape[0]

4

In [6]:
parameters=initialization(dimensions)
A=forward_propagation(X, parameters)
A

{'X0': array([[0, 0, 1, 1],
        [0, 1, 0, 1]]),
 'X1': array([[0.23219415, 0.21569264, 0.26759777, 0.24939747],
        [0.58573691, 0.67729515, 0.8671311 , 0.90643193]]),
 'X2': array([[0.61169173, 0.6310247 , 0.62290438, 0.6372971 ]])}

In [57]:
def H(x):
    return np.where(x >= 0, 1, 0)

#def sigma(x):
 #   return 1 / (1 + np.exp(-x))
def sigma(x):
    return np.where(x > 0, (1 / (1 + np.exp(-x))), (np.exp(x) / (1 + np.exp(x))))

def sigma_d(x):
    return sigma(x) * (1 - sigma(x))

In [58]:
def F_propagation(X0, W_1, W_2,sigma):
    Y1 = np.dot(W_1, X0)
    X1 = np.append(sigma(Y1), np.ones((1, 4)), axis=0)
    Y2 = np.dot(W_2, X1)
    X2 = sigma(Y2)
    return X2, Y2, X1, Y1

In [78]:
# 
W1,W2=W["W1"], W["W2"]
#
X0 = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]]).T
y = np.array([0, 1, 1, 0])
# 
N_0 = 3
N_1 = 3
N_2 = 1
X2, Y2, X1, Y1=F_propagation(X0, W1, W2,H)
X2

array([[0, 1, 1, 0]])