In [1]:
import numpy as np
import matplotlib.pyplot as plt

#### cost/activation

In [2]:
# cost(y[1,:].reshape(1,2),y_hat[1,:].reshape(1,2), derivative=0)
def cost(y,y_hat, derivative=0):
    if derivative:
            return np.sum(y - y_hat, axis=0)
    return 1/2*np.sum(np.power(y - y_hat, 2),\
                     axis=0)

def logistic_sigmoid(x, derivative=0):
    
    sigm = 1/(1 + np.exp(-x))
    if len(sigm.shape) < 2:
        sigm = sigm.reshape(sigm.shape[0],1)
        
    if derivative:
        return sigm*(1. - sigm)
    return sigm

#### NN functions 1
BACKEND

In [3]:
# >>>>>>>>>>>>>>>>>>> init_weights_biases >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# W1, b1, W2, b2 = init_weights_biases(no_hidden_units=8)
def init_weights_biases(no_hidden_units=2, seed=1):
    #all vectors are column vectors
    np.random.seed(seed) #shown to converge for other XOR regression problem
    
    W1 = np.random.randn(no_hidden_units, 2)
    
    b1 = np.zeros((no_hidden_units, 1))
    W2 = np.random.randn(2, no_hidden_units) # 2 outputs, P(0) and P(1)
    b2 = np.zeros((2, 1))
    return W1, b1, W2, b2

# W1, b1, W2, b2 = init_weights_biases(no_hidden_units=8)
# A1, A2, Y = forward_prop(W1, b1, W2, b2, X)
# >>>>>>>>>>>>>>>>>>> forward_prop >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
def forward_prop(W1, b1, W2, b2, X):
    x = X.T

    if len(x.shape) < 2:
        no_of_samples = 1
        x = x.reshape(x.shape[0],1)
    else:
        no_of_samples = x.shape[0]

    Z2 = W1 @ x  + b1
    A2 = logistic_sigmoid(Z2)  # second layer)
    Z3 = W2 @ A2 + b2
    A3 = logistic_sigmoid(Z3)

    Y = np.zeros((no_of_samples,2))

    #scaling to making the pair a probability
    Y = np.divide(A3, np.sum(A3, axis=0)) #comuns are the samples now
    return Z2, Z3, Y

# W1, b1, W2, b2 = init_weights_biases(no_hidden_units=8)
# A1, A2, Y = forward_prop(W1, b1, W2, b2, X)
# grad_mid_layer, grad_output = backprop(W2, A1, A2, X, Y, t)
# backprop(W2, A1, A2, X, Y, t)
# >>>>>>>>>>>>>>>>>>> backprop >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
def backprop(W2, Z2, Z3, X, Y_hat, t):
    if len(X.shape) < 2:
        X = X.reshape(1,X.shape[0])
  ########  gradient W1    
    op1 = ( -(t.T - Y_hat ) * logistic_sigmoid(Z3, derivative=1)) #gets 6x4 matrix
    op2 = W2.T @ op1 
    op3 = op2 * logistic_sigmoid(Z2, derivative=1) # @ 6 x 4 still
    del_W1 = op3 @ X # 6x2
    
  ########  gradient W2
    step2 = logistic_sigmoid(Z2, derivative=0) #just A2
    del_W2 = op1 @ step2.T
    
    return del_W1, del_W2

#### NN functions 2
frontend

In [29]:
# >>>>>>>>>>>>>>>>>>> train >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
#online (sample by sample) training
# all samples (X), 4 x 2, are fed
def train(X, T, epochs, learning_rate,\
          NO_UNITS_L1=4, show_cost=0):
    
    converged = False
    rho = learning_rate
    Y = np.zeros((2,X.shape[0])) # 4 x 2 always (the whole dataset is fed)
    
    W1, b1, W2, b2 = init_weights_biases(no_hidden_units=NO_UNITS_L1)
    
    rho = 1
    show_cost = 0
    W1, b1, W2, b2 = init_weights_biases(no_hidden_units=8)
    Y = np.zeros((X.shape[0], 2)) # 4 x 2 always (the whole dataset is fed)

    j = 0
    for i in range(epochs):
        idx_done = i + 1
        Z2, Z3, Y_hat = forward_prop(W1, b1, W2, b2, X)
        grad_W1, graD_W2 = backprop(W2, Z2, Z3, X, Y_hat, T)

        #grad descent
        W1 = W1 - rho*grad_W1
        W2 = W2 - rho*graD_W2

        if show_cost:
            print("cost: " + str(cost(T,Y)))

        Y = np.round(Y_hat).T
        
        y_and_T_match = np.array_equal(Y, T)        

        if y_and_T_match: #converged
            j += 1
            if j > 1:
                converged = True
                break
    
    return [W1, b1, W2, b2, X, Y, idx_done, epochs, converged, rho]

# >>>>>>>>>>>>>>>>>>> predict >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
#train package is a list with [W1, b1, W2, b2, X]
# n: 0-3 selection of logical inputs; e.g. 0 == [0, 0]; 3 == [1,1]
def predict(train_pkg):
    Z2, Z3, Y = forward_prop(train_pkg[0], train_pkg[1],\
                             train_pkg[2], train_pkg[3], \
                             train_pkg[4])
    del Z2, Z3
    
    return np.round(Y).T

#### NN functions 2 (helpers)

In [30]:
# >>>>>>>>>>>>>>>>>>> train_all_gates >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
def train_all_gates(X, t, no_hidden_units=2,\
                    iterations=500, rho=.01, print_cost=0):
    train_gates = {} #init dictionary

    for i in t:
        # NO_UNITS_L1 = 6  yields max matches with rho = 1 and epochs = 500
#         train_gates[i] : [W1, b1, W2, b2, X, Y, idx_done, epochs, converged, rho]
        train_gates[i] = train(X, t[i], NO_UNITS_L1=no_hidden_units,\
                               epochs=iterations, learning_rate=rho,\
                               show_cost=print_cost)
    return train_gates

# >>>>>>>>>>>>>>>>>>> match_logic_gate >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
def match_logic_gate(train_pkg, T):

    Y = predict(train_pkg)
    prediction_match = np.array_equal(Y, T)
#         train_pkg : [W1, b1, W2, b2, X, Y, idx_done, epochs, converged, rho]
    # indeces used, especially:
    #                                 8. converged
    #                                 6. idx_done, 
    #                                 7. epochs
    #                                 9. rho
    match_pkg = [train_pkg[8], train_pkg[6],\
                  train_pkg[7], train_pkg[9],\
                  prediction_match, Y]
        
    return match_pkg

# >>>>>>>>>>>>>>>>>>> match_all_gate_outputs >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
def match_all_gate_outputs(train_pkg_all_gates, t):
    matches = {}

    for i in t:
        matches[i] = match_logic_gate(train_pkg_all_gates[i], t[i])
        
    return matches

def print_match(match):
    print(i + " converged: " + str(match[0]))
    print("===========================================")
    print("  iter. to converge: " + str(match[1]))
    print("  iter. max: " + str(match[2]))
    if match[4]:
        print("  ==== CORRECT prediction ==== ")
    else:
        print("  ==== INCORRECT prediction ==== ")
    print("  predicted y (y_hat): ")
    print(match[5])
    print()

#### dataset / targets
X: possible inputs of a logic function.
t: dictionary with possible outputs for each logic gates. 
    4 binary ouputs to match NN's output probabilities of 0 or 1. 
    - if [p(0) p(1)] == [1 0] then probability of 0 == 1 && probability of 1 == 1

In [31]:
X = np.array([[0,0],\
              [0,1],\
              [1,0],\
              [1,1]], dtype=np.float32)

t = { #dictionary for getting both the target logic values and the correlated string 
    # binary labels to represent the probabilities of 1 or 0 (first column is 0, 2nd 1)
    "AND": np.array([[1, 0],\
                     [1, 0],\
                     [1, 0],\
                     [0, 1]], dtype=np.float32),
    
    "NAND": np.array([[0, 1],\
                      [0, 1],\
                      [0, 1],\
                      [1, 0]], dtype=np.float32),
    
    "OR": np.array([[1, 0],\
                    [0, 1],\
                    [0, 1],\
                    [0, 1]], dtype=np.float32),
    
    "NOR": np.array([[0, 1],\
                     [1, 0],\
                     [1, 0],\
                     [1, 0]], dtype=np.float32),
    
    "XOR": np.array([[1, 0],\
                     [0, 1],\
                     [0, 1],\
                     [1, 0]], dtype=np.float32) }

#### running the NN

In [32]:
train_pkg_all_gates = train_all_gates(X, t, no_hidden_units=10, iterations=1000, rho=1)
matches = match_all_gate_outputs(train_pkg_all_gates, t)

### matches
matches is a Python dictionary. for ex: 

    matches["OR"]
    returns a list:
            matches["OR"][0] == boolean (converged or not, True or False)
            matches["OR"][1] == idx_done, no. of iterations to converge
            matches["OR"][2] == total iterations (epochs)
            matches["OR"][3] == learning rate used(rho)
            matches["OR"][4] == matches with target (comparing both p(0) and p(1))
            matches["OR"][5] == predicted Y

In [33]:
for i in matches:
    print_match(matches[i])
    
# predict(train_pkg_all_gates["XOR"])

AND converged: True
  iter. to converge: 22
  iter. max: 1000
  ==== CORRECT prediction ==== 
  predicted y (y_hat): 
[[1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]]

NAND converged: True
  iter. to converge: 27
  iter. max: 1000
  ==== CORRECT prediction ==== 
  predicted y (y_hat): 
[[0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]]

OR converged: True
  iter. to converge: 24
  iter. max: 1000
  ==== CORRECT prediction ==== 
  predicted y (y_hat): 
[[1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]]

NOR converged: True
  iter. to converge: 32
  iter. max: 1000
  ==== CORRECT prediction ==== 
  predicted y (y_hat): 
[[0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]]

XOR converged: True
  iter. to converge: 32
  iter. max: 1000
  ==== CORRECT prediction ==== 
  predicted y (y_hat): 
[[1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]]



In [34]:
# OR_pkg = train_pkg_all_gates["XOR"]
# Z2, Z3, Y = forward_prop(OR_pkg[0], OR_pkg[1],\
#                          OR_pkg[2], OR_pkg[3], \
#                          OR_pkg[4][2,:])

In [22]:
# Y

array([[0.0469975],
       [0.9530025]])

In [None]:
# matches

