In [1]:
import numpy as np
import matplotlib.pyplot as plt

#### cost/activation

In [2]:
def cost(y,y_hat):
    return 1/2*np.sum( np.power(y - y_hat, 2) )

def logistic_sigmoid(x, derivative=0):
    sigm = 1/(1 + np.exp(-x))
    
    if derivative:
        return sigm*(1. - sigm)
    
    return sigm

# from https://deepnotes.io/softmax-crossentropy
def stable_softmax(a, derivative=0):
    exps = np.exp(a - np.max(a))

    if len(exps.shape) > 1:
        ans = np.zeros((exps.shape[0],2))
        numerator = exps.sum(axis=1).reshape((exps.shape[0], 1))
        numerator = np.append(numerator, numerator, axis=1)
    else:
#         ans = np.zeros((exps.shape[0],))
        ans = np.zeros((2,2))
        numerator = np.sum(exps)
        
    S = exps/numerator
    
    if derivative:
        if len(exps.shape) > 1: # for more than one sample
            for i in range(exps.shape[0]):
                ans[i, 0] = S[i, 1]*(1 - S[i, 0])
                ans[i, 1] = S[i, 0]*(1 - S[i, 1])
        else:
            kro_delta = 0
            for i in range(ans.shape[0]):
                for j in range(ans.shape[1]):                        
                    if i==j:
                        kro_delta = 1
                    else:
                        kro_delta = 0
                        
                    ans[i,j] = S[i]*(kro_delta - S[j])
        return ans
    return S

#### NN functions

In [7]:
def forward_prop(W1, b1, W2, b2, X):
    A1 = (X @ W1 + b1)[0]
    Z1 = logistic_sigmoid(A1) 
    A2 = (Z1 @ W2 + b2)[0]
    Y = stable_softmax(A2)
    return A1, A2, Y

def backprop(W2, A1, A2, X, Y, t):
    if len(X.shape) < 2:
        X = X.reshape(1,X.shape[0])

## >>>>>>>>>>>>>>>>>> 2nd layer (hidden units)
    # 1 x 2 or 4 x 2 (batch)
    step1 = (Y - t) @ stable_softmax(A2, derivative=1)
    step1 = step1.reshape(X.shape[0], X.shape[1])
    
    # 1 X N hidden units
    step2 = logistic_sigmoid(A1, derivative=1)
    step2 = step2.reshape(1, step2.shape[0])

     # N x 1
    step3 = W2
    
    grad_mid_layer = step1.T @ step2 @ step3 @ X.T
    
## >>>>>>>>>>>>>>>>>> output layer (first step has been calculated already)    
    # now it's 2 x 1
    step1 = step1.T
    # 1 X N hidden units
    
    step2 = logistic_sigmoid(A1, derivative=0)
    N_no_hid_units = step2.shape[0]
    
    step2 = step2.reshape(1, N_no_hid_units)
    
    grad_output = step1 @ step2
    
    return grad_mid_layer, grad_output

#online (sample by sample) training
def train(X, T, NO_UNITS_L1=4, epochs=10_000, learning_rate=.1, show_cost=0):
    
    rho = learning_rate
    W1 = np.random.randn(2, NO_UNITS_L1)
    b1 = np.zeros((1, NO_UNITS_L1))
    W2 = np.random.randn(NO_UNITS_L1, 2) # 2 outputs, P(0) and P(1)
    b2 = np.zeros((1,2))
    
    for i in range(epochs):
        for j in range(X.shape[0]):
            A1, A2, Y = forward_prop(W1, b1, W2, b2, X[j,:])
            grad_mid_layer, grad_output = backprop(W2, A1, A2, X[j,:], Y, T[j,:])
            W1 = W1 - rho*grad_mid_layer
            W2 = W2 - rho*grad_output.T
            b1 = b1 - rho*np.mean(grad_mid_layer)
            b2 = b2 - rho*np.mean(grad_output)
            if show_cost:
                print("cost: " + str(cost(T[j,:],Y)))
    
    return [W1, b1, W2, b2, X]

#train package is a list with [W1, b1, W2, b2, X]
def predict(train_pkg, n):
    A1, A2, Y = forward_prop(train_pkg[0], train_pkg[1],\
                             train_pkg[2], train_pkg[3], \
                             train_pkg[4][n,:])
    
    return Y


#### dataset / targets
X: possible inputs of a logic function.
t: dictionary with possible outputs for each logic gates. 
    4 binary ouputs to match NN's output probabilities of 0 or 1. 
    - if [p(0) p(1)] == [1 0] then probability of 0 == 1 && probability of 1 == 1

In [4]:
X = np.array([[0,0],\
              [0,1],\
              [1,0],\
              [1,1]], dtype=np.float32)

t = { #dictionary for getting both the target logic values and the correlated string 
    # binary labels to represent the probabilities of 1 or 0 (first column is 0, 2nd 1)
    "AND": np.array([[1, 0],\
                     [1, 0],\
                     [1, 0],\
                     [0, 1]], dtype=np.float32),
    
    "NAND": np.array([[0, 1],\
                      [0, 1],\
                      [0, 1],\
                      [1, 0]], dtype=np.float32),
    
    "OR": np.array([[1, 0],\
                    [0, 1],\
                    [0, 1],\
                    [0, 1]], dtype=np.float32),
    
    "NOR": np.array([[0, 1],\
                     [1, 0],\
                     [1, 0],\
                     [1, 0]], dtype=np.float32),
    
    "XOR": np.array([[1, 0],\
                     [0, 1],\
                     [0, 1],\
                     [1, 0]], dtype=np.float32) 
}

#### running the NN

In [58]:
train_gates = {}

for i in t:
    train_gates[i] = train(X, t[i], NO_UNITS_L1=8, epochs=10_000)
#     train(X, T, NO_UNITS_L1=4, epochs=10_000, learning_rate=.1, show_cost=0)

#### test 

In [63]:
test = 3 # select the binary inputs (test = 0 = > [0,0]; test = 3 = > [1,1])
# gate = "NAND"
for i in t:
    y = predict(train_gates[i], test)
    print(i)
    print("t: " + str(t[i][test,:]))
    print("Y: " + str(y) )

AND
t: [0. 1.]
Y: [0.04395246 0.95604754]
NAND
t: [1. 0.]
Y: [0.95092582 0.04907418]
OR
t: [0. 1.]
Y: [1.69920905e-04 9.99830079e-01]
NOR
t: [1. 0.]
Y: [9.99738297e-01 2.61703058e-04]
XOR
t: [1. 0.]
Y: [0.79582512 0.20417488]
