In [1]:
import numpy as np
# from scipy.special import softmax
# from numpy import zeros, ones, eye
import matplotlib.pyplot as plt
# import tensorflow as tf

In [2]:
def forward_prop_A(W1, b1, W2, b2, X):
    A1 = (X @ W1 + b1)[0]
    Z1 = logistic_sigmoid(A1) 
    A2 = (Z1 @ W2 + b2)[0]
    Y = softmax_vector(A2)
    return A1, A2, Y


def logistic_sigmoid(x, derivative=0):
    sigm = 1/(1 + np.exp(-x))
    if derivative:
        return sigm * (1. - sigm)
    return sigm


def softmax_vector(x, derivative=0):
    x_exp = np.exp(x)
    
    if derivative: #this works only for 2-element vectors
        e = np.array([0, 0])
        e[:] = x_exp[0]*x_exp[1]/np.sum(x_exp)
        return e
    
    return np.exp(x)/np.sum(np.exp(x))

def backprop_A(W2, A1, A2, X, Y, t):
    grad_mid_layer = (t-Y) @ -softmax_vector(A2, derivative=1).T * logistic_sigmoid(A1, derivative=1) @ W2 @ X.T
    grad_output = (t-Y) @ -softmax_vector(A2, derivative=1).T * logistic_sigmoid(A1, derivative=0) 
    
    grad_output = grad_output @ np.eye(grad_output.shape[0],M=2)
    return grad_mid_layer, grad_output

In [3]:
X = np.array([[0,0],\
              [0,1],\
              [1,0],\
              [1,1]], dtype=np.float32)

t = { #dictionary for getting both the target logic values and the correlated string 
    # binary labels to represent the probabilities of 1 or 0 (first column is 0, 2nd 1)
    "AND": np.array([[1, 0], [1, 0], [1, 0], [0, 1]], dtype=np.float32),
    "NAND": np.array([[0, 1], [0, 1], [0, 1], [1, 0]], dtype=np.float32),
    "OR": np.array([[1, 0], [0, 1], [0, 1], [0, 1]], dtype=np.float32),
    "NOR": np.array([[0, 1], [1, 0], [1, 0], [1, 0]], dtype=np.float32),
    "XOR": np.array([[1, 0], [0, 1], [0, 1], [1, 0]], dtype=np.float32) 
}

#learning reates
# RHO = np.array([.0001,.001,.01,.01,.1,1], dtype=np.float32)

In [4]:
NO_UNITS_L1 = 2
W1 = np.random.randn(2,NO_UNITS_L1)
b1 = np.zeros((1,NO_UNITS_L1))
W2 = np.random.randn(NO_UNITS_L1,2) # 2 outputs, P(0) and P(1)
b2 = np.zeros((1,2))

In [10]:
rho = RHO[-2]
for i in range(10_000):
    for j in range(len(X[:,1])):
        A1, A2, Y = forward_prop_A(W1, b1, W2, b2, X[j,:])
        grad_mid_layer, grad_output = backprop_A(W2, A1, A2, X[j,:], Y, t["AND"][j,:])

        W1 = W1 - rho*grad_mid_layer
        W2 = W2 - rho*grad_output.T
        b1 = b1 - rho*np.mean(grad_mid_layer)
        b2 = b2 - rho*np.mean(grad_output)

In [11]:
test = 2
A1, A2, Y = forward_prop_A(W1, b1, W2, b2, X[test,:])
# print("X: " + str(X[test,:]))
print("t: " + str(t["AND"][test,:]))
print("Y: " + str(Y))

t: [1. 0.]
Y: [0.28810017 0.71189983]


In [12]:
# (t["AND"][test,:]-Y) @ -softmax_vector(A2, derivative=1).T #* logistic_sigmoid(A1, derivative=1) @ W2 @ X.T
-softmax_vector(A2, derivative=1).T

array([-1, -1])

In [None]:
W2 @ X.T

In [None]:
np.eye(A1.shape[0])