In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def cost(y, y_hat, derivative=0):
    """Half sum-of-squares error between `y` and `y_hat`.

    When `derivative` is truthy the raw residual ``y - y_hat`` is returned.
    Otherwise the squared residuals are summed along axis 0, halved, and the
    mean of the resulting values is returned.
    """
    residual = y - y_hat
    if not derivative:
        half_squared_sum = 1/2 * np.sum(np.power(residual, 2), axis=0)
        return np.mean(half_squared_sum)
    return residual

def logistic_sigmoid(x, derivative=0):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    A result with fewer than two dimensions is reshaped into a column of
    shape (n, 1).  With a truthy `derivative` the value ``s * (1 - s)`` is
    returned instead of ``s``.
    """
    activation = 1/(1 + np.exp(-x))
    if activation.ndim < 2:
        # promote a 1-D result to an explicit column vector
        activation = activation.reshape(activation.shape[0], 1)

    return activation*(1. - activation) if derivative else activation

In [3]:
# >>>>>>>>>>>>>>>>>>> init_weights_biases >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# W, B = init_weights_biases(4, 3, [2,2])
# no_hidden_units: list of hidden-layer sizes; an empty list builds a single-layer perceptron
def init_weights_biases(no_of_features, no_outputs, no_hidden_units, seed=1):
    """Build the weight and bias lists of a fully connected network.

    The global NumPy generator is re-seeded with `seed`, then one weight
    matrix per layer is drawn with ``np.random.randn`` in input-to-output
    order.  Every matrix has shape (units in this layer, units in the
    previous layer); every bias is a zero column of shape (units, 1).

    no_hidden_units is a list of hidden-layer sizes.  An empty list gives a
    single matrix of shape (no_outputs, no_of_features).

    Returns the pair ``(W, B)`` of lists.
    """
    hidden_sizes = list(no_hidden_units)
    np.random.seed(seed)

    if no_hidden_units:  # at least one hidden layer
        layer_sizes = [no_of_features, *hidden_sizes, no_outputs]
    else:  # no hidden units (perceptron)
        layer_sizes = [no_of_features, no_outputs]

    # consecutive (fan_in, fan_out) pairs walk the net from input to output
    W = [np.random.randn(fan_out, fan_in)
         for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:])]
    B = [np.zeros((fan_out, 1)) for fan_out in layer_sizes[1:]]

    return W, B

# >>>>>>>>>>>>>>>>>>> forward_prop >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    # W, B = init_weights_biases(4, 3, [2, 2])
    # Z, A, Y = forward_prop(W, B, X)
    # X has n features x M samples
def forward_prop(W, B, X):
    """Run one forward pass through every layer of the network.

    Parameters
    ----------
    W : list of 2-D arrays
        One weight matrix per layer, ordered from input to output.
    B : list of arrays
        One bias per layer, added to ``W[k] @ A[k]``.
    X : array, features x samples
        Input batch; each column is one sample.

    Returns
    -------
    Z : list
        Pre-activations ``W[k] @ A[k] + B[k]`` of every layer.
    A : list
        Activations; ``A[0]`` is `X` and ``A[k + 1]`` is the sigmoid of
        ``Z[k]``.
    Y : array
        Output activations divided by their column sums, so the outputs of
        each sample add up to one.

    Raises
    ------
    ValueError
        If `W` is empty or `X` has fewer than two dimensions.
    """
    if len(W) == 0:
        raise ValueError("forward_prop needs at least one weight matrix")
    if X.ndim < 2:
        # a 1-D X would silently broadcast against the (units, 1) biases
        raise ValueError("X must be laid out as features x samples")

    Z = []
    A = [X]  # the input batch acts as the activation of layer 0

    for k in range(len(W)):
        Z.append(W[k] @ A[k] + B[k])
        A.append(logistic_sigmoid(Z[k]))

    # scale the output layer so every column (sample) sums to one
    output = A[-1]
    Y = output / np.sum(output, axis=0)
    return Z, A, Y


# >>>>>>>>>>>>>>>>>>> backprop >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# W, B = init_weights_biases(4, 3, [2, 2])
# Z, A, Y_hat = forward_prop(W, B, X)
# dJ_dW = backprop(W, Z, A, Y_hat, T)
# X is features x samples and T is outputs x samples (train passes X.T and T.T)
def backprop(W, Z, A, Y_hat, T):
    """Back-propagate the output error and return the weight gradients.

    W, Z and A are the lists used/produced by the forward pass, Y_hat is the
    network output and T the targets (same layout as Y_hat).

    The output error is ``-(T - Y_hat)`` times the sigmoid derivative at the
    last pre-activation.  It is then pushed down one layer at a time through
    the transposed weights.  Returns a dict mapping each layer index to
    ``error @ A[layer].T``; no bias gradients are produced.
    """
    top_layer = len(W) - 1

    # error signal at the output layer
    delta = -(T - Y_hat) * logistic_sigmoid(Z[-1], derivative=1)

    dJ_dW = {}
    for layer in range(top_layer, 0, -1):
        dJ_dW[layer] = delta @ A[layer].T
        # propagate the error to the layer below
        delta = (W[layer].T @ delta) * logistic_sigmoid(Z[layer - 1], derivative=1)

    # the loop stops above layer 0, whose gradient uses the input batch A[0]
    dJ_dW[0] = delta @ A[0].T

    return dJ_dW

In [4]:
# >>>>>>>>>>>>>>>>>>> train >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
#online (sample by sample) training
# all samples (X), 4 x 2, are fed


# >>>>>>>>>>>>>>>>>>> predict >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# train_pkg is the tuple returned by train():
#   (W, B, Y, X, idx_done, epochs, converged, rho)
# where X is stored as samples x features.
def predict(train_pkg):
    """Re-run the trained network on its training inputs and round the output.

    Only W (index 0), B (index 1) and X (index 3) of `train_pkg` are used.
    Returns the rounded network output transposed to samples x outputs.
    """
    W, B, X = train_pkg[0], train_pkg[1], train_pkg[3]
    # forward_prop takes (W, B, X) with X as features x samples
    _, _, Y = forward_prop(W, B, X.T)

    return np.round(Y).T

In [5]:
# >>>>>>>>>>>>>>>>>>> train_all_gates >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
def train_all_gates(X, t, no_hidden_units=2,\
                    iterations=500, rho=.01, print_cost=0):
    """Train one network per entry of `t` and collect the results.

    Parameters
    ----------
    X : array
        Inputs handed unchanged to ``train`` for every entry.
    t : mapping
        Key -> target array; each value is used as the targets of one run.
    no_hidden_units : int or list
        A single hidden-layer size, or a list of sizes (one per hidden layer).
    iterations : int
        Forwarded to ``train`` as ``epochs``.
    rho : float
        Learning rate forwarded to ``train``.
    print_cost : truthy/falsy
        Forwarded to ``train`` as ``show_cost``.

    Returns
    -------
    dict mapping every key of `t` to the tuple returned by ``train``.
    """
    # train() wants a list of layer sizes; wrap a bare int for convenience
    if isinstance(no_hidden_units, (list, tuple)):
        hidden_layers = list(no_hidden_units)
    else:
        hidden_layers = [no_hidden_units]

    train_gates = {}
    for gate in t:
        # keyword names must match train(X, T, hidden_layers, epochs, rho, show_cost)
        train_gates[gate] = train(X, t[gate], hidden_layers=hidden_layers,
                                  epochs=iterations, rho=rho,
                                  show_cost=print_cost)
    return train_gates

# >>>>>>>>>>>>>>>>>>> match_logic_gate >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
def match_logic_gate(train_pkg, T):
    """Compare the prediction of a trained network with the targets `T`.

    `train_pkg` is the tuple returned by ``train``:
        (W, B, Y, X, idx_done, epochs, converged, rho)

    Returns the list
        [converged, idx_done, epochs, rho, prediction_match, Y]
    where Y is the output of ``predict(train_pkg)`` and prediction_match
    tells whether Y equals T element for element.
    """
    Y = predict(train_pkg)
    prediction_match = np.array_equal(Y, T)

    # positions inside the 8-element tuple produced by train()
    idx_done = train_pkg[4]
    epochs = train_pkg[5]
    converged = train_pkg[6]
    rho = train_pkg[7]

    match_pkg = [converged, idx_done,
                 epochs, rho,
                 prediction_match, Y]

    return match_pkg

# >>>>>>>>>>>>>>>>>>> match_all_gate_outputs >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
def match_all_gate_outputs(train_pkg_all_gates, t):
    """Apply ``match_logic_gate`` to every key of `t`.

    For each key the matching entry of `train_pkg_all_gates` is checked
    against the targets ``t[key]``.  Returns a dict key -> match package.
    """
    return {gate: match_logic_gate(train_pkg_all_gates[gate], t[gate])
            for gate in t}

def print_match(match, gate_name=None):
    """Print a human-readable summary of one match package.

    Parameters
    ----------
    match : sequence
        ``[converged, idx_done, epochs, rho, prediction_match, Y]``; the
        entries at index 0, 1, 2, 4 and 5 are printed.
    gate_name : optional
        Label printed in front of the "converged" line.  When omitted, the
        function falls back to a module-level variable ``i`` if one exists
        (the name the previous version read implicitly) and to an empty
        label otherwise, instead of raising NameError.
    """
    if gate_name is None:
        # backward compatibility: earlier code depended on a global `i`
        gate_name = globals().get("i", "")

    print(str(gate_name) + " converged: " + str(match[0]))
    print("===========================================")
    print("  iter. to converge: " + str(match[1]))
    print("  iter. max: " + str(match[2]))
    if match[4]:
        print("  ==== CORRECT prediction ==== ")
    else:
        print("  ==== INCORRECT prediction ==== ")
    print("  predicted y (y_hat): ")
    print(match[5])
    print()

In [6]:
# >>>>>>>>>>>>>>>>>>> train >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    # all samples in X are fed on every epoch (batch training)
    # X: dataset, n samples x N features
    # T: binary labels, n samples x L outputs
    # hidden_layers : list with number of neurons for each inner layer.
        # e.g. [3, 4] will yield two layers with 3 and 4 units respectively
    # this function needs n samples > 1 (batch optimization).
def train(X, T, hidden_layers=[2], epochs=500, rho=.1, show_cost=0):
    """Train a sigmoid network with full-batch gradient descent.

    Parameters
    ----------
    X : array, samples x features
    T : array, samples x outputs
        Targets compared against the rounded network output.
    hidden_layers : list of int
        Units per hidden layer (the default list is only read, never
        mutated).
    epochs : int
        Maximum number of passes over the whole batch.
    rho : float
        Learning rate applied to every weight gradient.
    show_cost : truthy/falsy
        When truthy, one scatter point (epoch, cost) is drawn per epoch.

    Returns
    -------
    tuple ``(W, B, Y, X, idx_done, epochs, converged, rho)``
        W, B      weights and biases (only W is updated here)
        Y         rounded output of the last epoch, samples x outputs
                  (None when ``epochs`` is 0)
        idx_done  1-based epoch of the third epoch whose rounded output
                  matched T, or 0 if that never happened
        converged True once more than 100 epochs matched T (counted
                  cumulatively), which also stops training early
    """
    no_of_features = X.shape[1]
    no_outputs = T.shape[1]
    W, B = init_weights_biases(no_of_features, no_outputs, hidden_layers)

    match_count = 0
    idx_done = 0
    converged = False
    Y = None  # stays None only if the loop body never runs
    for epoch in range(epochs):
        Z, A, Y_hat = forward_prop(W, B, X.T)
        dJ_dW = backprop(W, Z, A, Y_hat, T.T)

        # gradient-descent step; a separate index keeps `epoch` intact for
        # the cost plot and for idx_done below
        for layer in range(len(W)):
            W[layer] = W[layer] - rho*dJ_dW[layer]

        if show_cost:
            plt.scatter(epoch, cost(T, Y_hat.T))

        Y = np.round(Y_hat).T
        y_and_T_match = np.array_equal(Y, T)

        if y_and_T_match:
            match_count += 1
            if match_count == 3:
                idx_done = epoch + 1
            if match_count > 100:
                # enough matching epochs: treat the fit as stable and stop
                converged = True
                break

    return W, B, Y, X, idx_done, epochs, converged, rho

# >>>>>>>>>>>>>>>>>>> predict >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# train_pkg is the tuple returned by train():
#   (W, B, Y, X, idx_done, epochs, converged, rho)
# where X is stored as samples x features.
def predict(train_pkg):
    """Re-run the trained network on its training inputs and round the output.

    Only W (index 0), B (index 1) and X (index 3) of `train_pkg` are used.
    Returns the rounded network output transposed to samples x outputs.
    """
    W, B, X = train_pkg[0], train_pkg[1], train_pkg[3]
    # forward_prop takes (W, B, X) with X as features x samples
    _, _, Y = forward_prop(W, B, X.T)

    return np.round(Y).T

In [7]:
# Load the spreadsheet and keep the three class names for reference.
df = pd.read_excel(r'fisheriris.xlsx')
names = ["setosa", "versicolor", "virginica"]

# Feature matrix: the first four spreadsheet columns cast to float32
# (presumably the four iris measurements -- confirm against the file).
X = np.array(df.values[:, :4], dtype=np.float32)

# One-hot targets, 150 x 3: rows 0-49 -> column 0 (setosa),
# rows 50-99 -> column 1 (versicolor), rows 100-149 -> column 2 (virginica).
T = np.repeat(np.eye(3), 50, axis=0)

# 50 random row indices.
# NOTE(review): `high` is exclusive, so index 149 is never drawn -- confirm.
idx = np.random.randint(0, high=149, size=50)

In [8]:
# W, B = init_weights_biases(4, 3, [2,2])
# Z, A, Y_hat = forward_prop(W, B, X[0:50,0:4].T)
# dJ_dW = backprop(W, Z, A, Y_hat, T[0:50,:].T)

In [9]:
# 50 random integers drawn from [0, 149)
a = np.random.randint(low=0, high=149, size=50)

In [None]:
# train_pkg = train(X,T)
# Train on the full data set with five hidden layers, plotting the cost.
train_pkg = train(
    X,
    T,
    hidden_layers=[2, 2, 3, 2, 2],
    epochs=30_000,
    rho=.01,
    show_cost=1,
)

In [None]:
# Show the tuple returned by train(): (W, B, Y, X, idx_done, epochs, converged, rho)
train_pkg

In [None]:
# Show the training result tuple again (this cell repeats the previous display)
train_pkg