In [51]:
import numpy as np

telementary = 1
runNum = 0      ## Increment to utilise caching
batch_size=100       ## assumed size of dataset
learningRate = 0.001

Taking in input shape

# Layer Class

In [104]:
class nlayer:
    id = 0
    shape = 1               ## Defines self dimension (1D)
    input_layers = []        ## store layer pointer
    weights = None          ## assuming all input activations are concatenated (sorted on layer ID).
    bias = np.array([])      ## store self biases
    activationFn = "linear"         ## store self activation function


    ## Caching
    
    #### store last activation as cache to speed up when multiple layers use this layer as input. So this is evaluated only once.
    cachedRun = -2      # runNum when cache was calculated, can be old
    ## cachedRun = -1 & isAdaptive = 0 is for input layers
    cacheValue = None
    
    ## Flag indicating if it was being evaluated.
    #### This can help in case of self loops, when a layer was being evaluated was evaluated again
    #  meaning one of this layer's input_layer has this layer as one of the inputs (called self-loop is a graph).
    #  In this situation, the last cached value of this layer will be returned.
    # this may be used to simulate LSTM Network.
    beingEvaluated = 0  

    ## Error variance
    #### Store absolute sum of errors in terms of array of sum per node in 1D np array

    ## supports changing widths & depths. Not suitable for inputs and outputs
    isDynamic = 1
    


    ## Methods
    def __init__(self, shape=1, inputLayers=[], isInput=0, setInputValues=[], activationFn="linear", isDynamic=0) -> None:
        self.shape = shape
        self.activationFn = activationFn
        self.bias = np.zeros(shape)
        self.input_layers = []  ## Clearing on reinitializing

        if(isDynamic==0):
            self.isDynamic = 0
        if(isInput):
            self.cachedRun = -1
            self.isDynamic = 0
            if(len(setInputValues) != 0):
                self.cacheValue = np.array(setInputValues)
        else:
            # generating random weights if given
            if(type(inputLayers) == type([])):
                if(len(inputLayers) != 0):
                    for layer in inputLayers:
                        self.addInputLayer(layer)
            else:
                print("inputLayers should be a List.")
                if(type(inputLayers) == type(nlayer(1))):
                    self.addInputLayer(inputLayers)

    def addInputLayer(self, newInputLayer):
        # check if it doesn't already exists
        for layr in self.input_layers:
            if(newInputLayer == layr):
                print("Layer already exists.")
                return -1

        self.input_layers.append(newInputLayer)
        ## DONE: Generate random weights
        generatedColumn = np.random.rand(self.shape, newInputLayer.shape) - 0.5
        if(type(self.weights) == type(None)):
            self.weights = generatedColumn
        else:
            self.weights = np.concatenate((self.weights, generatedColumn), axis=1)

    def addWidth_to_Layer(self, addWidth):
        if(addWidth > 0):
            self.shape += addWidth
            self.bias = np.concatenate((self.bias, (np.random.rand(addWidth) - 0.5)))
            
            ## generating new row of random weights
            generatedRow = np.random.rand(addWidth, self.weights.shape[1]) - 0.5
            self.weights = np.concatenate((self.weights, generatedRow))
        else:
            print("error, doesn't support decrease.")


    def applyActivationFn(self,rawActivation):
        if(self.activationFn == "linear"):
            return rawActivation

        if(self.activationFn == "relu"):
            return np.maximum(rawActivation, 0)

        if(self.activationFn == "softmax"):
            A = np.exp(rawActivation) / sum(np.exp(rawActivation))
            return A
 
    def applyDerivActivationFn(self, input):
        if(self.activationFn == "linear"):
            return 1
        if(self.activationFn == "relu"):
            return (input > 0)
            
      

    def getActivation(self):    ## return np array of activation of current layer
        ## beingEvaluated == 1 means the node was triggered by a loop in the network. Returning last value cached prevents infinite loops.

        
        if(self.cachedRun == runNum or self.cachedRun == -1 or self.beingEvaluated == 1):   ## if activation was already calculated for this run OR is an input layer
            if(telementary): 
                if(self.cachedRun == -1):
                    print("Provided input from cache")
                else:
                    print("Re-used Cached Value")
            return(self.cacheValue)
        else:
            ## compiling a numpy array of all activation values listed in input layer. 
            # inputArr = np.array([])
            inputArr = np.array([[]])

            self.beingEvaluated = 1

            for layrIndx in range(len(self.input_layers)):
                if(inputArr.shape[1] > 0):      ##  Handle first situation when inputArr is empty.
                    inputArr = np.concatenate((inputArr, self.input_layers[layrIndx].getActivation()))
                else:
                    inputArr = self.input_layers[layrIndx].getActivation()


            self.beingEvaluated = 0

            # Checking if shape matches
            if(inputArr.shape[0] > self.weights.shape[1]):
                if(telementary): print("!!!SHAPE MISMATCH!!!")  
                ## Adjust matrix dimension & adding new random weights to match size
                generatedColumn = np.random.rand(self.weights.shape[0], (inputArr.shape[0] - self.weights.shape[1])) - 0.5
                self.weights = np.concatenate((self.weights, generatedColumn), axis=1)


            elif(inputArr.shape[0] < self.weights.shape[1]):       ## input layer may have been removed causing weight matrix to be larger than inputs
                print("!! Input Layer smaller than expected. !!")
                print("inputArr.shape[0] =", inputArr.shape[0], "self.weights.shape[1] =", self.weights.shape[1])
                return -1
            
            rawActivation = np.matmul(self.weights, inputArr.T) + self.bias
            activation = self.applyActivationFn(rawActivation=rawActivation)

            self.cachedRun = runNum
            # self.cacheValue = activation          ## storing a pointer to activation calculated
            self.cacheValue = np.copy(activation)   ## duplicating array

            if(telementary): print("activation =", activation, "& cached")  

            return activation


    def correct_error(self, activation_error):
        # if(type(self.cacheValue)==type(None)):
        #     self.
        if(self.cachedRun >= 0):    ## check if is run before
            ## compiling a numpy array of all activation values listed in input layer. 
            inputArr = np.array([])
            self.beingEvaluated = 1
            layerLengths = []   ## Store each layer's length to distribute corrections to them later

            for layrIndx in range(len(self.input_layers)):
                layerLengths.append(self.input_layers[layrIndx].shape)
                if(inputArr.shape[1] > 0):
                    inputArr = np.concatenate((inputArr, self.input_layers[layrIndx].getActivation()))
                else:
                    inputArr = self.input_layers[layrIndx].getActivation()
            self.beingEvaluated = 0


            inputArr2 = inputArr[np.newaxis]

            # if(len(inputArr.shape) == 1):       ## if array is 1D, convert to 2D to support Transpose.
            #     inputArrT = inputArr[np.newaxis].T
            # else:
            #     inputArrT = inputArr.T


            dZ = self.cacheValue - activation_error

            dW = (1/batch_size)*np.matmul(dZ, inputArr2)
            dB = (1/batch_size)*np.sum(dZ)

            oldWeights = self.weights
            ## Updating self weights & biases
            self.weights = self.weights - learningRate*dW
            self.bias = self.bias - learningRate*dB

            ## Finding errors for input layers
            # dIZ = np.matmul(np.transpose(self.weights),dZ)
            dIZ = np.matmul((oldWeights.T), dZ) * self.applyDerivActivationFn(inputArr)

            ## Splitting input corrections to their corresponding layers
            splitPoints = [0]
            lengthTillNow = 0
            for layerIndx in range(len(layerLengths)):
                lengthTillNow += layerLengths[layerIndx]
                splitPoints.append(lengthTillNow)

                self.input_layers[layerIndx].correct_error(dIZ[splitPoints[-2]:splitPoints[-1]])

            return [self.cacheValue]

                



# Network Class

In [105]:
class network:
    input_shape=1  # Currently only 1D
    output_shape=1 # Currently only 1D
 
    input_layer = None      ## Pointer to input nlayer
    output_layer = None     ## Pointer to output nlayer

    layers = []
    numberOfLayers = 0      ## used to assign ID to new layer in matrix

    adaptive = 1

    def __init__(self, input_shape, output_shape, insertDefault=0) -> None:
        self.input_shape = input_shape
        self.output_shape = output_shape

        # Connect output with 1 adaptive neuron input
        self.input_layer = nlayer(input_shape, isInput=1)

        if(insertDefault==1):
            hiddenLayer = nlayer(1,inputLayers=[self.input_layer],activationFn="relu")
            self.output_layer = nlayer(output_shape, inputLayers=[hiddenLayer], isDynamic=1)
        else:
            self.output_layer = nlayer(output_shape, inputLayers=[self.input_layer], isDynamic=1)


    def addLayerAtLast(self, shape, isDynamic=1, activationFn="linear"):
        oldInputs = self.output_layer.input_layers
        newLayer = nlayer(shape=shape, inputLayers=oldInputs, isDynamic=isDynamic, activationFn=activationFn)
        newLayer.weights = self.output_layer.weights
        self.output_layer.weights = None
        self.output_layer.input_layers=[]
        self.output_layer.addInputLayer(newLayer)

        ## Transferring Weight matrix

    def setInput(self, input_values):
        # print("SETTING INPUT LAYER & STORING VALUES")
        if(type(self.input_layer) != type(None)):
            if(len(input_values) < self.input_layer.shape):
                print("ERROR: Unable to reduce input layer shape. Insert len(input values) >= input_shape")
            else:
                self.input_layer.shape = len(input_values)
                self.input_layer.cachedRun = -1
                self.input_layer.isDynamic = 0
                self.input_layer.cacheValue = np.array(input_values)
        else:   ## Initialize new input layer
                self.input_layer = nlayer(len(input_values), isInput=1, setInputValues=np.array(input_values))

                
                linker = self.output_layer
                if(type(linker) != type(None)):
                    while(len(linker.input_layers) > 0):    ## following only oldest (1st in list) links to reach input
                        linker = linker.input_layers[0]                               
                    linker.input_layer = [self.input_layer]



    def forward_prop(self, input_values=None):    # find result activation from input activation and weights
        global runNum
        if(type(input_values) != type(None)):
            self.input_layer.cacheValue = input_values

        if(self.input_layer.cachedRun == -1 and type(self.input_layer.cacheValue) != type(None)):
            output_activations = self.output_layer.getActivation()
            runNum += 1
            return output_activations
        else:
            print("Input uninitialized")
            return -1

    def backward_prop(self, input_values, trueOutput):
        self.input_layer.cacheValue = input_values

        predictedOutput = self.output_layer.getActivation()
        
        return self.output_layer.correct_error(trueOutput - predictedOutput)


# TESTING

In [108]:
nt = network(784, 10, insertDefault=0)

In [109]:
nt.addLayerAtLast(10,isDynamic=0,activationFn="relu")
nt.addLayerAtLast(10,isDynamic=0,activationFn="relu")

In [110]:
import pandas as pd
from matplotlib import pyplot as plt

data = pd.read_csv('mnist-train.csv')
# Adaptive-Matrix/

In [111]:
data = np.array(data)
m, n = data.shape
np.random.shuffle(data) # shuffle before splitting into dev and training sets

data_dev = data[0:1000].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n]
X_dev = X_dev / 255.

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.
_,m_train = X_train.shape

X_trainT = X_train.T

def one_hot(Y, maxExpected):
    one_hot_Y = np.zeros((Y.size, maxExpected + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

## find the most probable guessed by network
def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size


In [112]:
telementary = 1

In [113]:
len(X_trainT)

41000

In [114]:
X_train.T[0].shape

(784,)

In [115]:
nt.output_layer.weights.shape

(10, 10)

In [116]:
# print(nt.forward_prop(X_trainT[1]))

In [117]:
print(nt.forward_prop(X_trainT[0:2]))

Provided input from cache
!! Input Layer smaller than expected. !!
inputArr.shape[0] = 2 self.weights.shape[1] = 784


AttributeError: 'int' object has no attribute 'shape'

In [13]:
accuracySum = 0
runCount = 0

for it in range(len(X_trainT)):
    one_hot_y = one_hot(Y_train[it], 9)
    pred = get_predictions(nt.backward_prop(X_trainT[it],one_hot_y))
    if(pred.all() == one_hot_y.all()):
        accuracySum += 1
    runCount += 1
    
# accuracy = accuracySum/len(X_train)
accuracy = accuracySum/runCount



Provided input from cache
activation = [34.5778307  40.3057012  36.65048974 36.13773697 39.60475285 38.40595201
 36.18061522 33.5145827  40.8114093  35.43933799] & cached
activation = [194.43320581 181.71247634 155.29674334 208.59397513 168.15118968
 226.45109374 230.2225758  200.42286536 152.38421142 169.54796298] & cached
activation = [ 972.43402768 1287.642818    807.98647891  847.43150933  722.90491912
  881.09102522  852.34527912  824.60244821  916.85821253 1100.53535724] & cached
Re-used Cached Value


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1 is different from 10)

In [None]:
telementary = 0

## Simulator

In [46]:
wt = nt.output_layer.input_layers[0].input_layers[0].weights
# print(wt.shape)

# x1 = X_trainT[0:2]
x1 = X_trainT[0]

print("wt", wt.shape)
print("x1", x1.shape)
print("x1T", x1.T.shape)

act1 = np.matmul(wt, x1.T)
act1

# Playground

In [88]:
x = np.array([[1,2,3], [5,6,7]])
x.shape

(2, 3)

In [89]:
empt = np.array([[]])
empt

array([], shape=(1, 0), dtype=float64)

In [103]:

if(empt.shape[1] > 0):
    empt = np.concatenate((empt, x))
else:
    empt = x

print(empt)


[[1 2 3]
 [5 6 7]
 [1 2 3]
 [5 6 7]
 [1 2 3]
 [5 6 7]
 [1 2 3]
 [5 6 7]
 [1 2 3]
 [5 6 7]]


In [27]:
y = np.array([[-1,0,1]])

x + y

array([[0, 2, 4],
       [4, 6, 8]])

In [36]:
x = np.random.rand(1,2) - 0.5
print(x)

[[ 0.12561546 -0.19739658]]


# Main

In [5]:
n1 = network(2,1)

in1 = np.array([0,1,2])
wtMat = np.array([[5,6,7],[8,9,10]])
# biases = np.array([5,25])
biases = np.array([0.5,0.25])

In [37]:
output_activations = np.matmul(wtMat, in1) + biases
print(output_activations)

[20.5  29.25]
