In [1]:
import numpy as np

telementary = 1
runNum = 0      ## Increment to utilise caching
# batch_size=100       ## assumed size of dataset
learningRate = 0.02

Taking in input shape

# Layer Class

In [2]:
class nlayer:
    id = 0
    shape = 1               ## Defines self dimension (1D)
    input_layers = []        ## store layer pointer
    weights = None          ## assuming all input activations are concatenated (sorted on layer ID).
    bias = np.array([])      ## store self biases
    activationFn = "linear"         ## store self activation function


    ## Caching
    
    #### store last activation as cache to speed up when multiple layers use this layer as input. So this is evaluated only once.
    cachedRun = -2      # runNum when cache was calculated, can be old
    ## cachedRun = -1 & isAdaptive = 0 is for input layers
    cacheValue = None
    
    ## Flag indicating if it was being evaluated.
    #### This can help in case of self loops, when a layer was being evaluated was evaluated again
    #  meaning one of this layer's input_layer has this layer as one of the inputs (called self-loop is a graph).
    #  In this situation, the last cached value of this layer will be returned.
    # this may be used to simulate LSTM Network.
    beingEvaluated = 0  

    ## Error variance
    #### Store absolute sum of errors in terms of array of sum per node in 1D np array

    ## supports changing widths & depths. Not suitable for inputs and outputs
    isDynamic = 1
    


    ## Methods
    def __init__(self, shape=1, inputLayers=[], isInput=0, setInputValues=[], activationFn="linear", isDynamic=0) -> None:
        self.shape = shape
        self.activationFn = activationFn
        self.bias = np.random.rand(shape, 1)
        self.input_layers = []  ## Clearing on reinitializing

        if(isDynamic==0):
            self.isDynamic = 0
        if(isInput):
            self.cachedRun = -1
            self.isDynamic = 0
            if(len(setInputValues) != 0):
                self.cacheValue = np.array(setInputValues)
        else:
            # generating random weights if given
            if(type(inputLayers) == type([])):
                if(len(inputLayers) != 0):
                    for layer in inputLayers:
                        self.addInputLayer(layer)
            else:
                print("inputLayers should be a List.")
                if(type(inputLayers) == type(nlayer(1))):
                    self.addInputLayer(inputLayers)

    def addInputLayer(self, newInputLayer):
        # check if it doesn't already exists
        for layr in self.input_layers:
            if(newInputLayer == layr):
                print("Layer already exists.")
                return -1

        self.input_layers.append(newInputLayer)
        ## DONE: Generate random weights
        generatedColumn = np.random.rand(self.shape, newInputLayer.shape) - 0.5
        if(type(self.weights) == type(None)):
            self.weights = generatedColumn
        else:
            self.weights = np.concatenate((self.weights, generatedColumn), axis=1)

    def addWidth_to_Layer(self, addWidth):
        if(addWidth > 0):
            self.shape += addWidth
            self.bias = np.concatenate((self.bias, (np.random.rand(addWidth) - 0.5)))
            
            ## generating new row of random weights
            generatedRow = np.random.rand(addWidth, self.weights.shape[1]) - 0.5
            self.weights = np.concatenate((self.weights, generatedRow))
        else:
            print("error, doesn't support decrease.")


    def applyActivationFn(self,rawActivation):
        if(self.activationFn == "linear"):
            return rawActivation

        if(self.activationFn == "relu"):
            return np.maximum(rawActivation, 0)

        if(self.activationFn == "softmax"):
            A = np.exp(rawActivation) / sum(np.exp(rawActivation))
            return A
 
    def applyDerivActivationFn(self, input):
        if(self.activationFn == "linear"):
            return 1
        if(self.activationFn == "relu"):
            return (input > 0)
        else:
            if(telementary): print("Activation Function =", self.activationFn, " didn't match, returning as ReLU")
            return (input > 0)
            
      

    def getActivation(self):    ## return np array of activation of current layer
        ## beingEvaluated == 1 means the node was triggered by a loop in the network. Returning last value cached prevents infinite loops.

        
        if(self.cachedRun == runNum or self.cachedRun == -1 or self.beingEvaluated == 1):   ## if activation was already calculated for this run OR is an input layer
            if(telementary): 
                if(self.cachedRun == -1):
                    print("Provided input from cache")
                else:
                    print("Re-used Cached Value")
            return(self.cacheValue)
        else:
            ## compiling a numpy array of all activation values listed in input layer. 
            # inputArr = np.array([])
            inputArr = np.array([[]])

            self.beingEvaluated = 1

            for layrIndx in range(len(self.input_layers)):
                if(inputArr.shape[1] > 0):      ##  Handle first situation when inputArr is empty.
                    inputArr = np.concatenate((inputArr, self.input_layers[layrIndx].getActivation()))
                else:
                    inputArr = self.input_layers[layrIndx].getActivation()


            self.beingEvaluated = 0

            ##  Checking dimensions of input matrix
            if(len(inputArr.shape) == 1):
                if(telementary): print("Input values should be a 2D array.")
                inputArr = inputArr[:, np.newaxis]



            # Checking if shape matches
            if(inputArr.shape[0] > self.weights.shape[1]):
                if(telementary): print("!!!SHAPE MISMATCH!!!", "inputArr.shape[0] =", inputArr.shape[0], "self.weights.shape[1] =", self.weights.shape[1])
                ## Adjust matrix dimension & adding new random weights to match size
                generatedColumn = np.random.rand(self.weights.shape[0], (inputArr.shape[0] - self.weights.shape[1])) - 0.5
                self.weights = np.concatenate((self.weights, generatedColumn), axis=1)


            elif(inputArr.shape[0] < self.weights.shape[1]):       ## input layer may have been removed causing weight matrix to be larger than inputs
                print("!! Input Layer smaller than expected. !!", "inputArr.shape[0] =", inputArr.shape[0], "self.weights.shape[1] =", self.weights.shape[1])
                return -1
            
            
            rawActivation = np.matmul(self.weights, inputArr) + self.bias
            activation = self.applyActivationFn(rawActivation=rawActivation)

            self.cachedRun = runNum
            # self.cacheValue = activation          ## storing a pointer to activation calculated
            self.cacheValue = np.copy(activation)   ## duplicating array

            if(telementary): print("activation =", activation, "& cached")  

            return activation


    def correct_error(self, activation_error):
        # if(type(self.cacheValue)==type(None)):
        #     self.
        if(self.cachedRun >= 0):    ## check if is run before
            ## compiling a numpy array of all activation values listed in input layer. 
            inputArr = np.array([[]])
            self.beingEvaluated = 1
            layerLengths = []   ## Store each layer's length to distribute corrections to them later

            for layrIndx in range(len(self.input_layers)):
                layerLengths.append(self.input_layers[layrIndx].shape)
                if(inputArr.shape[1] > 0):
                    inputArr = np.concatenate((inputArr, self.input_layers[layrIndx].getActivation()))
                else:
                    inputArr = self.input_layers[layrIndx].getActivation()
            self.beingEvaluated = 0


            # inputArr2 = inputArr[np.newaxis]

            batch_size = activation_error.shape[1]

            # if(len(inputArr.shape) == 1):       ## if array is 1D, convert to 2D to support Transpose.
            #     inputArrT = inputArr[np.newaxis].T
            # else:
            #     inputArrT = inputArr.T


            # dZ = self.cacheValue - activation_error
            dZ = activation_error

            dW = (1/batch_size)*np.matmul(dZ, inputArr.T)
            dB = (1/batch_size)*np.sum(dZ)

            oldWeights = self.weights
            ## Updating self weights & biases
            self.weights = self.weights - learningRate*dW
            self.bias = self.bias - learningRate*dB

            ## Finding errors for input layers
            # dIZ = np.matmul(np.transpose(self.weights),dZ)
            dIZ = np.matmul((oldWeights.T), dZ) * self.applyDerivActivationFn(inputArr)

            ## Splitting input corrections to their corresponding layers
            splitPoints = [0]
            lengthTillNow = 0
            for layerIndx in range(len(layerLengths)):
                lengthTillNow += layerLengths[layerIndx]
                splitPoints.append(lengthTillNow)

                self.input_layers[layerIndx].correct_error(dIZ[splitPoints[-2]:splitPoints[-1]])

            return [self.cacheValue]

                



# Network Class

In [3]:
class network:
    input_shape=1  # Currently only 1D
    output_shape=1 # Currently only 1D
 
    input_layer = None      ## Pointer to input nlayer
    output_layer = None     ## Pointer to output nlayer

    layers = []
    numberOfLayers = 0      ## used to assign ID to new layer in matrix

    adaptive = 1

    def __init__(self, input_shape, output_shape, insertDefault=0) -> None:
        self.input_shape = input_shape
        self.output_shape = output_shape

        # Connect output with 1 adaptive neuron input
        self.input_layer = nlayer(input_shape, isInput=1)

        if(insertDefault==1):
            hiddenLayer = nlayer(1,inputLayers=[self.input_layer],activationFn="relu")
            self.output_layer = nlayer(output_shape, inputLayers=[hiddenLayer], isDynamic=1)
        else:
            self.output_layer = nlayer(output_shape, inputLayers=[self.input_layer], isDynamic=1)


    def addLayerAtLast(self, shape, isDynamic=1, activationFn="linear"):
        oldInputs = self.output_layer.input_layers
        newLayer = nlayer(shape=shape, inputLayers=oldInputs, isDynamic=isDynamic, activationFn=activationFn)
        newLayer.weights = self.output_layer.weights
        self.output_layer.weights = None
        self.output_layer.input_layers=[]
        self.output_layer.addInputLayer(newLayer)

        ## Transferring Weight matrix

    def setInput(self, input_values):
        # print("SETTING INPUT LAYER & STORING VALUES")
        if(type(self.input_layer) != type(None)):
            if(len(input_values) < self.input_layer.shape):
                print("ERROR: Unable to reduce input layer shape. Insert len(input values) >= input_shape")
            else:
                self.input_layer.shape = len(input_values)
                self.input_layer.cachedRun = -1
                self.input_layer.isDynamic = 0
                self.input_layer.cacheValue = np.array(input_values)
        else:   ## Initialize new input layer
                self.input_layer = nlayer(len(input_values), isInput=1, setInputValues=np.array(input_values))

                
                linker = self.output_layer
                if(type(linker) != type(None)):
                    while(len(linker.input_layers) > 0):    ## following only oldest (1st in list) links to reach input
                        linker = linker.input_layers[0]                               
                    linker.input_layer = [self.input_layer]



    def forward_prop(self, input_values=None):    # find result activation from input activation and weights
        global runNum
        if(type(input_values) != type(None)):
            self.input_layer.cacheValue = input_values

        if(self.input_layer.cachedRun == -1 and type(self.input_layer.cacheValue) != type(None)):
            output_activations = self.output_layer.getActivation()
            runNum += 1
            return output_activations
        else:
            print("Input uninitialized")
            return -1

    def backward_prop(self, input_values, trueOutput):
        global runNum
        self.input_layer.cacheValue = input_values

        if(telementary): print("getting forward prop predictions")

        predictedOutput = self.output_layer.getActivation()

        if(telementary): print("starting backprop")
        predictions = self.output_layer.correct_error(predictedOutput - trueOutput)
        runNum += 1
        return predictions


# TESTING

## MNIST Dataset Testing

In [4]:
nt = network(784, 10, insertDefault=0)

In [5]:
nt.addLayerAtLast(10,isDynamic=0,activationFn="relu")
nt.addLayerAtLast(10,isDynamic=0,activationFn="relu")

nt.output_layer.activationFn = "softmax"

In [6]:
import pandas as pd
from matplotlib import pyplot as plt

data = pd.read_csv('mnist-train.csv')
# Adaptive-Matrix/

In [7]:
data = np.array(data)
m, n = data.shape
np.random.shuffle(data) # shuffle before splitting into dev and training sets

data_dev = data[0:1000].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n]
X_dev = X_dev / 255.

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.
_,m_train = X_train.shape

X_trainT = X_train.T

def one_hot(Y, maxExpected):
    one_hot_Y = np.zeros((Y.size, maxExpected + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

## find the index of most probable number guessed by network
def get_predictions(A2):
    return np.argmax(A2, 0)

## find ratio of correct predictions to all data
def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size


In [8]:
telementary = 1

In [9]:
len(X_trainT)

41000

In [10]:
telementary = 0
out = (nt.forward_prop(X_train.T[0].T))
telementary = 1
out

array([[0.11452963],
       [0.09934475],
       [0.05360177],
       [0.07270456],
       [0.15829415],
       [0.06190901],
       [0.2381791 ],
       [0.03075123],
       [0.14226216],
       [0.02842365]])

In [11]:
out = (nt.forward_prop(X_train))
print(out)
# print(nt.forward_prop(X_train))
# print(nt.forward_prop(X_train.T[0:2].T))

Provided input from cache
activation = [[5.38530324 4.57071658 0.28362176 ... 3.47333291 4.0276361  3.06415105]
 [0.5119491  2.47515365 0.         ... 2.67386402 1.63788273 0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [3.60482738 0.         0.01530972 ... 6.36090621 3.95942382 3.28466385]
 [0.         0.         0.         ... 0.         0.         2.88317708]
 [1.41110897 0.7565865  0.         ... 0.         1.00644792 0.        ]] & cached
activation = [[0.79707908 1.14811668 0.         ... 2.10023655 0.99539751 0.40653972]
 [2.04753067 1.90278643 0.         ... 0.42258991 1.55846223 0.09055716]
 [0.         0.         0.         ... 1.04882467 0.0651973  0.56414718]
 ...
 [0.         0.         0.11350083 ... 0.         0.         0.        ]
 [1.54486532 1.83627676 0.         ... 2.11924752 1.12363961 2.4525121 ]
 [0.         0.         0.         ... 0.         0.         0.49777765]] & cached
activation = [[0.11452963 0.11261101 0.070

### Backprop Testing

In [12]:
Y_train_oneHot = one_hot(Y_train, maxExpected=9)
predictions = nt.backward_prop(input_values=X_train, trueOutput=Y_train_oneHot)

getting forward prop predictions
Provided input from cache
activation = [[5.38530324 4.57071658 0.28362176 ... 3.47333291 4.0276361  3.06415105]
 [0.5119491  2.47515365 0.         ... 2.67386402 1.63788273 0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [3.60482738 0.         0.01530972 ... 6.36090621 3.95942382 3.28466385]
 [0.         0.         0.         ... 0.         0.         2.88317708]
 [1.41110897 0.7565865  0.         ... 0.         1.00644792 0.        ]] & cached
activation = [[0.79707908 1.14811668 0.         ... 2.10023655 0.99539751 0.40653972]
 [2.04753067 1.90278643 0.         ... 0.42258991 1.55846223 0.09055716]
 [0.         0.         0.         ... 1.04882467 0.0651973  0.56414718]
 ...
 [0.         0.         0.11350083 ... 0.         0.         0.        ]
 [1.54486532 1.83627676 0.         ... 2.11924752 1.12363961 2.4525121 ]
 [0.         0.         0.         ... 0.         0.         0.49777765]] & cached
activatio

In [13]:
learningRate = 0.5

In [14]:
telementary = 0

oldWeights = nt.output_layer.weights

Y_train_oneHot = one_hot(Y_train, maxExpected=9)
predictions = nt.backward_prop(input_values=X_train, trueOutput=Y_train_oneHot)

newWeights = nt.output_layer.weights

diffWeight = newWeights - oldWeights

print(diffWeight)

[[ 0.04781619  0.00595349  0.00704501  0.04998885 -0.00300287  0.08181268
  -0.00114323 -0.00234215  0.07187464 -0.00654329]
 [-0.02851183  0.03851538  0.00809333 -0.02422061  0.02542729 -0.06588512
  -0.00399128 -0.00731265  0.00786051  0.0119373 ]
 [ 0.06391461  0.09195657  0.03667291  0.08143166  0.00913149  0.07738329
   0.00387466  0.00446119  0.05195837  0.00161715]
 [ 0.05279666  0.0322814   0.03640766  0.0565544  -0.0007606   0.04187297
   0.00526429  0.00673634  0.048056   -0.0008378 ]
 [ 0.01277355 -0.03769351 -0.00627722 -0.01468997 -0.00830263 -0.034437
  -0.00361026 -0.00091876 -0.00089168 -0.01762817]
 [-0.11535952 -0.10020584 -0.08771097 -0.16534689 -0.01744377 -0.11784822
  -0.01107233 -0.00912338 -0.1285089  -0.0068546 ]
 [-0.04171493 -0.06551522 -0.01921415 -0.06260114 -0.00547124 -0.06407336
   0.01594849  0.0079549  -0.08227586  0.017372  ]
 [-0.00584451  0.0205351  -0.00322728  0.03009845 -0.00075193  0.05765138
  -0.00226119 -0.00160356  0.01450366 -0.00024477]
 [

In [15]:
print(predictions[0])

[[0.12476102 0.12450594 0.07358703 ... 0.01754357 0.08459271 0.02450812]
 [0.10018581 0.03892316 0.13957181 ... 0.14857635 0.13203048 0.16235255]
 [0.05839076 0.07863778 0.01939762 ... 0.01048137 0.0161731  0.03518837]
 ...
 [0.03008702 0.07500503 0.12190086 ... 0.07443209 0.04485464 0.05220022]
 [0.13372779 0.10999085 0.21180755 ... 0.25266259 0.20021159 0.17904962]
 [0.03070728 0.06034503 0.00814539 ... 0.00901936 0.00791292 0.00638979]]


### Gradient Descent on MNIST

In [16]:
# accuracySum = 0
# runCount = 0

telementary = 0

for it in range(250):
    Y_train_oneHot = one_hot(Y_train, maxExpected=9)
    predictedRAW = nt.backward_prop(input_values=X_train, trueOutput=Y_train_oneHot)[0]

    if(it % 10 == 0):
        print("iterations =", it)
        predictions = get_predictions(predictedRAW)
        print("Accuracy =", get_accuracy(predictions, Y_train))
    

telementary = 1

# # accuracy = accuracySum/len(X_train)
# accuracy = accuracySum/runCount



iterations = 0
[0 0 4 ... 0 0 4] [4 2 4 ... 9 7 8]
Accuracy = 0.12921951219512195
iterations = 10
[0 0 7 ... 0 7 1] [4 2 4 ... 9 7 8]
Accuracy = 0.3607560975609756
iterations = 20
[0 2 9 ... 9 7 8] [4 2 4 ... 9 7 8]
Accuracy = 0.34087804878048783
iterations = 30
[0 2 9 ... 4 7 3] [4 2 4 ... 9 7 8]
Accuracy = 0.4948780487804878
iterations = 40
[0 2 9 ... 4 7 3] [4 2 4 ... 9 7 8]
Accuracy = 0.6324146341463415
iterations = 50
[0 2 9 ... 4 7 8] [4 2 4 ... 9 7 8]
Accuracy = 0.6360243902439024
iterations = 60
[0 2 9 ... 4 7 8] [4 2 4 ... 9 7 8]
Accuracy = 0.6778048780487805
iterations = 70
[0 2 4 ... 4 7 5] [4 2 4 ... 9 7 8]
Accuracy = 0.6685853658536586
iterations = 80
[0 2 4 ... 4 7 5] [4 2 4 ... 9 7 8]
Accuracy = 0.7091951219512195
iterations = 90
[0 2 9 ... 4 7 5] [4 2 4 ... 9 7 8]
Accuracy = 0.7036097560975609
iterations = 100
[0 2 9 ... 9 7 8] [4 2 4 ... 9 7 8]
Accuracy = 0.7628780487804878
iterations = 110
[0 2 9 ... 9 7 8] [4 2 4 ... 9 7 8]
Accuracy = 0.7648048780487805
iterations = 

KeyboardInterrupt: 

## Simulator

In [46]:
wt = nt.output_layer.input_layers[0].input_layers[0].weights
# print(wt.shape)

# x1 = X_trainT[0:2]
x1 = X_trainT[0]

print("wt", wt.shape)
print("x1", x1.shape)
print("x1T", x1.T.shape)

act1 = np.matmul(wt, x1.T)
act1

# Playground

In [88]:
x = np.array([[1,2,3], [5,6,7]])
x.shape

(2, 3)

In [89]:
empt = np.array([[]])
empt

array([], shape=(1, 0), dtype=float64)

In [103]:

if(empt.shape[1] > 0):
    empt = np.concatenate((empt, x))
else:
    empt = x

print(empt)


[[1 2 3]
 [5 6 7]
 [1 2 3]
 [5 6 7]
 [1 2 3]
 [5 6 7]
 [1 2 3]
 [5 6 7]
 [1 2 3]
 [5 6 7]]


In [27]:
y = np.array([[-1,0,1]])

x + y

array([[0, 2, 4],
       [4, 6, 8]])

In [35]:
x = np.random.rand(4,2) - 0.5
print(x)

[[-0.21379049  0.318942  ]
 [ 0.33342107 -0.20130355]
 [-0.22479054  0.23240682]
 [-0.10478904 -0.04715419]]


In [40]:
def applyDerivActivationFn(input):
    return (input > 0)

In [41]:
applyDerivActivationFn(x)

array([[False,  True],
       [ True, False],
       [False,  True],
       [False, False]])

# Main

In [5]:
n1 = network(2,1)

in1 = np.array([0,1,2])
wtMat = np.array([[5,6,7],[8,9,10]])
# biases = np.array([5,25])
biases = np.array([0.5,0.25])

In [37]:
output_activations = np.matmul(wtMat, in1) + biases
print(output_activations)

[20.5  29.25]
