In [92]:
import numpy as np
import copy

Interesting details on weight initialization https://pouannes.github.io/blog/initialization/

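Use the Kaiming (He) initialization method.

"The Kaiming paper accordingly suggests to initialize the weights of layer l with a zero-mean Gaussian distribution with a standard deviation of $\sqrt{2/N_l}$, and null biases."

$N_l$ is the number of inputs feeding each neuron of layer $l$ (its fan-in). For the fully connected layers below, that is the number of neurons in the upstream layer, which is what `downstreamConnect` uses.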

In [103]:
def relu(number):
    """
    Rectified linear unit.

    number: a scalar, or an array-like of numbers.

    For a scalar, returns the input when it is greater than 0 and 0 otherwise.
    Array-like inputs are rectified element-wise and returned as a NumPy array.
    """
    if np.ndim(number) == 0:
        # Scalar path: the built-in max keeps plain Python numbers as they are.
        return max(0, number)
    # The built-in max cannot compare whole arrays, so rectify element-wise.
    return np.maximum(number, 0)

In [179]:
class Neuron:
    """
    A single unit of the network.

    Attributes:
        activation: current activation value.
        inNeurons:  neurons feeding into this one.
        weights:    weights[i] is the weight of the connection from inNeurons[i].
        outNeurons: neurons this one feeds into.
        bias:       bias value stored on the neuron.
    """
    def __init__(self, activation=0, inNeurons=None, outNeurons=None, weights=None, bias=0):
        # The defaults are None rather than []: a list default is created once and
        # would be shared by every Neuron, so addIn/addOut on one neuron would
        # silently modify all the others. Each neuron gets its own lists instead.
        self.inNeurons = [] if inNeurons is None else list(inNeurons)
        self.outNeurons = [] if outNeurons is None else list(outNeurons)
        self.activation = activation
        self.weights = [] if weights is None else list(weights)
        self.bias = bias

    def addIn(self, n, weight):
        """
        Registers neuron "n" as an input of this neuron with the given connection weight
        """
        self.inNeurons.append(n)
        self.weights.append(weight)

    def addOut(self, neuron):
        """
        Registers "neuron" as an output of this neuron
        """
        self.outNeurons.append(neuron)

    def getActivation(self):
        """
        Returns the current activation
        """
        return self.activation

    def setActivation(self, a):
        """
        Sets the activation to "a" and returns it
        """
        self.activation = a
        return a


class Layer:
    """
    A fully connected layer of neurons.

    Attributes:
        neurons:     the neuron objects of this layer.
        activations: NumPy array with one activation per neuron.
        weights:     weight matrix of the connection coming from upLayer, shaped
                     (size of this layer, size of upLayer); None until connected.
        upLayer:     the layer feeding into this one (None until connected).
        downLayer:   the layer this one feeds into (None until connected).
    """
    def __init__(self, neurons=None, generate=True, size=0):
        """
        neurons:  optional initial neurons (copied, the caller's list is not modified)
        generate: when True, "size" new Neuron objects are appended
        size:     number of neurons to generate
        """
        self.neurons = [] if neurons is None else list(neurons)
        print("Initializing Layer")
        if generate:
            for _ in range(size):
                self.neurons.append(Neuron())
        print("Created layer of size %d" % len(self.neurons))
        self.upLayer = None
        self.downLayer = None
        self.weights = None
        self.activations = np.array([n.getActivation() for n in self.neurons])

    def setActivations(self, actList):
        """
        Sets activations of neurons in layer to actList
        Raises ValueError if actList does not hold exactly one value per neuron
        """
        if len(actList) != len(self.neurons):
            raise ValueError("Invalid activation list length")
        for neuron, act in zip(self.neurons, actList):
            neuron.setActivation(act)
        # Always keep an array so getActivations() returns the same type whether
        # the values came from a plain list or from update().
        self.activations = np.asarray(actList)

    def getActivations(self):
        """
        Returns activation of neurons in layer
        """
        return self.activations

    def downstreamConnect(self, l, weights=None):
        """
        Connects neuron layer "l" to self
        Connection is such that "l" is downstream in the neural network
        layer1.upstreamConnect(layer2) is equivalent to layer2.downstreamConnect(layer1)

        weights: optional matrix of shape (len(l.neurons), len(self.neurons)).
        When omitted, every weight is sampled independently from a zero-mean
        Gaussian whose standard deviation is sqrt(2 / len(self.neurons)) (Kaiming).
        NOTE: because the entries are independent samples, the matrix of a small
        layer may deviate noticeably from the target distribution.

        The matrix is stored on the downstream layer (l.weights) and returned.
        Raises ValueError if the given weights have the wrong shape, or if weights
        must be generated for an empty upstream layer.
        """
        upLayerNeurons = self.neurons
        downLayerNeurons = l.neurons
        expectedShape = (len(downLayerNeurons), len(upLayerNeurons))

        # "is None" instead of "not weights": the truth value of an array is
        # ambiguous, so the old check raised as soon as a matrix was passed in.
        if weights is None:
            if not upLayerNeurons:
                raise ValueError("Cannot initialize weights for an empty upstream layer")
            # Kaiming: standard deviation (not variance) of sqrt(2 / fan-in)
            weightStd = np.sqrt(2 / len(upLayerNeurons))
            weights = np.random.normal(scale=weightStd, size=expectedShape)
            print("Created %d by %d weight matrix" % weights.shape)
        else:
            weights = np.asarray(weights)
            if weights.shape != expectedShape:
                raise ValueError("Expected weight matrix of shape %s, got %s"
                                 % (expectedShape, weights.shape))

        for d, downNeuron in enumerate(downLayerNeurons):
            for u, upNeuron in enumerate(upLayerNeurons):
                upNeuron.addOut(downNeuron)
                downNeuron.addIn(upNeuron, weights[d, u])

        l.weights = weights
        l.upLayer = self
        self.downLayer = l

        return weights

    def upstreamConnect(self, l, weights=None):
        """
        Connects neuron layer "l" to self
        Connection is such that "l" is upstream in the neural network
        layer1.upstreamConnect(layer2) is equivalent to layer2.downstreamConnect(layer1)
        Returns the weight matrix of the new connection
        """
        # Delegate so both directions build identical connections; the previous
        # implementation called addIn without a weight and never set the matrix.
        return l.downstreamConnect(self, weights)

    def update(self):
        """
        Updates neuron activations based on weight matrix and activations of upstream layer
        Raises RuntimeError if the layer has not been connected to an upstream layer
        """
        if self.upLayer is None or self.weights is None:
            raise RuntimeError("Layer has no upstream connection to update from")
        weightedSum = np.matmul(self.weights, self.upLayer.getActivations())
        # Element-wise ReLU on the whole vector at once
        self.setActivations(np.maximum(weightedSum, 0))


In [181]:
# Build a 10-neuron layer and give it the activations 0..9.
layer1 = Layer(size=10)
layer1.setActivations(list(range(10)))
print("layer1 activations: %s" % layer1.activations)
layer2 = Layer(size=5)

# Connect layer1 -> layer2. No weights are passed, so downstreamConnect samples a
# random matrix; no seed is set in the cells shown, so the values differ per run.
w = layer1.downstreamConnect(layer2)
# downstreamConnect stores the matrix on the downstream layer, so layer1.weights
# is still None here and layer2.weights holds the 5 x 10 matrix.
print(layer1.weights)
print(layer2.weights)
# layer2 activations before and after propagating layer1's activations.
print("layer2 activations: %s" % layer2.activations)
layer2.update()
print("layer2 activations: %s" % layer2.activations)

Initializing Layer
Created layer of size 10
layer1 activations: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Initializing Layer
Created layer of size 5
Created 5 by 10 weight matrix
None
[[-1.47612657e-01  7.00266436e-02 -3.86857952e-01  2.44231871e-01
  -5.11581105e-01  6.29678468e-01  9.69320771e-02 -1.14095097e-01
   8.70955712e-02  5.25006423e-01]
 [-3.27654066e-02  8.35222746e-01 -4.33882671e-01  2.30364963e-01
  -6.07749374e-01 -2.08042401e-01 -3.43093467e-01  3.83549804e-02
   4.54905837e-01 -5.47256141e-01]
 [-1.02174289e-01  8.32014127e-01 -5.62639431e-01 -7.09635658e-01
  -2.57103406e-01  5.42895129e-01 -2.55470158e-01 -1.14638487e-01
   4.75743832e-02  1.30422402e-02]
 [-7.68870245e-01  6.21537055e-01 -2.20485065e-01 -3.37433382e-01
  -7.35873928e-01  2.19016727e-01  5.47458948e-01  7.02717176e-01
  -2.33518773e-01  3.25378269e-01]
 [ 3.57918967e-01  3.60815909e-01  3.49489333e-01  2.82763751e-01
   7.51037453e-01 -2.74775861e-01  3.91986022e-01  1.04634436e-01
  -6.26162139e-04  1.352297

In [133]:
# List the Neuron objects held by each layer (Neuron defines no __repr__, so the
# default object repr is printed, one entry per neuron).
print(layer1.neurons)
print(layer2.neurons)

[<__main__.Neuron object at 0x000002174CFB1EC8>, <__main__.Neuron object at 0x000002174CFB1AC8>, <__main__.Neuron object at 0x000002174CFB19C8>, <__main__.Neuron object at 0x000002174CFB14C8>, <__main__.Neuron object at 0x000002174CFB1E48>, <__main__.Neuron object at 0x000002174CFB1608>, <__main__.Neuron object at 0x000002174CFB1248>, <__main__.Neuron object at 0x000002174CFB11C8>, <__main__.Neuron object at 0x000002174CFB1388>, <__main__.Neuron object at 0x000002174CFB1B08>]
[<__main__.Neuron object at 0x000002174CFB1288>, <__main__.Neuron object at 0x000002174CFB1348>, <__main__.Neuron object at 0x000002174CFB1E88>, <__main__.Neuron object at 0x000002174CFB1148>, <__main__.Neuron object at 0x000002174CFB1088>]


In [110]:
# Shape of the weight matrix returned by downstreamConnect.
# NOTE(review): the matrix is built as (downstream size, upstream size), i.e. (5, 10)
# for the layers above; a recorded (10, 5) would come from an older run — re-run to refresh.
print(w.shape)

(10, 5)
