In [1]:
import numpy as np

In [9]:
class DenseLayer(object):
    
    def __init__(self, input_dim, output_dim, use_bias=True):
        sq = np.sqrt(1. / input_dim)
        print(sq)
        self.use_bias = use_bias
        self.weights = np.random.uniform(-sq, sq, (output_dim, input_dim))
        if use_bias == True:
            self.bias = np.random.uniform(-sq, sq, output_dim)
        else:
            self.bias = np.zeros((output_dim))
        
    def forward(self, X_in):
        return np.tensordot(X_in, self.weights.T, axes=((-1), (0))) + self.bias
        
    def backward(self, dEdY, X_in):
        # dEdW = dEdY * dYdW = dEdY * X
        # dEdb = dEdY * dYdb = dEdY
        # dEdX = dEdY * dYdX = dEdY * W
        axis = tuple(range(len(X_in.shape) - 1))
        dEdW = np.tensordot(dEdY,X_in, axes=((axis), (axis)))
        dEdB = np.sum(dEdY, axis=(axis))
        dEdX = np.tensordot(dEdY, self.weights, axes=(-1, 0))
        
        return dEdX, dEdW, dEdB
    
    def refresh(self, dEdW, dEdB, learning_rate):
        self.weights = self.weights - learning_rate * dEdW
        if self.use_bias == True:
            self.bias = self.bias - learning_rate * dEdB

In [4]:
dense = DenseLayer(3, 1, True)
X = np.array([[0.2, 0.5, 0.3],
[0.2, 0.4, 0.4],
[0.3, 0.1, 0.6],
[0.2, 0.3, 0.5],
[0.5, 0.3, 0.2]])
Y = np.array([[1.], [1.], [1.], [1.], [1.]])

num_iter = 1

for i in range(num_iter):
    Y_c= dense.forward(X)
    dEdY = Y - Y_c
    _, dEdW, dEdB = dense.backward(dEdY, X)
    dense.refresh(dEdW, dEdB, 0.1)

0.5773502691896257
(5, 3)


In [24]:
dense = DenseLayer(5,1,True)
X = np.array([[[-0.8807276 ,  0.35987167,  0.02073506, -0.9615799 ,
          0.50550362],
        [-0.93750772,  0.46892908,  0.65712107, -0.92156692,
          0.81547692],
        [-0.94841669,  0.35337583,  0.13251452, -0.92171502,
          0.96708332]],

       [[-0.98389102,  0.55071545,  0.50947363, -0.99544429,
          0.94198477],
        [-0.88292272,  0.46245851, -0.09000733, -0.72606425,
          0.81062736],
        [-0.58283391, -0.40435749,  0.48705154, -0.8502781 ,
          0.20791288]]])

Y = np.array([[[1],[1],[1]],[[1],[1],[1]]])

num_iter = 10
l = []
for i in range(num_iter):
    Y_c= dense.forward(X)
    dEdY = Y - Y_c
    dEdX, dEdW, dEdB = dense.backward(dEdY, X)
    dense.refresh(dEdW, dEdB, 0.01)
    print(dEdW)
    print(dense.weights)

0.4472135954999579
[[-3.58117345  1.26396149  1.04113282 -3.65308963  2.93671066]]
[[ 0.11673595  0.04498842  0.24055984 -0.26035724 -0.11388993]]
[[-4.28575571  1.53147037  1.27251577 -4.37003311  3.52568363]]
[[ 0.1595935   0.02967372  0.22783469 -0.21665691 -0.14914677]]
[[-5.1293508   1.85186057  1.54962808 -5.22839676  4.23092535]]
[[ 0.21088701  0.01115511  0.2123384  -0.16437294 -0.19145602]]
[[-6.1393865   2.23556647  1.88149242 -6.25608313  5.07537652]]
[[ 0.27228088 -0.01120055  0.19352348 -0.10181211 -0.24220979]]
[[-7.34870223  2.6950812   2.27891169 -7.48650001  6.08650415]]
[[ 0.3457679  -0.03815137  0.17073436 -0.02694711 -0.30307483]]
[[-8.79661677  3.24536283  2.75482023 -8.9596467   7.29719458]]
[[ 0.43373407 -0.07060499  0.14318616  0.06264936 -0.37604677]]
[[-10.53020662   3.90432075   3.32470439 -10.72341449   8.74682278]]
[[ 0.53903613 -0.1096482   0.10993912  0.1698835  -0.463515  ]]
[[-12.60583667   4.69339793   4.00710601 -12.83514385  10.48253255]]
[[ 0.665094