In [1]:
import numpy as np
np.random.seed(1)

In [2]:
class Module:
    def __init__(self,module_sequence):
        self.module_sequence = module_sequence
        self.retain_forward_graph = []
    
    def clear_grad(self):
        self.retain_forward_graph = []
    
    def forward(self):
        pass

class Linear:
    def __init__(self,input_dim,output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weight = np.random.randn(output_dim,input_dim)
        self.bias = np.random.randn(output_dim)
    
    def forward(self,x):
        weight_output = np.dot(self.weight,x)
        output = weight_output+self.bias
        return output
    
    def backward(self,retain_forward,learning_rate,loss,):
        bias_delta = loss
        weight_delta = np.dot(np.transpose(np.expand_dims(loss, 0)),np.expand_dims(retain_forward, 0))
        self.weight += learning_rate*weight_delta
        self.bias += learning_rate*bias_delta
        new_loss = np.sum(np.transpose(self.weight)*loss,axis=0)
        return new_loss
    
class Sigmoid:
    def __init__(self):
        pass
    
    def forward(self,x):
        return 1 / (1 + np.exp(-x))
    
    def backward(self,retain_forward,*args):
        return retain_forward*(1-retain_forward)
    
    
class ReLU:
    def __init__(self):
        self.selected = None
    
    def forward(self,x):
        self.selected = None
        self.selected = np.where(x>0,True,False)
        return np.where(x>0,x,0)
    
    def backward(self,retain_forward,*args):
        return np.where(self.selected,retain_forward,0)

In [3]:
class BPNN(Module):
    def __init__(self,module_sequence):
        super().__init__(module_sequence)
        
    def forward(self,x):
        for module in module_sequence:
            self.retain_forward_graph.append(x)
            x = module.forward(x)
        return x
    
    def __call__(self,x):
        return self.forward(x)

In [4]:
class SGD:
    def __init__(self,model,learning_rate,loss_fn):
        self.learning_rate = learning_rate
        self.loss_fn = loss_fn
        self.model = model
        
    def step(self):
        loss = self.loss_fn.gradient
        for idx,module in enumerate(reversed(self.model.module_sequence),start=1):
            retain_output = self.model.retain_forward_graph[len(model.retain_forward_graph)-idx]
            loss = module.backward(retain_output,self.learning_rate,loss) 
    
    def clear_grad(self):
        self.model.clear_grad()

In [5]:
class MSELoss:
    def __init__(self,reduction="mean"):
        self.reduction = reduction
        self.gradient = None
    
    def __call__(self,x,target):
        self.gradient = -x + target
        if self.reduction=="mean":
            return 0.5 * np.mean((target - x) ** 2)
        elif self.reduction=="sum":
            return 0.5 * np.sum((target - x) ** 2)

In [6]:
x = np.random.randn(18*18)
target = np.array([-5,12,4,5,7])
lr = 1
step = 10

In [7]:
module_sequence = [
    Linear(18*18,8*8),
    Sigmoid(),
    Linear(8*8,5),
]

In [8]:
model = BPNN(module_sequence)
loss_fn = MSELoss()
optim = SGD(model,lr,loss_fn)

In [9]:
for i in range(step):
    output = model(x)
    print(output)
    loss = loss_fn(output,target)
    optim.step()
    optim.clear_grad()

[-3.92631494 -5.16808013  5.45055228  5.8279906   2.34512934]
[-2.67208952 28.17708479 -4.11494535 -2.48718072  6.1132957 ]
[-4.33112791  1.32384249  6.55699638  5.50181609  5.03508597]
[-5. 12.  4.  5.  7.]
[-5. 12.  4.  5.  7.]
[-5. 12.  4.  5.  7.]
[-5. 12.  4.  5.  7.]
[-5. 12.  4.  5.  7.]
[-5. 12.  4.  5.  7.]
[-5. 12.  4.  5.  7.]


  return 1 / (1 + np.exp(-x))
  new_loss = np.sum(np.transpose(self.weight)*loss,axis=0)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return retain_forward*(1-retain_forward)


In [10]:
x = np.random.randn(3,3,3)
y = np.random.randn(3,3,3)
np.dot(x,y)

array([[[[ 0.63640447,  1.01621261,  0.43774848],
         [-0.47185267,  0.86775743,  0.66854956],
         [-2.27093016, -0.42945002,  0.19162493]],

        [[ 0.35389845,  1.18698968,  1.05602888],
         [-0.42719643, -0.67942703,  0.83295452],
         [-0.79880131,  0.24355193,  0.54970751]],

        [[ 0.72544038,  0.37172429,  3.31349744],
         [ 0.3471035 , -3.2019965 ,  1.17825467],
         [ 0.04018322, -0.22598931,  0.90885128]]],


       [[[ 0.44673656, -0.90781407,  1.79864027],
         [ 0.73391993, -1.67988974,  0.20607687],
         [ 0.2172338 , -0.73812071,  0.15450631]],

        [[ 0.35786555,  2.62989873, -0.11409718],
         [-1.34782512,  1.13531223,  1.10905464],
         [-2.29362658,  0.66162576,  0.58159607]],

        [[ 1.12531504, -0.2747098 ,  2.8821374 ],
         [ 0.56238331, -1.6887801 ,  0.89843189],
         [-1.53153329, -1.2713458 ,  0.43026626]]],


       [[[-1.76937648, -0.23312321,  0.01346761],
         [ 0.24521459, -4.07870712