In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from tqdm import tqdm

In [2]:
import numpy as np
import struct
import os

# Load the MNIST data for this exercise
# mat_data contain the training and testing images or labels.
#   Each matrix has size [m,n] for images where:
#      m is the number of examples.
#      n is the number of pixels in each image.
#   or Each matrix has size [m,1] for labels contain the corresponding labels (0 to 9) where:
#      m is the number of examples.
def load_mnist(file_dir, is_images='True'):
    # Read binary data
    bin_file = open(file_dir, 'rb')
    bin_data = bin_file.read()
    bin_file.close()
    # Analysis file header
    if is_images:
        # Read images
        fmt_header = '>iiii'
        magic, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    else:
        # Read labels
        fmt_header = '>ii'
        magic, num_images = struct.unpack_from(fmt_header, bin_data, 0)
        num_rows, num_cols = 1, 1
    data_size = num_images * num_rows * num_cols
    mat_data = struct.unpack_from('>' + str(data_size) + 'B', bin_data, struct.calcsize(fmt_header))
    mat_data = np.reshape(mat_data, [num_images, num_rows * num_cols])
    print('Load images from %s, number: %d, data shape: %s' % (file_dir, num_images, str(mat_data.shape)))
    return mat_data

# tranfer the image from gray to binary and get the one-hot style labels
def data_convert(x, y, m, k):
    x[x<=40]=0
    x[x>40]=1
    ont_hot_y = np.zeros((m,k))    #(60000,10)
    for t in np.arange(0,m):
        ont_hot_y[t,y[t]]=1
    ont_hot_y=ont_hot_y.T #(10,60000)
    return x, ont_hot_y

# call the load_mnist function to get the images and labels of training set and testing set
def load_data(mnist_dir, train_data_dir, train_label_dir, test_data_dir, test_label_dir):
    print('Loading MNIST data from files...')
    
    print(os.path.join(mnist_dir, train_data_dir))
    train_images = load_mnist(os.path.join(mnist_dir, train_data_dir), True)
    train_labels = load_mnist(os.path.join(mnist_dir, train_label_dir), False)
    test_images = load_mnist(os.path.join(mnist_dir, test_data_dir), True)
    test_labels = load_mnist(os.path.join(mnist_dir, test_label_dir), False)
    return train_images, train_labels, test_images, test_labels


In [3]:
from LeNet import LeNet
from model import LeNet5

model = LeNet5()
model1 = LeNet()

In [37]:
test = np.random.normal(size=(1,1,28,28))

In [38]:
a = Conv(1,6,5)

In [39]:
a.forward(test)

TypeError: Conv.conv2d() missing 1 required positional argument: 'kernel'

In [4]:
mnist_dir = "./mnist_data/"
train_data_dir = "train-images.idx3-ubyte"
train_label_dir = "train-labels.idx1-ubyte"
test_data_dir = "t10k-images.idx3-ubyte"
test_label_dir = "t10k-labels.idx1-ubyte"

In [5]:
train_images, train_labels, test_images, test_labels = load_data(mnist_dir, train_data_dir, train_label_dir, test_data_dir, test_label_dir)
print("Got data. ") 

Loading MNIST data from files...
./mnist_data/train-images.idx3-ubyte
Load images from ./mnist_data/train-images.idx3-ubyte, number: 60000, data shape: (60000, 784)
Load images from ./mnist_data/train-labels.idx1-ubyte, number: 60000, data shape: (60000, 1)
Load images from ./mnist_data/t10k-images.idx3-ubyte, number: 10000, data shape: (10000, 784)
Load images from ./mnist_data/t10k-labels.idx1-ubyte, number: 10000, data shape: (10000, 1)
Got data. 


In [None]:
for i in range(2):
    img = np.reshape(train_images [i, :], (28, 28))
    label = np.argmax(train_images [i, :])
    plt.matshow(img, cmap = plt.get_cmap('gray'))
    plt.figure(figsize=(1,1))
    plt.show()


In [40]:
class Relu:
    def __init__(self):
        self.x = None
    
    def forward(self, x):
        self.x = x
        return np.maximum(0, x)
    
    def backprop(self,grad):
        return np.where(self.x > 0, 1, 0) * grad

In [69]:
class Conv:
    def __init__(self, in_channels, out_channels, filter_size, stride=1, padding=0):
        """
        params: in_channels: the number of channels of the input image
                out_channels: the number of channels of the output image
                filter_size:(x,y) the size of the filter
                stride: the stride of the filter
                padding: the padding of the filter
        """
        self.input = None
        self.output = None
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        self.Weight = np.random.normal(scale=1, size=(out_channels,in_channels,filter_size,filter_size))
        #第一个维度是输出通道数，对应着有这么多个卷积核；第二个维度是输入通道数，这是因为卷积核需要负责讲in个channel的输入变成out个channel的输出
        self.W_grad = np.zeros_like(self.Weight)
        self.Bias = np.zeros(out_channels) #每个卷积核有一个偏置参数
        self.B_grad = np.zeros_like(self.Bias)
    
    def conv2d(self,input,kernel,padding,Bias = True):
        N,C,H,W = input.shape
        if padding!= 0:
            input= np.pad(input, ((0,0),(0,0),(padding, padding), (padding, padding)),
                                     'constant',constant_values = (0,0))
        num_kernels,_,filter_size,_ = kernel.shape
        # 计算输出特征图的宽度和高度
        output_W = (W + 2*padding - filter_size) // self.stride + 1
        output_W = int(output_W)
        output_H = (H + 2*padding - filter_size) // self.stride + 1
        output_H = int(output_H)

        # 初始化输出矩阵
        output_matrix = np.zeros((N, num_kernels, output_H, output_W))
        for h in range(output_H):
            for w in range(output_W):
                h_start = h * self.stride
                h_end = h_start + filter_size
                w_start = w * self.stride
                w_end = w_start + filter_size
                input_region = input[:, :, h_start:h_end, w_start:w_end].reshape((N, 1, self.in_channels, filter_size, filter_size))
                output_matrix[:, :, h, w] = np.sum(input_region * kernel, axis=(2, 3, 4))
                if Bias is True:
                    output_matrix[:, :, h, w] += self.Bias
        return output_matrix
    
                    
    def forward(self, X):
        N,C,H,W = X.shape  
        # N for Batch, C for Channels, W for Width, H for Height
        assert(C == self.in_channels)
        self.input = X.copy()
        self.output = self.conv2d(X,self.Weight,self.padding)
        return self.output
    
    
    def backprop(self,grad):
        N, C, H, W = self.input.shape
        _, _, output_H, output_W = grad.shape
        reverse_kernel = self.Weight.transpose((1,0,2,3))
        reverse_kernel  = np.flip(reverse_kernel,axis=(2,3))
        grad_next =  self.conv2d(grad,reverse_kernel,self.filter_size-1,Bias=False)
        self.W_grad = np.zeros_like(self.Weight)
        for h in range(output_H):
            for w in range(output_W):
                tmp_back_grad = grad[:, :, h, w].T.reshape((self.out_channels, 1, 1, 1, N))
                tmp_x = self.input[:, :, h * self.stride:h * self.stride + self.filter_size, w * self.stride:w * self.stride + self.filter_size].transpose((1, 2, 3, 0))
                self.W_grad += np.sum(tmp_back_grad * tmp_x, axis=4)

        self.B_grad = np.sum(grad, axis=(0, 2, 3))

        return grad_next
    
    
    def update_params(self, alpha):
        self.Weight -= alpha * self.W_grad
        self.Bias -= alpha * self.B_grad
        

In [43]:
a = np.random.normal(size=(1,1,28,28))

In [70]:
b = Conv(1,6,5)

In [72]:
b.forward(a) == b.forward1(a)

array([[[[ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         ...,
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True]],

        [[ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         ...,
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True]],

        [[ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         ...,
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  T

In [68]:
b.forward1(a)

array([[[[-5.41934138e+00, -6.16516581e+00,  4.68784192e+00, ...,
           1.44542758e+00,  5.98582228e-01, -4.38124030e+00],
         [-4.42667188e+00, -2.16845807e+00,  7.21180638e-01, ...,
           3.80599048e+00, -5.28166068e+00,  1.97568536e+00],
         [ 3.99127564e+00,  5.89520639e+00, -7.09542100e-01, ...,
          -3.06690332e+00, -6.62753130e+00, -5.73501170e+00],
         ...,
         [ 2.47267156e+00, -6.37388209e+00,  7.16148801e+00, ...,
           1.22034149e+01, -3.42501237e+00,  4.01261293e+00],
         [-7.41248380e+00, -1.83466029e-01, -7.31782129e+00, ...,
           8.94719624e+00, -8.13719368e+00,  1.01133808e+01],
         [-5.31921758e+00,  3.53231372e+00, -8.14025954e+00, ...,
          -9.72576497e+00,  8.16103446e+00,  1.19593904e-01]],

        [[-1.13918624e+00, -4.45870321e-01, -8.56330417e+00, ...,
          -1.91534743e-01, -4.93611111e+00,  7.88504712e+00],
         [ 8.33577642e+00,  1.03483981e+01,  2.78805904e+00, ...,
          -7.98600078e

In [584]:
class MaxPool:
    def __init__(self,pool_size=None):
        if pool_size is None:
            pool_size = 2
        self.pool_size = pool_size
        self.output = None
        self.input = None
        self.mask = None
    
    def forward(self, X):
        N,C,W,H = X.shape 
        self.input = X.copy()
        output_h = H // self.pool_size
        output_w = W // self.pool_size
        self.output = np.zeros((N,C,output_h,output_w))
        self.mask = np.zeros_like(X)  # 初始化最大值位置的掩码
        for i in range(output_h):
            for j in range(output_w):
                pool_window = X[:, :, i * self.pool_size:(i + 1) * self.pool_size, j * self.pool_size:(j + 1) * self.pool_size]
                self.mask[:, :, i * self.pool_size:(i + 1) * self.pool_size, j * self.pool_size:(j + 1) * self.pool_size] = (pool_window == np.max(pool_window, axis=(2, 3), keepdims=True))
                self.output[:,:,i,j] = np.max(X[:,:, i*self.pool_size:(i+1)*self.pool_size, j*self.pool_size:(j+1)*self.pool_size], axis=(2,3))
        return self.output
    
    def backprop(self, back_grad):
        # return back_grad[:, :, :, :, np.newaxis, np.newaxis] * self.mask
        N, C, H,W = back_grad.shape
        grad_next = np.zeros_like(self.input)

        for i in range(H):
            for j in range(W):
                # 获取在最大池化层前向传播时的最大值位置
                grad_next[:, :, i*self.pool_size:(i+1)*self.pool_size, j*self.pool_size:(j+1)*self.pool_size]= \
                self.mask[:,:, i*self.pool_size:(i+1)*self.pool_size, j*self.pool_size:(j+1)*self.pool_size]* back_grad[:, :, i, j][:, :, None, None]

        return grad_next
        

In [586]:
class Linear:
    def __init__(self,input_size,output_size) :
        self.Weight = np.random.normal(scale=1, size=(input_size, output_size))
        self.W_grad = None
        self.Bias = np.zeros(output_size)
        self.B_grad = None
        self.input = None
        
    
    def forward(self,X):
        self.input = X.copy()
        return np.dot(X,self.Weight) + self.Bias
    
    def backprop(self,back_grad):
        self.W_grad = np.dot(self.input.T, back_grad)
        self.B_grad = np.sum(back_grad, axis=0)

        # 计算输入的梯度，用于传递给上一层
        grad_next = np.dot(back_grad, self.Weight.T)

        return grad_next
        
    
    def update_params(self, alpha):
        # 使用梯度下降更新权重和偏置
        self.Weight -= alpha * self.W_grad
        self.Bias -= alpha * self.B_grad
        

In [592]:
class LeNet:
    def __init__(self):
        self.Conv1 = Conv(1,6,5)    # 卷积层1，输入通道为1，6个5*5的卷积核,输出N*6*24*24
        self.Relu1 = Relu() 
        self.Pool1 = MaxPool(2)     # 池化层1，2*2大小的最大池化,输出N*6*12*12
        self.Conv2 = Conv(6,16,5)   # 卷积层2，6输入通道，16个5*5的卷积核,输出N*16*8*8
        self.Relu2 = Relu()
        self.Pool2 = MaxPool(2)     # 池化层2，2*2大小的最大池化，输出N*16*4*4
        self.Fc1 = Linear(16*4*4,120)   # 全连接层1，16*4*4的输入，120个神经元
        self.Relu3 = Relu()
        self.Fc2 = Linear(120,84)       # 全连接层2，120输入，84个神经元
        self.Relu4 = Relu()
        self.Output = Linear(84,10)     # 输出层，84输入，10个输出

    def fit(self,x,batch_size):
        x = x.reshape((batch_size,1,28,28))
        x = self.Pool1.forward(self.Relu1.forward(self.Conv1.forward(x)))
        x = self.Pool2.forward(self.Relu2.forward(self.Conv2.forward(x)))
        x = x.reshape(batch_size,-1)
        x = self.Relu3.forward(self.Fc1.forward(x))
        x = self.Relu4.forward(self.Fc2.forward(x))
        x = self.Output.forward(x)
        return x
    
    def back_prop(self,grad):
        #print(1,grad.sum())
        grad = self.Output.backprop(grad)
        #print(2,grad.sum())
        grad = self.Relu4.backprop(grad)
        #print(3,grad.sum())
        grad = self.Fc2.backprop(grad)
        #print(4,grad.sum())
        grad = self.Relu3.backprop(grad)
        #print(5,grad.sum())
        grad = self.Fc1.backprop(grad)
        #print(6,grad.sum())
        grad = grad.reshape(grad.shape[0],16,4,4)
        grad = self.Pool2.backprop(grad)
        #print(7,grad.sum())
        grad = self.Relu2.backprop(grad)
        #print(8,grad.sum())
        grad = self.Conv2.backprop(grad)
        #print(9,grad.sum())
        grad = self.Pool1.backprop(grad)
        #print(10,grad.sum())
        grad = self.Relu1.backprop(grad)
        #print(grad.sum())
        grad = self.Conv1.backprop(grad)
        #print(grad.sum())

    def update(self,alpha):
        self.Output.update_params(alpha)
        self.Fc2.update_params(alpha)
        self.Fc1.update_params(alpha)
        self.Conv2.update_params(alpha)
        self.Conv1.update_params(alpha)
    
    def get_params(self):
        return [self.Conv1.Weight, self.Conv1.Bias, self.Conv2.Weight, self.Conv2.Bias, self.Fc1.Weight, self.Fc1.Bias, 
                self.Fc2.Weight, self.Fc2.Bias, self.Output.Weight, self.Output.Bias]

    def set_params(self, params):
        self.Conv1.Weight = params[0]
        self.Conv1.Bias = params[1]
        self.Conv2.Weight = params[2]
        self.Conv2.Bias = params[3]
        self.Fc1.Weight = params[4]
        self.Fc1.Bias = params[5]
        self.Fc2.Weight = params[6]
        self.Fc2.Bias = params[7]
        self.Output.Weight = params[8]
        self.Output.Bias = params[9]


In [605]:
from model import LeNet5

In [606]:
model = LeNet5()

In [8]:
model.forward(x[1].reshape(1,1,28,28))

array([[ 3.81404400e-11, -2.37234934e-11, -4.91745721e-12,
         2.28259650e-11, -8.64045514e-13,  8.00566744e-12,
         6.03567807e-14,  4.20152268e-11, -2.87800620e-11,
        -2.08701756e-11]])

In [10]:
model1.fit(x[1],1)

array([[ 3.81404400e-11, -2.37234934e-11, -4.91745721e-12,
         2.28259650e-11, -8.64045514e-13,  8.00566744e-12,
         6.03567807e-14,  4.20152268e-11, -2.87800620e-11,
        -2.08701756e-11]])

In [11]:
def softmax(y_pred,y):
    batch_size ,_ = y_pred.shape
    y_pred = y_pred / y_pred.max(axis=1)[:,None] #防止溢出
    y_pred = np.exp(y_pred)
    y_sum = y_pred.sum(axis = 1)
    y_pred = y_pred/y_sum[:,None]
    loss = -np.log(y_pred).T * y
    loss = loss.sum()/batch_size
    grad = y_pred - y.T
    return loss,grad,y_pred

In [14]:
loss,grad,y_pred = softmax(t,y.T[1])

In [15]:
model.backward(grad)

In [29]:
(model.conv1.W['grad'] - model1.Conv1.W_grad).sum()

5.327823343409133e-09

In [27]:
model1.Conv1.W_grad

array([[[[-1.22502720e-09, -1.73214161e-09, -1.15539599e-09,
          -1.07133327e-09, -9.31249718e-10],
         [-1.31408023e-09, -1.53573719e-09, -1.73652196e-09,
          -1.87546635e-09, -7.99641708e-10],
         [-1.49131644e-09, -1.11743224e-09, -1.73763816e-09,
          -1.00624426e-09, -2.48166321e-10],
         [-1.04192977e-09, -6.66840874e-10, -2.48128432e-10,
          -1.22048931e-09,  1.51851126e-10],
         [ 4.24930277e-10,  6.76333302e-10, -2.57448187e-11,
           2.16812417e-10, -1.32809398e-10]]],


       [[[-1.57758730e-09, -1.92809391e-09, -2.85020775e-09,
          -2.28323365e-09, -1.12391636e-09],
         [-1.73838534e-09, -2.64434623e-09, -3.07336198e-09,
          -1.02564789e-09, -1.66584582e-10],
         [-1.84580698e-09, -1.61343339e-09, -2.35056536e-09,
          -2.20845994e-10,  1.84900211e-09],
         [-1.40295927e-09, -1.28144245e-09, -1.70415394e-09,
           1.56384184e-09,  2.11178317e-09],
         [-1.01276555e-09, -1.59218454e-09

In [16]:
model1.back_prop(grad)

In [13]:
t = model.forward(x[1].reshape(1,1,28,28))

In [608]:
model1 = LeNet()

In [9]:
model1.set_params(model.get_params())

get


In [None]:
model.fc3.W['value'] == model1.Output.Weight

In [11]:
model1.fit(x[1],1)

array([[ 1.13605291e-11,  2.81847630e-11,  7.09720589e-12,
        -3.01094245e-11, -2.17618869e-11, -2.36586596e-11,
         2.75163649e-12,  1.94782353e-11, -4.14228129e-11,
        -1.23027567e-11]])

In [264]:
model = LeNet()

In [14]:
def softmax(y_pred,y):
    batch_size ,_ = y_pred.shape
    y_pred = y_pred / y_pred.max(axis=1)[:,None] #防止溢出
    y_pred = np.exp(y_pred)
    y_sum = y_pred.sum(axis = 1)
    y_pred = y_pred/y_sum[:,None]
    loss = -np.log(y_pred).T * y
    loss = loss.sum()/batch_size
    grad = y_pred - y.T
    return loss,grad,y_pred


In [7]:
x,y = data_convert(train_images, train_labels,60000,10)

In [180]:
def shuffle_batch(batch_size):

    index = np.random.randint(0,len(x),batch_size)
    return x[index],y.T[index].T

In [182]:
x_t,y_t = shuffle_batch(256)

In [591]:
model = LeNet()
for i in range(10):
    x_test,y_test = shuffle_batch(256)
    res = model.fit(x_test,256)
    loss,grad,y_pred = softmax(res,y_test)
    print(loss)
    model.back_prop(grad)
    model.update(0.001)
    
    

2.4922092519987555
2.511818798021163
2.7562971539706056
2.4833074903326406
2.657285158486305
2.568274031734236
2.503898026234402


  output_matrix[:, :, h, w] += np.sum(input_region * kernel, axis=(2, 3, 4))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return np.where(self.x > 0, 1, 0) * grad


nan
nan
nan


In [320]:
x_test,y_test = shuffle_batch(x,y,256)
res = model.fit(x_test,256)
loss,grad,y_pred = softmax(res,y_test)

In [321]:
y_pred

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])

In [278]:
test = np.random.randint(0,60000,256)

In [280]:
x[test].shape

(256, 784)

In [221]:
model = LeNet()

In [222]:
t = model.fit(train_images[0:500],500)

In [223]:
y.shape

(10, 60000)

In [234]:
loss,grad,y_pred = softmax(t,y[:,0:500])

In [269]:
model.back_prop(grad)

In [235]:
grad.sum()

-1.3322676295501878e-15

In [236]:
grad = model.Output.backprop(grad)

In [238]:
grad.shape

(500, 84)

In [239]:
grad = model.Relu4.backprop(grad)

In [241]:
grad = model.Fc2.backprop(grad)

In [242]:
grad = model.Relu3.backprop(grad)

In [243]:
grad = model.Fc1.backprop(grad)

In [244]:
grad = grad.reshape(grad.shape[0],16,4,4)

In [245]:
grad = model.Pool2.backprop(grad)

In [246]:
grad = model.Relu2.backprop(grad)

In [256]:
grad.sum()


43847.29020596151

In [248]:
grad.shape

(500, 16, 8, 8)

In [257]:
grad_T = grad.copy()

In [258]:
grad_T = model.Conv2.backprop(grad_T)

In [260]:
grad_T.sum()

0.0

In [264]:
test = np.random.normal(size=(4,4,4,4))

In [245]:
grad = model.Pool1.backprop(grad)

In [246]:
grad = model.Relu1.backprop(grad)

In [247]:
grad.shape

(500, 6, 24, 24)

In [248]:
grad = model.Conv1.backprop(grad)

UFuncTypeError: Cannot cast ufunc 'add' output from dtype('float64') to dtype('int32') with casting rule 'same_kind'

In [131]:
mi = model.Pool2.input[:, :,2:4, 2:4].reshape(500,16, -1).argmax(axis=2)
                

In [133]:
mi.shape

(500, 16)

In [95]:
mt = model.Pool2.input[:,:,2:4,2:4].reshape(500,16,4)

In [102]:
mt[np.arange(500)[:, np.newaxis], np.arange(16), mi].shape

(500, 16)

In [85]:
model.Pool2.input[:,:,2:4,2:4]

array([[[[ 9.86711572,  0.        ],
         [ 1.51915339, 21.21100715]],

        [[ 7.65888681, 30.15070413],
         [ 0.        , 33.16707785]],

        [[30.37715686, 33.70428073],
         [32.94276686, 32.43304771]],

        ...,

        [[ 0.        ,  0.        ],
         [ 7.17809404,  0.        ]],

        [[16.20247357,  0.        ],
         [29.6646568 , 11.52468127]],

        [[11.73594983,  7.67706397],
         [11.92821407,  8.95725976]]],


       [[[ 6.17965717,  0.        ],
         [ 0.        ,  0.        ]],

        [[ 0.        ,  7.56332347],
         [15.63301541, 19.54469505]],

        [[18.04512809, 28.56861855],
         [ 0.        , 47.38906657]],

        ...,

        [[ 2.83754295,  0.        ],
         [ 0.        ,  1.54956409]],

        [[ 0.        ,  3.89271246],
         [ 0.        ,  0.        ]],

        [[ 0.        , 14.75062209],
         [ 4.23649145, 23.78524426]]],


       [[[17.66890572, 15.24785235],
         [ 8.706252

In [108]:
grad[:,:,2,3]

array([[-1.79257543e+01, -2.53911295e+01,  1.23671979e+01, ...,
        -1.59201843e+00, -7.01397059e+01, -3.28906994e+01],
       [ 2.98740904e+01, -3.12921181e+00,  2.88165821e+01, ...,
         1.48594720e+00,  7.61223499e+01, -2.62200688e+01],
       [ 7.06003224e+00, -2.41324059e+01,  1.92353741e+01, ...,
         1.57584522e+01, -3.25260191e+01, -3.00504193e+01],
       ...,
       [ 3.09714979e+01,  1.05840733e+02,  5.71238819e+01, ...,
        -4.31135078e+00,  5.47352759e+01,  6.78577106e+01],
       [-1.37619080e+02,  9.58543493e+01,  6.92735236e+01, ...,
        -4.34110019e+01, -3.97469715e+01, -1.88472771e-02],
       [-3.60705859e+01, -2.15855608e+01,  6.22782318e+01, ...,
         9.22443336e+00, -4.72492595e+01,  5.16671019e+01]])

In [86]:
model.Pool2.input[:,:,2:4,2:4].reshape(model.Pool2.input.shape[0], model.Pool2.input.shape[1], 4).max(axis=2)

SyntaxError: 'function call' is an illegal expression for augmented assignment (1860721356.py, line 1)

In [21]:
grad = model.Pool2.backprop(grad)

ValueError: not enough values to unpack (expected 4, got 2)

In [433]:
m, n = train_images.shape #(60000,784),28*28 image
    # data processing
x, y = data_convert(train_images, train_labels, m, 10) # x:[m,n], y:[1,m]