In [37]:
import numpy as np
import torch
import torch.nn.functional as F
import torchvision.datasets as datasets #加载数据
import torchvision.transforms as transforms #数据增强
import matplotlib.pyplot as plt
class Conv_2D():
    """2-D convolution layer written with NumPy.

    The forward pass unrolls the (zero padded) input into a patch matrix
    (im2col) and multiplies it with the flattened kernels.  The backward pass
    computes the gradients w.r.t. the input, the kernels and the bias and
    immediately applies a plain gradient-descent step via :meth:`update`.

    Attributes set by the constructor:
        weights: kernels of shape (output_dim, input_dim, ksize, ksize).
        bias: vector of shape (output_dim,).
        weights_grad / bias_grad: last gradients, same shapes as above.
        Jacobi: gradient w.r.t. the padded input; filled in by ``backward``.
    """

    def __init__(self, input_dim, output_dim, ksize=3,
                 stride=1, padding=(0,0), dilataion=None):
        """Create the layer and draw the initial parameters.

        :param input_dim: number of input channels
        :param output_dim: number of output channels (kernels)
        :param ksize: side length of the square kernel
        :param stride: step between two neighbouring windows
        :param padding: ``(before, after)`` zeros added to each spatial axis,
            e.g. ``(1, 2)`` pads one line on the top/left and two lines on
            the bottom/right
        :param dilataion: stored as ``self.dilatation``; not used by the
            computations in this class
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.ksize = ksize
        self.stride = stride
        self.padding = padding
        self.dilatation = dilataion
        self.output_h = None
        self.output_w = None

        self.patial_w = None
        # Kept for compatibility; none of the methods below read this buffer.
        self.grad = np.zeros((self.output_dim, self.ksize, self.ksize, self.input_dim), dtype=np.float64)
        # Padded input of the most recent forward pass.
        self.input = None
        # Kernels in (O, C, k, k) layout and bias, both drawn from N(0, 0.1^2).
        # np.random.normal already returns float64, so no dtype fix-up is
        # needed afterwards.
        self.weights = np.random.normal(scale=0.1,
            size=(output_dim, input_dim, ksize, ksize))
        self.bias = np.random.normal(scale=0.1, size=output_dim)

        self.weights_grad = np.zeros(self.weights.shape)  # gradient w.r.t. the kernels
        self.bias_grad = np.zeros(self.bias.shape)  # gradient w.r.t. the bias
        self.Jacobi = None  # gradient w.r.t. the (padded) input

    def forward(self, input):
        """Convolve a batch of images with the kernels.

        :param input: array of shape (N, C, H, W)
        :return: array of shape (N, output_dim, output_h, output_w)
        """
        assert len(np.shape(input)) == 4
        pad_before, pad_after = self.padding[0], self.padding[1]
        input = np.pad(input,
                       ((0, 0), (0, 0), (pad_before, pad_after), (pad_before, pad_after)),
                       mode='constant', constant_values=0)
        self.input = input
        N, C, H, W = input.shape

        # The windows must tile the padded input exactly.
        steps_h, rest_h = divmod(H - self.ksize, self.stride)
        steps_w, rest_w = divmod(W - self.ksize, self.stride)
        assert rest_h == 0
        assert rest_w == 0
        self.output_h = steps_h + 1
        self.output_w = steps_w + 1

        patches = self.im2col(input, self.ksize, self.stride)  # (N*out_h*out_w, C*k*k)
        kernels = self.weights.reshape(self.output_dim, -1).T  # (C*k*k, O)
        output = np.dot(patches, kernels)  # (N*out_h*out_w, O)
        output += self.bias

        # Bring the channel axis in front of the spatial axes again.
        output = output.reshape(N, self.output_h * self.output_w, self.output_dim)
        output = output.transpose(0, 2, 1)
        return output.reshape(N, self.output_dim, self.output_h, self.output_w)

    def backward(self, last_layer_delta, lr):
        """Back-propagate through the layer and update its parameters.

        For every kernel tap ``(m, n)`` the output position ``(y, x)`` reads
        the padded input at ``(y*stride + m, x*stride + n)``.  The input
        gradient is therefore a strided scatter-add of the output gradient
        weighted by that tap, and the kernel gradient is the correlation of
        the output gradient with the same strided input view.

        :param last_layer_delta: gradient w.r.t. the output,
            shape (N, output_dim, output_h, output_w)
        :param lr: learning rate handed to :meth:`update`
        :return: gradient w.r.t. the un-padded input, shape (N, C, H, W)
        """
        assert self.input is not None, "forward() must be called before backward()"
        delta = np.asarray(last_layer_delta)
        _, _, H, W = self.input.shape
        step = self.stride
        # Exclusive extent of a strided view holding exactly output_h / output_w items.
        span_h = step * (self.output_h - 1) + 1
        span_w = step * (self.output_w - 1) + 1

        # Fresh buffers on every call so that repeated calls are independent.
        padded_grad = np.zeros(self.input.shape)
        weights_grad = np.zeros(self.weights.shape)
        for m in range(self.ksize):
            rows = slice(m, m + span_h, step)
            for n in range(self.ksize):
                cols = slice(n, n + span_w, step)
                padded_grad[:, :, rows, cols] += np.einsum(
                    'nohw,oc->nchw', delta, self.weights[:, :, m, n])
                weights_grad[:, :, m, n] = np.einsum(
                    'nohw,nchw->oc', delta, self.input[:, :, rows, cols])

        # Drop the rows/columns that belong to the padding.
        pad_before, pad_after = self.padding[0], self.padding[1]
        self.Jacobi = padded_grad[:, :, pad_before:H - pad_after,
                                  pad_before:W - pad_after]
        self.weights_grad = weights_grad
        self.bias_grad = np.sum(delta, axis=(0, 2, 3))

        self.update(lr)
        return self.Jacobi

    def update(self, lr):
        """Clean up the stored gradients and take one gradient-descent step.

        Gradients with magnitude below 1e-10 are zeroed and all gradients are
        clipped to [-100, 100] before ``lr * gradient`` is subtracted.

        :param lr: learning rate
        """
        for grad in (self.weights_grad, self.bias_grad):
            grad[np.abs(grad) < 1e-10] = 0
            np.clip(grad, -100, 100, out=grad)

        self.weights -= lr * self.weights_grad
        self.bias -= lr * self.bias_grad

    def im2col(self, image, ksize, stride):
        """Unroll every ``ksize x ksize`` window of ``image`` into one row.

        :param image: array of shape (N, C, H, W)
        :param ksize: window side length
        :param stride: step between windows
        :return: array of shape (N * windows, C * ksize * ksize); the windows
            of one image are contiguous and ordered row by row
        """
        N, C, H, W = image.shape
        columns = []
        for top in range(0, H - ksize + 1, stride):
            for left in range(0, W - ksize + 1, stride):
                window = image[:, :, top:top + ksize, left:left + ksize]
                columns.append(window.reshape(N, -1))
        columns = np.array(columns)  # (windows, N, C*k*k)

        return columns.transpose(1, 0, 2).reshape(-1, C * ksize * ksize)

class max_pooling_2D():
    """Max-pooling layer for (N, C, H, W) arrays written with NumPy."""

    def __init__(self,input_dim =3,stride = 2,ksize=2,padding=0):
        '''
        :param input_dim: number of channels (stored, not used by the computations)
        :param stride: step between two pooling windows
        :param ksize: side length of the square pooling window
        :param padding: number of zero lines added on every side of H and W
        '''
        self.input_dim = input_dim
        self.input = None  # padded input of the last forward pass
        self.output = None
        self.stride = stride
        self.ksize = ksize
        self.padding = padding
        self.record = None  # zero array shaped like the raw input; kept for compatibility

        self.Jacobi = None  # gradient w.r.t. the padded input

    def forward(self,input):
        '''
        Take the maximum of every pooling window.

        The input is padded with zeros, so a window made only of negative
        values and padding yields 0.

        :param input: (batchsize,c,h,w)
        :return: (batchsize,c,out_h,out_w)
        '''
        assert len(np.shape(input)) == 4

        self.record = np.zeros(input.shape)
        pad = self.padding
        input = np.pad(input, ((0, 0), (0, 0), (pad, pad), (pad, pad)),
                       mode='constant', constant_values=0)
        self.input = input
        n_batch, n_chan, height, width = input.shape

        # `height`/`width` already contain the padding, so it must not be
        # added a second time when counting the windows.
        output_h = max((height - self.ksize) // self.stride + 1, 0)
        output_w = max((width - self.ksize) // self.stride + 1, 0)

        output = np.zeros((n_batch, n_chan, output_h, output_w))
        for i in range(output_h):
            top = i * self.stride
            for j in range(output_w):
                left = j * self.stride
                window = input[:, :, top:top + self.ksize, left:left + self.ksize]
                output[:, :, i, j] = window.max(axis=(2, 3))

        self.output = output
        return output

    def backward(self,next_dz):
        '''
        Route every output gradient to the position of the window maximum.

        Contributions are accumulated, so an input element that is the
        maximum of several overlapping windows receives the sum of their
        gradients.

        :param next_dz: gradient w.r.t. the output, (N,C,out_h,out_w)
        :return: gradient w.r.t. the un-padded input
        '''
        self.Jacobi = np.zeros(self.input.shape)
        N, C, H, W = self.input.shape
        _, _, out_h, out_w = next_dz.shape
        batch_idx, chan_idx = np.indices((N, C))
        for i in range(out_h):
            top = i * self.stride
            for j in range(out_w):
                left = j * self.stride
                window = self.input[:, :, top:top + self.ksize, left:left + self.ksize]
                flat_idx = np.argmax(window.reshape(N, C, self.ksize * self.ksize), axis=2)

                rows = top + flat_idx // self.ksize  # row of the maximum
                cols = left + flat_idx % self.ksize  # column of the maximum
                # Each (n, c) pair occurs once per window, so the fancy-indexed
                # in-place addition cannot drop duplicates.
                self.Jacobi[batch_idx, chan_idx, rows, cols] += next_dz[:, :, i, j]

        # Return the gradient without the padded border.
        return self.Jacobi[:,:,self.padding:H-self.padding,self.padding:W-self.padding]

class Relu():  # ReLU activation layer
    """Element-wise rectified linear unit."""

    def __init__(self):
        self.Jacobi = None  # 0/1 mask after forward, input gradient after backward
        self.mask = None  # 0/1 mask of the positive inputs of the last forward pass

    def forward(self,input):
        '''
        :param input: (N,C,H,W)
        :return: input with every negative entry replaced by 0
        '''
        output = (np.abs(input)+input)/2
        # Keep the mask in its own attribute so that backward never destroys it.
        self.mask = (output > 0).astype(output.dtype)
        self.Jacobi = self.mask.copy()
        return output

    def backward(self,next_dz):
        '''
        :param next_dz: gradient w.r.t. the output of this layer
        :return: gradient w.r.t. the input (zero wherever the input was not positive)
        '''
        self.Jacobi = self.mask*next_dz

        return self.Jacobi


class softmax():
    """Row-wise softmax layer."""

    def __init__(self):
        self.output = None  # probabilities of the last forward pass
        self.input_delta = None  # zero array shaped like the input; kept for compatibility
        self.Jacobi = None  # gradient w.r.t. the input

    def forward(self,input):
        '''
        :param input: (batchsize,n) numpy array of logits
        :return: (batchsize,n) array whose rows sum to 1
        '''
        self.Jacobi = np.zeros(input.shape)
        self.input_delta = np.zeros(input.shape)

        # Softmax is invariant to a per-row shift; subtracting the row maximum
        # keeps np.exp from overflowing on large logits.
        shifted = input - np.max(input, axis=1, keepdims=True)
        exps = np.exp(shifted)
        output = exps / np.sum(exps, axis=1, keepdims=True)
        self.output = output
        return output

    def backward(self,last_layer_delta):
        '''
        Multiply the incoming gradient with the softmax Jacobian.

        With s the stored output, d s_j / d z_n = s_n * ([j == n] - s_j), hence
        grad_n = s_n * (delta_n - sum_j delta_j * s_j).

        :param last_layer_delta: (N,n) gradient w.r.t. the output
        :return: (N,n) gradient w.r.t. the input
        '''
        weighted_sum = np.sum(last_layer_delta * self.output, axis=1, keepdims=True)
        self.Jacobi = self.output * (last_layer_delta - weighted_sum)

        return self.Jacobi


class Linear():
    """Fully connected layer computing ``input @ weights.T + bias``."""

    def __init__(self,input_num,output_num):
        """
        :param input_num: number of input features
        :param output_num: number of output features
        """
        self.input_num, self.output_num = input_num,output_num
        # Weight matrix of shape (output_num, input_num) and bias of shape (1, output_num).
        self.weights = np.random.normal(scale=0.1,size = (self.output_num,self.input_num))
        self.bias = np.random.normal(scale=0.1,size = (1,self.output_num))
        self.input_delta = None  # input of the last forward pass
        self.weights_grad=np.zeros(self.weights.shape)  # gradient w.r.t. the weights
        self.bias_grad = np.zeros(self.bias.shape)  # gradient w.r.t. the bias
        self.Jacobi = None  # gradient w.r.t. the input, set by backward
        self.Jocobi = None  # misspelled legacy attribute, kept so old accesses still work

    def forward(self, input):
        '''
        :param input: (batchsize, input_num)
        :return: (batchsize, output_num)
        '''
        self.input_delta = input  # needed for the weight gradient
        return np.dot(input, self.weights.T) + self.bias

    def backward(self,last_layer_delta,lr):
        '''
        Compute the gradients, apply one gradient-descent step and return the
        gradient w.r.t. the input.

        Gradients with magnitude below 1e-10 are zeroed and all gradients are
        clipped to [-100, 100] before the update.

        :param last_layer_delta:  (batchsize,output_num)
        :param lr: learning rate
        :return: (batchsize,input_num)
        '''
        # The input gradient must use the weights from before the update.
        self.Jacobi = np.dot(last_layer_delta, self.weights)

        self.bias_grad = np.sum(last_layer_delta,axis = 0)  # (output_num,)
        self.weights_grad = np.dot(last_layer_delta.T,self.input_delta)  # (output_num,input_num)

        for grad in (self.weights_grad, self.bias_grad):
            grad[np.abs(grad) < 1e-10] = 0
            np.clip(grad, -100, 100, out=grad)

        self.bias -= lr*self.bias_grad
        self.weights -= lr*self.weights_grad

        return self.Jacobi

class Normal():
    """Placeholder for a normalisation layer; no computation is implemented."""

    def __init__(self):
        # All state starts out empty; nothing in this class fills it in.
        self.Jacobi, self.mean, self.tad = None, None, None

    def forward(self,input):
        """Accept ``input`` and do nothing; the result is always ``None``."""
        return None

class sigmoid():
    """Element-wise logistic sigmoid layer."""

    def __init__(self):
        self.Jacobi = None  # gradient w.r.t. the input, set by backward
        self.input_delta = None  # derivative of the sigmoid at the last input

    def forward(self, input):
        '''
        :param input: (batchsize,input_num)
        :return: 1 / (1 + exp(-input)), same shape as the input
        '''
        x = np.asarray(input)
        # exp(-|x|) lies in (0, 1], so it never overflows.  Both branches below
        # are algebraically equal to 1 / (1 + exp(-x)).
        z = np.exp(-np.abs(x))
        output = np.where(x >= 0, 1 / (1 + z), z / (1 + z))

        # The derivative exp(-x) / (1 + exp(-x))**2 is symmetric in x, so it
        # can be written with the overflow-free z as well.
        self.input_delta = z / (1 + z) ** 2
        return output

    def backward(self, last_layer_delta):
        '''
        last_layer_delta must already have the same shape as the forward input.

        :param last_layer_delta: gradient w.r.t. the output
        :return: gradient w.r.t. the input
        '''
        self.Jacobi = self.input_delta*last_layer_delta
        return  self.Jacobi


class CNN_Nets():
    """Small CNN classifier: three conv+ReLU stages followed by two linear layers.

    The first linear layer is sized for 22*22*10 features, which is what the
    three un-padded stride-1 convolutions (kernel sizes 5, 3, 1) produce for
    a 28x28 single-channel input.
    """

    def __init__(self,lr=0.0001,batchsize=10):
        '''
        Build the layers of a classifier for 28*28 inputs.

        :param lr: learning rate passed to every trainable layer in backward
        :param batchsize: stored as ``self.bachsize``; the gradient is
            reshaped using the shape recorded in forward, so batches of any
            size can be processed
        '''
        self.lr = lr
        self.bachsize = batchsize

        self.conv1 = Conv_2D(input_dim=1,output_dim=26,ksize = 5,stride = 1, padding =(0,0))  # 28 -> 24
        self.Relu_1 = Relu()

        self.conv2 = Conv_2D(input_dim=26,output_dim=52 ,ksize = 3, stride = 1, padding= (0,0))  # 24 -> 22
        self.Relu_2 = Relu()

        # 1x1 convolution that reduces the channel count from 52 to 10.
        self.conv3 = Conv_2D(input_dim=52, output_dim=10, ksize=1, stride=1, padding=(0, 0))  # 22 -> 22
        self.Relu_3 = Relu()

        self.fc_1 = Linear(input_num=22*22*10,output_num=1000)
        self.sigmoid_1 = sigmoid()
        self.fc_2 = Linear(input_num=1000,output_num=10)
        self.softmax = softmax()

        self.CrossEntropy = CrossEntropy()

        self.outut = None  # misspelled legacy attribute, kept so old accesses still work
        self.output = None  # class probabilities of the last forward pass
        self.loss = None
        self.Jacobi = None
        self._feature_shape = None  # shape of the conv features before flattening

    def forward(self,input,labels):
        '''
        Run the network and store the probabilities and the loss.

        :param input: (n,c,h,w)
        :param labels: (batchsize,10) one-hot encoded labels
        :return: None; results are stored in ``self.output`` and ``self.loss``
        '''
        N,C,H,W = input.shape
        # Convolutional stages.
        output = self.conv1.forward(input)
        output = self.Relu_1.forward(output)

        output = self.conv2.forward(output)
        output = self.Relu_2.forward(output)

        output = self.conv3.forward(output)
        output = self.Relu_3.forward(output)

        # Remember the feature-map shape so backward can undo the flattening.
        self._feature_shape = output.shape
        output = np.reshape(output,(N,-1))

        # First fully connected layer.
        output = self.fc_1.forward(output)
        output = self.sigmoid_1.forward(output)
        # Second fully connected layer.
        output= self.fc_2.forward(output)
        output = self.softmax.forward(output)  # (batchsize,10)
        self.output = output
        # The loss layer also prepares the gradient consumed by backward.
        self.loss = self.CrossEntropy.forward(output,labels)

    def backward(self):
        '''
        Back-propagate the loss of the last forward pass through all layers.

        Every trainable layer updates its own parameters with ``self.lr``.

        :return: gradient w.r.t. the network input
        '''
        grad = self.CrossEntropy.Jacobi
        grad = self.softmax.backward(grad)
        grad = self.fc_2.backward(grad,lr =self.lr)
        grad = self.sigmoid_1.backward(grad)
        grad = self.fc_1.backward(grad,lr = self.lr)
        # Restore the image layout using the shape seen in forward instead of
        # a hard-coded batch size.
        grad = grad.reshape(self._feature_shape)

        grad = self.Relu_3.backward(grad)
        grad = self.conv3.backward(grad,lr= self.lr)
        grad = self.Relu_2.backward(grad)
        grad = self.conv2.backward(grad,lr=self.lr)
        grad = self.Relu_1.backward(grad)
        grad = self.conv1.backward(grad,lr=self.lr)

        return grad

class CrossEntropy():
    """Element-wise binary cross entropy summed over classes, averaged over the batch."""

    # Probabilities are clipped to [_EPS, 1 - _EPS] so that the logarithms and
    # the divisions in the gradient stay finite.
    _EPS = 1e-12

    def __init__(self):
        self.loss = None  # loss of the last forward pass
        self.Jacobi = None  # gradient of the loss w.r.t. the predicted probabilities

    def forward(self,input,labels):
        '''
        Compute the loss and cache its gradient.

        loss = sum(-(y*log(p) + (1-y)*log(1-p))) / batchsize, therefore
        d loss / d p = -(y/p - (1-y)/(1-p)) / batchsize.

        :param input: predicted probabilities, first axis is the batch
        :param labels: targets with the same shape as ``input``
        :return: scalar loss
        '''
        bachsize = input.shape[0]
        # float64 is required: 1 - 1e-12 is not representable in float32.
        probs = np.clip(np.asarray(input, dtype=np.float64), self._EPS, 1 - self._EPS)
        loss = np.sum(-(labels * np.log(probs) + (1 - labels) * np.log(1 - probs)) / bachsize)
        self.loss = loss
        self.Jacobi = -(labels / probs - (1 - labels) / (1 - probs)) / bachsize
        return loss

    def backwards(self):
        '''Return the gradient cached by the last forward pass.'''
        return self.Jacobi

if __name__ =="__main__":
    pass
    # Entry point of the first cell: it runs nothing. The commented-out calls
    # below are gradient checks whose definitions are not part of this file.
    # Convolution layer backward-gradient check: no problems found.
    #conv_layer_grad_check()
    # Linear layer backward-gradient check: no problems found.
    #Linear_grad_check()

    # softmax / sigmoid check: the backward pass had a problem (resolved, note dated 1029 14:00).
    #softmax_grad_check()

    # torch_fc = torch.nn.Linear(500,100)
    # print(torch_fc.weight.shape)
    #softmax_grad_check()
    # Loss_grad_check()
    #Loss_grad_check()



In [38]:
import numpy as np
import torch
import torchvision.datasets as datasets #加载数据
import torchvision.transforms as transforms #数据增强
import json
import os


def Test(Net, dataloader):
    """Evaluate ``Net`` on every batch of ``dataloader`` and return the accuracy.

    :param Net: object with ``forward(input, one_hot_labels)`` that stores the
        class scores in ``Net.output`` (one row per sample, 10 columns)
    :param dataloader: iterable of ``(data, label)`` pairs whose items offer
        ``.numpy()``; labels are integer class ids in ``[0, 10)``
    :return: fraction of correctly classified samples (0.0 for an empty loader)
    """
    num = 0  # correctly classified samples so far
    total = 0  # samples seen so far
    for i, (data, label) in enumerate(dataloader):
        images = data.numpy()
        label = label.numpy()
        # Size the one-hot matrix by the real batch so that a short final
        # batch works and no module-level batch size is needed.
        n_samples = label.shape[0]
        one_hot_labels = np.zeros((n_samples, 10))
        one_hot_labels[np.arange(n_samples), label] = 1
        Net.forward(images, one_hot_labels)
        predict = np.argmax(Net.output, axis=1)
        num += np.sum(predict == label)
        total += n_samples
        if i%20==0 and i!=0:
            print('--------iter:{} num [{}]  acc: [{}]--------'.format(i, num, num / total))
    acc = num / total if total else 0.0
    print('---------acc: [{}] ------'.format(acc))

    return acc

def validate(Net,dataloader,interval):
    """Estimate the accuracy of ``Net`` on the first ``interval`` batches.

    :param Net: object with ``forward(input, one_hot_labels)`` that stores the
        class scores in ``Net.output`` (one row per sample, 10 columns)
    :param dataloader: iterable of ``(data, label)`` pairs whose items offer
        ``.numpy()``; labels are integer class ids in ``[0, 10)``
    :param interval: maximum number of batches to evaluate
    :return: fraction of correctly classified samples among the evaluated
        ones (0.0 when nothing was evaluated)
    """
    if interval <= 0:
        return 0.0

    num = 0  # correctly classified samples
    seen = 0  # evaluated samples
    batches = 0  # evaluated batches
    for i, (data, label) in enumerate(dataloader):
        images = data.numpy()
        label = label.numpy()
        # Size the one-hot matrix by the real batch, not by a global constant.
        n_samples = label.shape[0]
        one_hot_labels = np.zeros((n_samples, 10))
        one_hot_labels[np.arange(n_samples), label] = 1
        Net.forward(images, one_hot_labels)
        predict = np.argmax(Net.output,axis=1)
        num+= np.sum(predict==label)
        seen += n_samples
        batches = i + 1
        if batches >= interval:
            break

    if seen == 0:
        return 0.0
    # Divide by the samples actually evaluated so the result stays correct
    # when the loader runs out before `interval` batches.
    acc = num / seen
    print('--------iter:{} num [{}]  acc: [{}]--------'.format(batches,num,acc))
    return acc


if __name__=="__main__":
    # Training script: fetch MNIST, build the network and run mini-batch
    # gradient descent with periodic evaluation on the test split.

    # hyper parameter
    input_size = 28 * 28  # image size of MNIST data (not used below)
    num_classes = 10
    num_epochs = 10  # not used below; the loop runs `epoch_num` epochs
    batch_size = 20
    lr = 1e-2
    epoch_num = 100
    train_dataset = datasets.MNIST(root='mnist/',  # root directory of the data
                                train=True,  # training split
                                transform=transforms.ToTensor(),  # convert images to tensors
                                download=True)  # download the data if it is missing
    test_dataset = datasets.MNIST(root='mnist/',  # root directory of the data
                               train=False,  # test split
                               transform=transforms.ToTensor(),  # convert images to tensors
                               download=True)  # download the data if it is missing
    # Report the dataset sizes and wrap both splits in shuffling loaders.
    print(len(train_dataset))
    print(len(test_dataset))
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle= True)
    Net = CNN_Nets(lr , batch_size)


    for epoch in range(epoch_num):
        for i,(data,label) in enumerate(train_loader):
            images = data.numpy()
            label = label.numpy()
            # Size the one-hot matrix by the actual batch so that a short
            # final batch does not break the label construction.
            n_samples = label.shape[0]
            one_hot_labels = np.zeros((n_samples, num_classes))
            one_hot_labels[np.arange(n_samples), label] = 1
            Net.forward(images,one_hot_labels)

            # Loss of the current mini-batch.
            print("epoch [{}/{}]  loss [{}]".format(epoch,i,Net.loss))
            Net.backward()

            # Quick accuracy estimate on 10 test batches every 20 iterations.
            if i%20 == 0 and i!=0:
                validate(Net, test_loader,10)
        # Full test-set evaluation every 5 epochs.
        if epoch%5==0 and epoch!=0:
            Test(Net, test_loader)



60000
10000
epoch [0/0]  loss [4.1014198023905974]
epoch [0/1]  loss [4.283676503826402]
epoch [0/2]  loss [3.3736903549021857]
epoch [0/3]  loss [3.354127463373911]
epoch [0/4]  loss [3.3217924974329422]
epoch [0/5]  loss [3.308854791434271]
epoch [0/6]  loss [3.0876504625564687]
epoch [0/7]  loss [3.7753452484603525]
epoch [0/8]  loss [3.267147692239119]
epoch [0/9]  loss [3.397591270279108]
epoch [0/10]  loss [3.1613858553794687]
epoch [0/11]  loss [3.0686869653919535]
epoch [0/12]  loss [3.3100674943785062]
epoch [0/13]  loss [2.968953871615481]
epoch [0/14]  loss [3.3181840082785863]
epoch [0/15]  loss [3.227108847812257]
epoch [0/16]  loss [3.2638823124117233]
epoch [0/17]  loss [3.1302704003535493]
epoch [0/18]  loss [3.229630423284961]
epoch [0/19]  loss [3.1310155905281647]
epoch [0/20]  loss [3.1625478865602097]
--------iter:10 num [61]  acc: [0.305]--------
epoch [0/21]  loss [2.9384713215958893]
epoch [0/22]  loss [3.009825793234111]
epoch [0/23]  loss [3.344176189046636]
e

KeyboardInterrupt: 