In [1]:
#include <iostream>

/*a workaround to solve cling issue*/
#include "../inc/macos_cling_workaround.hpp"
/*set libtorch path, load libs*/
#include "../inc/load_libtorch.hpp"
/*import custom defined macros*/
#include "../inc/custom_def.hpp"
/*import matplotlibcpp*/
#include "../inc/load_matplotlibcpp.hpp"
/*import opencv*/
#include "../inc/load_opencv.hpp"

/*import libtorch header file*/
#include <torch/torch.h>
#include <opencv2/opencv.hpp>
#include <cmath>

// assert() with an attached message: the comma expression evaluates `msg`
// first and discards it (the (void) cast silences unused-value warnings),
// then yields `exp` as the value that assert() actually tests.
#define assertm(exp, msg) assert(((void)msg, exp))

In [2]:
// Expands to a string literal holding the source text of the macro argument
// (preprocessor stringification via `#`).
#define VAR_NAME(Variable) (#Variable)

/**
 * Print the shape of a tensor to std::cout as
 *     "<name> dims is (d0 x d1 x ... x dn)"
 * followed by a newline.
 *
 * @param name  Label printed in front of the shape.
 * @param t     Tensor whose per-dimension sizes are printed.
 */
void print_tensor_size(std::string name, torch::Tensor t)
{
    std::cout << name << " dims is (";

    // Emit the separator *before* every extent except the first one, which
    // yields the same text as appending it after every extent but the last.
    const char* separator = "";
    for (const auto extent : t.sizes()) {
        std::cout << separator << extent;
        separator = " x ";
    }

    std::cout << ")" << std::endl;
}

# 池化层

**实现池化层的正向传播**

In [3]:
// Python reference implementation, kept as a comment for side-by-side
// comparison with the C++ pool2d in this notebook (not executed):
//
// def pool2d(X, pool_size, mode='max'):
//     p_h, p_w = pool_size
//     Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
//     for i in range(Y.shape[0]):
//         for j in range(Y.shape[1]):
//             if mode == 'max':
//                 Y[i, j] = X[i:i + p_h, j:j + p_w].max()
//             elif mode == 'avg':
//                 Y[i, j] = X[i:i + p_h, j:j + p_w].mean()
//     return Y

In [4]:
// Short alias for the libtorch range-index type, used with Tensor::index()
// to express Python-style `a:b` slices.
using Slice = torch::indexing::Slice;


In [5]:
/**
 * Naive 2-D pooling of a single 2-D tensor: the window slides one element at
 * a time and no padding is applied, so the result has shape
 * (X.size(0) - p_h + 1, X.size(1) - p_w + 1).
 *
 * @param X          2-D input tensor.
 * @param pool_size  Window size given as {height, width}.
 * @param mode       "max" for max pooling (default) or "avg" for mean pooling.
 * @return           Tensor holding the pooled values. It is allocated with the
 *                   default options of torch::zeros, independent of X.
 * @throws c10::Error if X is not 2-D, pool_size does not hold exactly two
 *                   positive values, the window does not fit into X, or mode
 *                   is neither "max" nor "avg".
 */
torch::Tensor pool2d(torch::Tensor X, at::IntArrayRef pool_size, std::string mode="max")
{
    TORCH_CHECK(X.dim() == 2,
                "pool2d expects a 2-D input tensor, got ", X.dim(), " dimension(s)");
    TORCH_CHECK(pool_size.size() == 2,
                "pool2d expects pool_size = {height, width}, got ", pool_size.size(), " value(s)");

    // Resolve the mode once, up front: an unknown mode used to fall through
    // both branches and silently produce an all-zero result.
    const bool use_max = (mode == "max");
    TORCH_CHECK(use_max || mode == "avg",
                "pool2d: unsupported mode '", mode, "', expected \"max\" or \"avg\"");

    // Keep sizes as int64_t to avoid narrowing the values libtorch hands back.
    const int64_t p_h = pool_size[0];
    const int64_t p_w = pool_size[1];
    TORCH_CHECK(p_h > 0 && p_w > 0,
                "pool2d: pool_size entries must be positive, got {", p_h, ", ", p_w, "}");

    const int64_t out_h = X.size(0) - p_h + 1;
    const int64_t out_w = X.size(1) - p_w + 1;
    TORCH_CHECK(out_h >= 0 && out_w >= 0,
                "pool2d: window {", p_h, ", ", p_w, "} does not fit into input of size {",
                X.size(0), ", ", X.size(1), "}");

    auto Y = torch::zeros({out_h, out_w});
    for (int64_t i = 0; i < out_h; ++i) {
        for (int64_t j = 0; j < out_w; ++j) {
            // Window whose top-left corner sits at (i, j).
            const auto window = X.index({Slice(i, i + p_h), Slice(j, j + p_w)});
            Y[i][j] = use_max ? window.max() : window.mean();
        }
    }

    return Y;
}

**验证二维最大池化层的输出**

In [6]:
// Python reference:
//   X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
//   pool2d(X, (2, 2))

// Build the float input from its nine values and shape it to 3x3 in a single
// expression.
auto X = torch::tensor(at::ArrayRef<float>({0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0})).reshape({3, 3});

// Default mode, i.e. max pooling with a 2x2 window.
printT(pool2d(X, {2, 2}));

pool2d(X, {2, 2}) = 
 4  5
 7  8
[ CPUFloatType{2,2} ]
<<--->>



**验证平均池化层**

In [7]:
// Python reference:
//   pool2d(X, (2, 2), 'avg')
// Same 2x2 window as the previous cell, but averaging each window.

printT(pool2d(X, {2, 2}, "avg"));

pool2d(X, {2, 2}, "avg") = 
 2  3
 5  6
[ CPUFloatType{2,2} ]
<<--->>



**填充和步幅**

In [8]:
// Python reference:
//   X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))
//   X
// Float values 0..15 arranged as a 4-D tensor of shape {1, 1, 4, 4}.

auto X1 = torch::arange(16, torch::kFloat).reshape({1, 1, 4, 4});
printT(X1);

X1 = 
(1,1,.,.) = 
   0   1   2   3
   4   5   6   7
   8   9  10  11
  12  13  14  15
[ CPUFloatType{1,1,4,4} ]
<<--->>



**深度学习框架中的步幅与池化窗口的大小相同**

In [9]:
// Python reference:
//   pool2d = nn.MaxPool2d(3)
//   pool2d(X)

// Print the module configuration: with only a kernel size given, the printed
// stride equals the kernel size.
printT(torch::nn::MaxPool2d(3));

// Wrap the layer in a Sequential container and run it on X1.
torch::nn::Sequential max_pool(torch::nn::MaxPool2d(3));
printT(max_pool->forward(X1));

torch::nn::MaxPool2d(3) = 
torch::nn::MaxPool2d(kernel_size=[3, 3], stride=[3, 3], padding=[0, 0], dilation=[1, 1], ceil_mode=false)
<<--->>

max_pool->forward(X1) = 
(1,1,.,.) = 
  10
[ CPUFloatType{1,1,1,1} ]
<<--->>



**填充和步幅可以手动设定**

In [10]:
// Python reference:
//   pool2d = nn.MaxPool2d(3, padding=1, stride=2)
//   pool2d(X)
using namespace torch::nn;

// 3x3 window with explicit padding 1 and stride 2. Names are spelled out in
// full so the statement reads the same with or without the using-directive.
torch::nn::Sequential max_pool1(
    torch::nn::MaxPool2d(torch::nn::MaxPool2dOptions(3).padding(1).stride({2, 2})));
printT(max_pool1->forward(X1));

max_pool1->forward(X1) = 
(1,1,.,.) = 
   5   7
  13  15
[ CPUFloatType{1,1,2,2} ]
<<--->>



**设定一个任意大小的矩形池化窗口，并分别设定填充和步幅的高度和宽度**

In [11]:
// Python reference:
//   pool2d = nn.MaxPool2d((2, 3), padding=(1, 1), stride=(2, 3))
//   pool2d(X)

// Rectangular 2x3 window with separate height/width values for padding and
// stride. MaxPool2dOptions is fully qualified so this cell does not depend on
// a using-directive issued in another cell.
torch::nn::Sequential max_pool2(
    torch::nn::MaxPool2d(torch::nn::MaxPool2dOptions({2, 3}).padding({1, 1}).stride({2, 3})));
printT(max_pool2->forward(X1));

max_pool2->forward(X1) = 
(1,1,.,.) = 
   1   3
   9  11
  13  15
[ CPUFloatType{1,1,3,2} ]
<<--->>



**池化层在每个输入通道上单独运算**

In [12]:
// Python reference:
//   X = torch.cat((X, X + 1), 1)
//   X
// Concatenate X1 with X1 + 1 along dimension 1, giving shape {1, 2, 4, 4}.
// NOTE: X1 is reassigned in place, so re-running this cell keeps growing
// dimension 1.

X1 = torch::cat({X1, X1+1}, 1);
printT(X1);

X1 = 
(1,1,.,.) = 
   0   1   2   3
   4   5   6   7
   8   9  10  11
  12  13  14  15

(1,2,.,.) = 
   1   2   3   4
   5   6   7   8
   9  10  11  12
  13  14  15  16
[ CPUFloatType{1,2,4,4} ]
<<--->>



In [13]:
// Python reference:
//   pool2d = nn.MaxPool2d(3, padding=1, stride=2)
//   pool2d(X)

// Same 3x3 / padding 1 / stride 2 pooling, now applied to the tensor with two
// entries along dimension 1. MaxPool2dOptions is fully qualified so this cell
// does not depend on a using-directive issued in another cell.
torch::nn::Sequential max_pool3(
    torch::nn::MaxPool2d(torch::nn::MaxPool2dOptions(3).padding(1).stride(2)));
printT(max_pool3->forward(X1));

max_pool3->forward(X1) = 
(1,1,.,.) = 
   5   7
  13  15

(1,2,.,.) = 
   6   8
  14  16
[ CPUFloatType{1,2,2,2} ]
<<--->>

