In [1]:
#include <iostream>

/*a workaround to solve cling issue*/
#include "../inc/macos_cling_workaround.hpp"
/*set libtorch path, load libs*/
#include "../inc/load_libtorch.hpp"
/*import custom defined macros*/
#include "../inc/custom_def.hpp"
/*import matplotlibcpp*/
#include "../inc/load_matplotlibcpp.hpp"
/*import opencv*/
#include "../inc/load_opencv.hpp"

/*import libtorch header file*/
#include <torch/torch.h>
#include <opencv2/opencv.hpp>
#include <cmath>

// assert() with an attached message: the message is evaluated and discarded via a
// (void) cast, which silences unused-value warnings, and the comma operator then
// yields the condition. Both macro arguments are parenthesized so that operators
// inside them (e.g. a ternary in the message) cannot bind to the cast or the comma.
#define assertm(exp, msg) assert(((void)(msg), (exp)))

In [2]:
// Stringify an identifier, e.g. VAR_NAME(foo) expands to ("foo").
#define VAR_NAME(Variable) (#Variable)

/**
 * Print the shape of a tensor on one line as "<name> dims is (d0 x d1 x ...)".
 *
 * @param name  Label written in front of the shape.
 * @param t     Tensor whose per-dimension sizes are printed.
 */
void print_tensor_size(std::string name, torch::Tensor t)
{
    std::cout << name << " dims is (";

    // The separator is empty before the first extent and " x " afterwards,
    // so no trailing separator is emitted.
    const char* separator = "";
    for (const auto extent : t.sizes()) {
        std::cout << separator << extent;
        separator = " x ";
    }

    std::cout << ")" << std::endl;
}

# 图像卷积

**互相关运算**

In [3]:
////// 原教程代码，python实现
// def corr2d(X, K):  
//     """计算二维互相关运算。"""
//     h, w = K.shape
//     Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
//     for i in range(Y.shape[0]):
//         for j in range(Y.shape[1]):
//             Y[i, j] = (X[i:i + h, j:j + w] * K).sum()
//     return Y

In [4]:
/**
 * Compute the 2-D cross-correlation of an input matrix with a kernel.
 *
 * The kernel window is slid over every position where it fits entirely inside
 * X; each output element is the sum of the element-wise product of the window
 * and K.
 *
 * @param X  2-D input tensor of shape (rows, cols).
 * @param K  2-D kernel tensor of shape (h, w), no larger than X in either dimension.
 * @return   Tensor of shape (rows - h + 1, cols - w + 1), allocated by
 *           torch::zeros with default options.
 * @throws c10::Error if either tensor is not 2-D or the kernel does not fit in X.
 */
torch::Tensor corr2d(torch::Tensor X, torch::Tensor K)
{
    TORCH_CHECK(X.dim() == 2 && K.dim() == 2,
                "corr2d expects 2-D tensors, got X.dim() = ", X.dim(),
                " and K.dim() = ", K.dim());

    // Tensor::size() returns int64_t; keep that width instead of narrowing to int.
    const int64_t h = K.size(0);
    const int64_t w = K.size(1);

    // Without this check an oversized kernel produces a negative output extent
    // and an unhelpful error from torch::zeros.
    TORCH_CHECK(X.size(0) >= h && X.size(1) >= w,
                "corr2d: kernel (", h, " x ", w, ") does not fit in input (",
                X.size(0), " x ", X.size(1), ")");

    const int64_t out_rows = X.size(0) - h + 1;
    const int64_t out_cols = X.size(1) - w + 1;

    torch::Tensor Y = torch::zeros({out_rows, out_cols});
    for (int64_t i = 0; i < out_rows; i++) {
        for (int64_t j = 0; j < out_cols; j++) {
            // Window of X whose top-left corner is (i, j), same shape as K.
            auto window = X.index({torch::indexing::Slice(i, i + h),
                                   torch::indexing::Slice(j, j + w)});
            Y[i][j] = (window * K).sum();
        }
    }

    return Y;
}

**验证上述二维互相关运算的输出**

In [5]:


// 3x3 input holding 0..8 and 2x2 kernel holding 0..3, both float32,
// built from flat value lists and viewed as matrices in one step.
auto X = torch::tensor(at::ArrayRef<float>({0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0})).view({3, 3});
auto K = torch::tensor(at::ArrayRef<float>({0.0, 1.0, 2.0, 3.0})).view({2, 2});

// Sanity check of corr2d on the textbook example.
printT(corr2d(X, K));

corr2d(X, K) = 
 19  25
 37  43
[ CPUFloatType{2,2} ]
<<--->>



**实现二维卷积层**

In [6]:
////// 原教程代码，python实现
// class Conv2D(nn.Module):
//     def __init__(self, kernel_size):
//         super().__init__()
//         self.weight = nn.Parameter(torch.rand(kernel_size))
//         self.bias = nn.Parameter(torch.zeros(1))

//     def forward(self, x):
//         return corr2d(x, self.weight) + self.bias

In [7]:
/**
 * Minimal 2-D convolutional layer built on corr2d: output = corr2d(X, weight) + bias.
 *
 * Initialization follows the Python reference this notebook ports:
 * the weight is drawn from torch::rand (uniform on [0, 1)) and the bias starts at zero.
 */
struct Conv2D : torch::nn::Module {
  /**
   * @param kernel_size  Shape of the kernel; corr2d is called with it, so it
   *                     is expected to describe a 2-D (height, width) kernel.
   */
  Conv2D(at::IntArrayRef kernel_size) {
    weight = register_parameter("weight", torch::rand(kernel_size));
    bias   = register_parameter("bias", torch::zeros({1}));
  }

  /**
   * Cross-correlate X with the learned kernel and add the bias, which has a
   * single element and is broadcast over the whole output.
   */
  torch::Tensor forward(torch::Tensor X) {
    return corr2d(X, weight) + bias;
  }

  // Learnable parameters, registered under the names "weight" and "bias".
  torch::Tensor weight, bias;
};

**卷积层的一个简单应用： 检测图像中不同颜色的边缘**

In [8]:
namespace idx = torch::indexing;
using Slice = torch::indexing::Slice;
torch::Tensor X1 = torch::ones({6, 8});

In [9]:
// Zero out columns 2..5 of every row; a default-constructed Slice() selects the whole dimension.
X1.index_put_({Slice(), Slice(2, 6)}, 0);

In [10]:
// Show the 6x8 image: ones everywhere except columns 2..5, which were set to 0.
// NOTE(review): printT is not defined in this notebook (presumably from custom_def.hpp);
// judging by the recorded output it echoes the argument text before the tensor.
printT(X1);

X1 = 
 1  1  0  0  0  0  1  1
 1  1  0  0  0  0  1  1
 1  1  0  0  0  0  1  1
 1  1  0  0  0  0  1  1
 1  1  0  0  0  0  1  1
 1  1  0  0  0  0  1  1
[ CPUFloatType{6,8} ]
<<--->>



In [11]:
// 1x2 kernel [1, -1]: the difference between horizontally adjacent elements.
auto K1 = torch::tensor(at::ArrayRef<float>({1.0, -1.0})).view({1, 2});

**输出Y中的1代表从白色到黑色的边缘，-1代表从黑色到白色的边缘**

In [12]:
auto Y1 = corr2d(X1, K1);
printT(Y1);

Y1 = 
 0  1  0  0  0 -1  0
 0  1  0  0  0 -1  0
 0  1  0  0  0 -1  0
 0  1  0  0  0 -1  0
 0  1  0  0  0 -1  0
 0  1  0  0  0 -1  0
[ CPUFloatType{6,7} ]
<<--->>



**卷积核K只可以检测垂直边缘**

In [13]:
// Apply the same 1x2 kernel to the transposed image (8x6). The edges now run
// horizontally, so horizontally adjacent elements are equal and the result is all zeros.
printT(corr2d(X1.t(), K1));

corr2d(X1.t(), K1) = 
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
[ CPUFloatType{8,5} ]
<<--->>



**学习由X生成Y的卷积核**

In [14]:
// conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

// X = X.reshape((1, 1, 6, 8))
// Y = Y.reshape((1, 1, 6, 7))

// for i in range(10):
//     Y_hat = conv2d(X)
//     l = (Y_hat - Y)**2
//     conv2d.zero_grad()
//     l.sum().backward()
//     conv2d.weight.data[:] -= 3e-2 * conv2d.weight.grad
//     if (i + 1) % 2 == 0:
//         print(f'batch {i+1}, loss {l.sum():.3f}')

In [15]:
// Python reference signature:
//   torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0,
//                   dilation=1, groups=1, bias=True, padding_mode='zeros')
//
// C++ counterpart:
//   ConvOptions(int64_t in_channels, int64_t out_channels, ExpandingArray<D> kernel_size)
//
// A bare module could be declared as:
//   torch::nn::Conv2d conv2d(torch::nn::Conv2dOptions(1, 1, {1, 2}).stride(1).bias(false));

// One input channel, one output channel, a 1x2 kernel, stride 1 and no bias.
// The layer is wrapped in a Sequential, so later cells reach it as conv2d[0].
torch::nn::Sequential conv2d{
    torch::nn::Conv2d(
        torch::nn::Conv2dOptions(1, 1, {1, 2}).stride(1).bias(false))};

// Add leading batch and channel dimensions of size 1 to the target and the input,
// mirroring the reshape((1, 1, 6, 7)) / reshape((1, 1, 6, 8)) calls in the Python reference.
Y1 = Y1.reshape({1, 1, 6, 7});
X1 = X1.reshape({1, 1, 6, 8});

In [16]:
for (int i = 0; i < 10; i++) {
    auto Y1_hat = conv2d->forward(X1);
    auto l = (Y1_hat - Y1).mul((Y1_hat - Y1));
    conv2d->zero_grad();
    l.sum().backward();
    conv2d[0]->named_parameters()["weight"].data() -= 0.03 * conv2d[0]->named_parameters()["weight"].grad();
    if ((i+1) % 2 == 0) {
        std::cout << "batch " << i+1 << ", loss = " << l.sum().data() << std::endl;
    }
}

batch 2, loss = 7.84459
[ CPUFloatType{} ]
batch 4, loss = 2.46581
[ CPUFloatType{} ]
batch 6, loss = 0.884616
[ CPUFloatType{} ]
batch 8, loss = 0.341303
[ CPUFloatType{} ]
batch 10, loss = 0.136269
[ CPUFloatType{} ]


**所学的卷积核的权重张量**

In [17]:
// Show the learned convolution kernel, reshaped to 1x2 so it can be compared
// directly with the hand-written edge kernel K1 = [1, -1].
printT(conv2d[0]->named_parameters()["weight"].data().reshape({1,2}));

conv2d[0]->named_parameters()["weight"].data().reshape({1,2}) = 
 1.0286 -0.9531
[ CPUFloatType{1,2} ]
<<--->>

