In [1]:
#include <iostream>

/*a workaround to solve cling issue*/
#include "../inc/macos_cling_workaround.hpp"
/*set libtorch path, load libs*/
#include "../inc/load_libtorch.hpp"
/*import custom defined macros*/
#include "../inc/custom_def.hpp"
/*import matplotlibcpp*/
#include "../inc/load_matplotlibcpp.hpp"
/*import opencv*/
#include "../inc/load_opencv.hpp"

/*import libtorch header file*/
#include <torch/torch.h>
#include <opencv2/opencv.hpp>
#include <cmath>

// Use (void) to silent unused warnings.
#define assertm(exp, msg) assert(((void)msg, exp))

In [2]:
#define VAR_NAME(Variable) (#Variable)

void print_tensor_size(std::string name, torch::Tensor t)
{
    int dims = t.dim();
    std::cout << name << " dims is (";
    for (int i = 0; i < dims; i++) {
        std::cout << t.size(i);
        if (i < (dims - 1)) std::cout << " x ";
    }
    std::cout << ")" << std::endl;
}

# 参数管理

我们首先关注具有单隐藏层的多层感知机

In [3]:
torch::nn::Sequential net(torch::nn::Linear(4, 8),
                          torch::nn::ReLU(),
                          torch::nn::Linear(8, 1)
                         );


auto X = torch::rand({2, 4});
printT(X);

printT(net->forward(X));

X = 
 0.8977  0.9492  0.2640  0.6172
 0.6931  0.3670  0.9472  0.1460
[ CPUFloatType{2,4} ]
<<--->>

net->forward(X) = 
-0.4400
-0.1426
[ CPUFloatType{2,1} ]
<<--->>



参数访问

In [4]:
printT(net[2]->parameters());

// or 

printT(net[2]->name());

auto od = net[2]->named_parameters();

for(auto iter = od.begin(); iter != od.end(); iter++) {
    std::cout << iter->key() << " = " << std::endl;
    std::cout << iter->value() << std::endl;
    std::cout << "<<<--->>>" << std::endl << std::endl;
}

net[2]->parameters() = 
-0.2337  0.3503 -0.1338 -0.3453  0.0732  0.3135  0.2123  0.2527
[ CPUFloatType{1,8} ] -0.3481
[ CPUFloatType{1} ]
<<--->>

net[2]->name() = 
torch::nn::LinearImpl
<<--->>

weight = 
-0.2337  0.3503 -0.1338 -0.3453  0.0732  0.3135  0.2123  0.2527
[ CPUFloatType{1,8} ]
<<<--->>>

bias = 
-0.3481
[ CPUFloatType{1} ]
<<<--->>>



目标参数

In [5]:
//*** 原教程代码
// print(type(net[2].bias))
// print(net[2].bias)
// print(net[2].bias.data)
    
auto b = od.find("bias");
if(b != nullptr) {
    printT(b->options());
    printT(*b);
    printT(b->data());
}

b->options() = 
TensorOptions(dtype=float, device=cpu, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt))
<<--->>

*b = 
-0.3481
[ CPUFloatType{1} ]
<<--->>

b->data() = 
-0.3481
[ CPUFloatType{1} ]
<<--->>



In [6]:
auto w = od.find("weight");

if(w != nullptr) {
    printT(w->options());
    printT(w->grad());
}

w->options() = 
TensorOptions(dtype=float, device=cpu, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt))
<<--->>

w->grad() = 
[ Tensor (undefined) ]
<<--->>



一次性访问所有参数(同上)

In [7]:
auto od1 = net->named_parameters();

for(auto iter = od1.begin(); iter != od1.end(); iter++) {
    std::cout << iter->key() << " = " << std::endl;
    std::cout << iter->value() << std::endl;
    std::cout << "<<<--->>>" << std::endl << std::endl;
}

0.weight = 
-0.0865  0.0004 -0.0283 -0.3699
 0.1496  0.3299 -0.3751 -0.2932
 0.4911  0.2212  0.0100  0.1815
 0.3170  0.2273 -0.4936  0.4463
-0.0703  0.1112  0.1605  0.1676
 0.3138  0.4383  0.3995 -0.1057
 0.4003 -0.1541 -0.0448  0.1465
-0.3959  0.4331  0.3080 -0.0293
[ CPUFloatType{8,4} ]
<<<--->>>

0.bias = 
-0.4599
-0.1276
 0.2174
 0.4324
 0.0921
 0.1287
-0.3005
 0.2993
[ CPUFloatType{8} ]
<<<--->>>

2.weight = 
-0.2337  0.3503 -0.1338 -0.3453  0.0732  0.3135  0.2123  0.2527
[ CPUFloatType{1,8} ]
<<<--->>>

2.bias = 
-0.3481
[ CPUFloatType{1} ]
<<<--->>>



In [8]:
printT(od1["2.bias"]);

od1["2.bias"] = 
-0.3481
[ CPUFloatType{1} ]
<<--->>



从嵌套块收集参数

In [9]:
/*design a net*/
struct block1 : torch::nn::Module {
    block1() {
        net = torch::nn::Sequential(torch::nn::Linear(4, 8),
                                    torch::nn::ReLU(),
                                    torch::nn::Linear(8, 4),
                                    torch::nn::ReLU());
    }

    // Implement the Net's algorithm.
    torch::Tensor forward(torch::Tensor x) {
        x = net->forward(x);
        return x;
    }

    // Use one of many "standard library" modules.
    torch::nn::Sequential net{nullptr};
};

In [10]:
struct block2 : torch::nn::Module {
    block2() {
        net = torch::nn::Sequential(block1(),
                                    block1(),
                                    block1(),
                                    block1()
                                   );
    }

    // Implement the Net's algorithm.
    torch::Tensor forward(torch::Tensor x) {
        x = net->forward(x);
        return x;
    }

    // Use one of many "standard library" modules.
    torch::nn::Sequential net{nullptr};
};

In [11]:
torch::nn::Sequential rgnet(block2(), torch::nn::Linear(4, 1));
printT(rgnet->forward(X));

rgnet->forward(X) = 
-0.5016
-0.5015
[ CPUFloatType{2,1} ]
<<--->>



In [13]:
std::cout << rgnet << std::endl;

torch::nn::Sequential(
  (0): __cling_N510::block2
  (1): torch::nn::Linear(in_features=4, out_features=1, bias=true)
)


In [15]:
for (int i = 0; i < rgnet->size(); i++) {
   printT(rgnet[i]->name()); 
}

rgnet[i]->name() = 
__cling_N510::block2
<<--->>

rgnet[i]->name() = 
torch::nn::LinearImpl
<<--->>



对某些块应用不同的初始化方法

In [None]:
def xavier(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)

def init_42(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 42)

net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data[0])
print(net[2].weight.data)

In [18]:
void init_weights(torch::nn::Module& m)
{
//     if ((typeid(m) == typeid(torch::nn::LinearImpl)) || (typeid(m) == typeid(torch::nn::Linear))) {
    if (typeid(m) == typeid(torch::nn::Linear)) {
        auto p = m.named_parameters(false);
        auto w = p.find("weight");
        auto b = p.find("bias");

//         if (w != nullptr) torch::nn::init::xavier_uniform_(*w);
        if (w != nullptr) torch::nn::init::ones_(*w);
        if (b != nullptr) torch::nn::init::constant_(*b, 0.01);
    }
}

net->apply(init_weights);

printT(net->parameters());

net->parameters() = 
-0.0865  0.0004 -0.0283 -0.3699
 0.1496  0.3299 -0.3751 -0.2932
 0.4911  0.2212  0.0100  0.1815
 0.3170  0.2273 -0.4936  0.4463
-0.0703  0.1112  0.1605  0.1676
 0.3138  0.4383  0.3995 -0.1057
 0.4003 -0.1541 -0.0448  0.1465
-0.3959  0.4331  0.3080 -0.0293
[ CPUFloatType{8,4} ] -0.4599
-0.1276
 0.2174
 0.4324
 0.0921
 0.1287
-0.3005
 0.2993
[ CPUFloatType{8} ] -0.2337  0.3503 -0.1338 -0.3453  0.0732  0.3135  0.2123  0.2527
[ CPUFloatType{1,8} ] -0.3481
[ CPUFloatType{1} ]
<<--->>



引申一点，看看net的参数，以及如何为net的参数做初始化：

In [None]:
torch::nn::Sequential net1(torch::nn::Linear(4, 8),
                          torch::nn::ReLU(),
                          torch::nn::Linear(8, 1)
                         );

printT(net1->parameters());