# 使用前，需要先导入需要的头文件

In [2]:
#include <iostream>

/*a workaround to solve cling issue*/
#include "../inc/macos_cling_workaround.hpp"
/*set libtorch path, load libs*/
#include "../inc/load_libtorch.hpp"
/*import custom defined macros*/
#include "../inc/custom_def.hpp"
/*import matplotlibcpp*/
#include "../inc/load_matplotlibcpp.hpp"
/*import opencv*/
#include "../inc/load_opencv.hpp"

/*import libtorch header file*/
#include <torch/torch.h>
#include <opencv2/opencv.hpp>
#include <cmath>

// assert() with an attached message. The message is evaluated and discarded
// through the (void) cast, which silences unused-value warnings while keeping
// the message text inside the asserted expression. Both macro arguments are
// parenthesized so that an argument containing a low-precedence operator
// (for example a conditional expression) cannot regroup the expansion.
#define assertm(exp, msg) assert(((void)(msg), (exp)))

In [3]:
/// In-memory dataset that pairs the entry at a given index of a feature tensor
/// with the entry at the same index of a label tensor.
class MyDataset : public torch::data::Dataset<MyDataset>
{
    public:
        /// Keeps handles to the feature tensor (`states`) and the label tensor.
        explicit MyDataset(torch::Tensor states, torch::Tensor labels)
            : states_(states), labels_(labels)
        {
        }

        /// Returns the (data, target) example stored at `index`.
        torch::data::Example<> get(size_t index) override
        {
            torch::Tensor sample = states_[index];
            torch::Tensor target = labels_[index];
            return {sample, target};
        }

        /// Sample count, read from the first dimension of the feature tensor.
        torch::optional<size_t> size() const override
        {
            return states_.size(0);
        }

    private:
        torch::Tensor states_, labels_;
};

# 模型选择、欠拟合和过拟合

## 使用以下四阶多项式（无常数项）来生成训练和测试数据的标签

$y = 5x + 1.2x^2 - 3.4x^3 + 5.6x^4 + \epsilon \quad \text{where} \quad \epsilon \sim \mathcal{U}(0, 0.1)$

In [4]:
// Number of power features per sample (exponents 1..max_degree).
constexpr int max_degree = 4;
// Number of rows reserved for training and for testing.
constexpr int n_train = 3000;
constexpr int n_test = 3000;

// Polynomial coefficients as a {max_degree, 1} column; row i multiplies x^(i+1).
// torch::tensor builds the tensor from the values themselves, so nothing
// depends on the byte layout of the tensor's storage. The reshape throws if
// the number of coefficients does not equal max_degree.
torch::Tensor true_w = torch::tensor({5.0f, 1.2f, -3.4f, 5.6f}).reshape({max_degree, 1});

// Inputs: one standard-normal scalar x per sample, shape {n_train + n_test, 1}.
torch::Tensor features = torch::randn({n_train + n_test, 1});
// features *= 10.0;
// Broadcast x against the exponents 1..max_degree: column j holds x^(j+1).
torch::Tensor poly_features = features.pow(torch::arange(max_degree) + 1);
// poly_features *= 10.0;

// Outputs: noise-free labels are poly_features * true_w, shape {N, 1}.
torch::Tensor labels = poly_features.mm(true_w);
// NOTE(review): rand_like draws uniform noise in [0, 0.1), so the noise is
// not zero-mean; switch to randn_like if Gaussian noise is intended.
labels += torch::rand_like(labels) * 0.1;

In [5]:
// Show the first two rows of the raw inputs, of their power features and of
// the labels. printT (presumably provided by custom_def.hpp) echoes the
// expression text before its value, as the recorded cell output shows, so the
// expressions below are part of what gets printed.
printT(features.index({torch::indexing::Slice(torch::indexing::None, 2, torch::indexing::None)}));    
  
printT(poly_features.index({torch::indexing::Slice(torch::indexing::None, 2, torch::indexing::None)}));    

printT(labels.index({torch::indexing::Slice(torch::indexing::None, 2, torch::indexing::None)}));    

features.index({torch::indexing::Slice(torch::indexing::None, 2, torch::indexing::None)}) = 
 0.5346
 2.1823
[ CPUFloatType{2,1} ]
<<--->>

poly_features.index({torch::indexing::Slice(torch::indexing::None, 2, torch::indexing::None)}) = 
  0.5346   0.2858   0.1528   0.0817
  2.1823   4.7624  10.3930  22.6806
[ CPUFloatType{2,4} ]
<<--->>

labels.index({torch::indexing::Slice(torch::indexing::None, 2, torch::indexing::None)}) = 
   2.9713
 108.3969
[ CPUFloatType{2,1} ]
<<--->>



In [6]:
/// Fits a bias-free linear layer to (train_features, train_labels) with
/// mini-batch SGD (learning rate 0.01) and returns the learned weight tensor.
///
/// @param train_features  2-D tensor; one row per sample, one column per input feature.
/// @param test_features   2-D tensor; only its rank is checked here.
/// @param train_labels    Targets, indexed in step with the rows of train_features.
/// @param test_labels     Accepted for interface compatibility; not used.
/// @param num_epochs      Number of passes over the training data.
/// @param batch_size      Number of samples per mini-batch.
/// @return The first parameter tensor of the network (the weight of the "fc" layer).
///
/// Every 100th epoch the loss of the last processed batch is printed, and the
/// full parameter list is printed once training has finished.
torch::Tensor train(torch::Tensor train_features, 
           torch::Tensor test_features, 
           torch::Tensor train_labels, 
           torch::Tensor test_labels,
           int num_epochs = 400,
           int batch_size = 10)
{
    assertm(train_features.dim() == 2, "train_features should have 2 dims");
    assertm(test_features.dim() == 2, "test_features should have 2 dims");
    // No evaluation on held-out data happens in this function, so no dataset
    // or loader is built for the test tensors.
    (void)test_labels;

    auto train_data_set = MyDataset(train_features, train_labels).map(torch::data::transforms::Stack<>());
    auto train_data_loader = torch::data::make_data_loader<torch::data::samplers::RandomSampler>(
                                    std::move(train_data_set),
                                    batch_size);

    // One input unit per feature column, a single output unit, no bias term.
    const auto input_shape = train_features.size(1);
    torch::nn::Sequential net({{"fc", torch::nn::Linear(torch::nn::LinearOptions(input_shape, 1).bias(false))}});
    auto optimizer = torch::optim::SGD(net->parameters(), /*lr*/0.01);

    for (int epoch = 0; epoch < num_epochs; epoch++)
    {
        // Loss of the most recent batch; stays undefined if the loader yields nothing.
        torch::Tensor loss_values;
        for (auto& batch : *train_data_loader) {
            optimizer.zero_grad();
            auto training_prediction = net->forward(batch.data);
            loss_values = torch::mse_loss(training_prediction, batch.target);
            // mse_loss already averages over the batch with its default
            // reduction; this second division by the batch size is kept
            // because it scales the gradients and thus the effective step size.
            loss_values = loss_values / training_prediction.size(0);
            loss_values.backward();
            optimizer.step();
        }

        // Report only when at least one batch was processed this epoch.
        if (epoch % 100 == 0 && loss_values.defined()) {
            double max_loss = loss_values.max().item<double>();
            std::cout << "Epoch " << epoch
                << ", max(loss_values) = " << max_loss << std::endl;
        }
    }

    std::cout << net->parameters() << std::endl;
    return net->parameters()[0];
}

In [7]:
auto train_data = 
    poly_features.index({torch::indexing::Slice(0, n_train, torch::indexing::None),
                         torch::indexing::Slice(0, 4, torch::indexing::None)});
auto train_label = 
    labels.index({torch::indexing::Slice(0, n_train, torch::indexing::None),
                         torch::indexing::Slice(0, 4, torch::indexing::None)});

auto test_data = 
    poly_features.index({torch::indexing::Slice(n_train, torch::indexing::None, torch::indexing::None),
                         torch::indexing::Slice(0, 4, torch::indexing::None)});
auto test_label = 
    labels.index({torch::indexing::Slice(n_train, torch::indexing::None, torch::indexing::None),
                         torch::indexing::Slice(0, 4, torch::indexing::None)});


// Print the number of training rows, then the first two training rows and
// their labels. printT echoes the expression text in its output, so the
// expressions are left exactly as written.
printT(train_data.size(0));
printT(train_data.index({torch::indexing::Slice(torch::indexing::None, 2, torch::indexing::None)}));    
printT(train_label.index({torch::indexing::Slice(torch::indexing::None, 2, torch::indexing::None)}));    

train_data.size(0) = 
3000
<<--->>

train_data.index({torch::indexing::Slice(torch::indexing::None, 2, torch::indexing::None)}) = 
  0.5346   0.2858   0.1528   0.0817
  2.1823   4.7624  10.3930  22.6806
[ CPUFloatType{2,4} ]
<<--->>

train_label.index({torch::indexing::Slice(torch::indexing::None, 2, torch::indexing::None)}) = 
   2.9713
 108.3969
[ CPUFloatType{2,1} ]
<<--->>



In [14]:
auto w = train(train_data, test_data, train_label, test_label, 5000, 100);

Epoch 0, max(loss_values) = 6.01369
Epoch 100, max(loss_values) = 0.0652082
Epoch 200, max(loss_values) = 0.125175
Epoch 300, max(loss_values) = 0.059115
Epoch 400, max(loss_values) = 0.0168231
Epoch 500, max(loss_values) = 0.011893
Epoch 600, max(loss_values) = 0.00653694
Epoch 700, max(loss_values) = 0.00327698
Epoch 800, max(loss_values) = 0.00221733
Epoch 900, max(loss_values) = 0.00375814
Epoch 1000, max(loss_values) = 0.000958485
Epoch 1100, max(loss_values) = 0.000503585
Epoch 1200, max(loss_values) = 0.00068492
Epoch 1300, max(loss_values) = 0.000185952
Epoch 1400, max(loss_values) = 0.000130376
Epoch 1500, max(loss_values) = 8.84963e-05
Epoch 1600, max(loss_values) = 6.48121e-05
Epoch 1700, max(loss_values) = 0.000115948
Epoch 1800, max(loss_values) = 3.83882e-05
Epoch 1900, max(loss_values) = 3.14614e-05
Epoch 2000, max(loss_values) = 2.48275e-05
Epoch 2100, max(loss_values) = 2.82564e-05
Epoch 2200, max(loss_values) = 2.41616e-05
Epoch 2300, max(loss_values) = 2.11008e-05
Ep

In [15]:
// Reference: the coefficients the labels were generated with.
//true_w
// = {5.0, 1.2, -3.4, 5.6};

// Reshape the learned weights into a {4,1} column so that a {1,4} feature row
// can be matrix-multiplied with it.
w = w.reshape({4,1});
// Compare one training label with the model's prediction for the same row.
printT(train_label[0]);
printT(train_data[0].reshape({1,4}).mm(w));

// Same comparison for one row of the test split.
printT(test_label[3]);
printT(test_data[3].reshape({1,4}).mm(w));

train_label[0] = 
 2.9713
[ CPUFloatType{1} ]
<<--->>

train_data[0].reshape({1,4}).mm(w) = 
 2.9632
[ CPUFloatType{1,1} ]
<<--->>

test_label[3] = 
-1.4345
[ CPUFloatType{1} ]
<<--->>

test_data[3].reshape({1,4}).mm(w) = 
-1.4618
[ CPUFloatType{1,1} ]
<<--->>

