In [1]:
import torch
from torch import nn
from d2l import torch as d2l
from transformers import Trainer, TrainingArguments

# Ground-truth parameters of the linear relation; they are fed to the synthetic
# data generator and are what the trained model's weight/bias are compared to.
true_b = 4.2
true_w = torch.tensor([2.0, -3.4])

Error in cpuinfo: prctl(PR_SVE_GET_VL) failed


In [2]:
class CustDatasetForRegression(torch.utils.data.Dataset):
    """Map-style dataset of synthetic regression samples.

    The samples are produced once, at construction time, by
    ``d2l.synthetic_data(true_w, true_b, num_samples)``, whose result is
    unpacked into ``features`` and ``labels``.
    (presumably ``labels = features @ true_w + true_b + noise`` -- confirm
    against the d2l implementation.)

    Each item is a dict with the keys ``"inputs"`` and ``"labels"``.
    """

    def __init__(self, true_w, true_b, num_samples):
        """Store the generating parameters and build the sample tensors.

        Args:
            true_w: Weights handed to ``d2l.synthetic_data``.
            true_b: Bias handed to ``d2l.synthetic_data``.
            num_samples: Number of samples requested from the generator.
        """
        # Keep the generating parameters on the instance for later inspection.
        self.num_samples = num_samples
        self.true_b = true_b
        self.true_w = true_w

        synthetic = d2l.synthetic_data(true_w, true_b, num_samples)
        self.features, self.labels = synthetic

    def __len__(self):
        """Return the number of stored feature rows."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as ``{"inputs": ..., "labels": ...}``."""
        return {"inputs": self.features[idx], "labels": self.labels[idx]}

In [3]:
# Build the training set: 1000 synthetic samples from the ground-truth parameters.
data = CustDatasetForRegression(true_w=true_w, true_b=true_b, num_samples=1000)

In [4]:
class CustomModelForRegression(nn.Module):
    """Linear regression model (2 inputs -> 1 output) with a dict-returning forward.

    ``forward`` returns a dict that always contains ``"logits"`` and, when
    ``labels`` are supplied, also ``"loss"`` (mean squared error). The parameter
    names ``inputs`` / ``labels`` match the keys emitted by the dataset items in
    this notebook.
    """

    def __init__(self):
        super(CustomModelForRegression, self).__init__()
        self.net = nn.Sequential(nn.Linear(2, 1))

    def forward(self, inputs, labels=None):
        """Run the linear layer and optionally compute the MSE loss.

        Args:
            inputs: Tensor whose last dimension is 2 (the layer's in_features).
            labels: Optional regression targets. When their shape differs from
                the predictions but the element count matches (e.g. ``(batch,)``
                vs ``(batch, 1)``), they are reshaped to the prediction shape.

        Returns:
            ``{"logits": ...}`` without labels, otherwise
            ``{"logits": ..., "loss": ...}``.
        """
        logits = self.net(inputs)
        if labels is None:
            return {"logits": logits}

        # Without this alignment nn.MSELoss would broadcast (batch,) labels
        # against (batch, 1) logits into a (batch, batch) difference and
        # silently produce a wrong loss value.
        if labels.shape != logits.shape and labels.numel() == logits.numel():
            labels = labels.reshape(logits.shape)

        loss = nn.MSELoss()(logits, labels)
        return {"logits": logits, "loss": loss}

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        """Build a model and load weights from a saved ``state_dict`` file.

        Declared as a classmethod so that
        ``CustomModelForRegression.from_pretrained(path)`` works; without the
        decorator the path argument was bound to ``cls``.

        Args:
            pretrained_model_name_or_path: Path (or file-like object) accepted
                by ``torch.load`` that contains a ``state_dict``.
            *model_args: Forwarded to the class constructor.
            **kwargs: Forwarded to the class constructor.

        Returns:
            A new instance with the loaded parameters.
        """
        model = cls(*model_args, **kwargs)
        # NOTE(security): torch.load relies on pickle; only load checkpoints
        # from trusted sources.
        # map_location="cpu" lets a checkpoint saved on an accelerator load on a
        # CPU-only host; load_state_dict copies values into the model's own
        # parameters, so the resulting model is the same.
        state_dict = torch.load(pretrained_model_name_or_path, map_location="cpu")
        model.load_state_dict(state_dict)
        return model

In [5]:
# Instantiate the regression model with freshly initialised parameters.
model = CustomModelForRegression()

In [6]:
# Bare expression: the cell output shows the module's repr (its layer structure).
model

CustomModelForRegression(
  (net): Sequential(
    (0): Linear(in_features=2, out_features=1, bias=True)
  )
)

In [7]:
# SGD over all of the model's parameters with a learning rate of 0.1; this
# optimizer instance is handed to the Trainer in the training cell.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)

In [8]:
# Training configuration: 100 epochs, batches of 512 samples per device, and a
# log entry at the end of every epoch; run artifacts go under ./results.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=100,
    logging_strategy="epoch",
    per_device_train_batch_size=512,
)

# NOTE(review): the scheduler slot of `optimizers` is None, so the Trainer
# presumably falls back to its own default learning-rate scheduler -- confirm
# against the transformers documentation if a constant lr of 0.1 is intended.
trainer = Trainer(
    model=model,  # custom model instance
    args=training_args,  # training arguments
    train_dataset=data,
    optimizers=(optimizer, None),  # pass in the optimizer
)

trainer.train()  # start training

Step,Training Loss
2,32.6374
4,30.3982
6,28.2607
8,26.2205
10,24.2754
12,22.4268
14,20.672
16,19.0009
18,17.4232
20,15.93


TrainOutput(global_step=200, training_loss=3.541565516781775, metrics={'train_runtime': 0.4852, 'train_samples_per_second': 206093.519, 'train_steps_per_second': 412.187, 'total_flos': 0.0, 'train_loss': 3.541565516781775, 'epoch': 100.0})

In [9]:
# Show the learned weight and bias of the linear layer so they can be compared
# with the ground-truth true_w and true_b.
print(model.net[0].weight, "\n", model.net[0].bias)

Parameter containing:
tensor([[ 2.0000, -3.4003]], requires_grad=True) 
 Parameter containing:
tensor([4.2002], requires_grad=True)
