In [1]:
import torch
from torch import nn
from d2l import torch as d2l
from transformers import Trainer, TrainingArguments

true_w = torch.tensor([2, -3.4])
true_b = 4.2

Error in cpuinfo: prctl(PR_SVE_GET_VL) failed


In [2]:
class CustDatasetForRegression(torch.utils.data.Dataset):
    def __init__(self, true_w, true_b, num_samples):
        self.true_w = true_w
        self.true_b = true_b
        self.num_samples = num_samples

        self.features, self.labels = d2l.synthetic_data(true_w, true_b, num_samples)

    def __getitem__(self, idx):
        item = {"inputs": self.features[idx], "labels": self.labels[idx]}
        return item

    def __len__(self):
        return len(self.features)

In [3]:
data = CustDatasetForRegression(true_w, true_b, 1000)

In [4]:
class CustomModelForRegression(nn.Module):
    def __init__(self):
        super(CustomModelForRegression, self).__init__()
        self.net = nn.Sequential(nn.Linear(2, 1))

    def forward(self, inputs, labels=None):
        logits = self.net(inputs)

        if labels is not None:
            loss_fn = nn.MSELoss()
            loss = loss_fn(logits, labels)
            return {"logits": logits, "loss": loss}
        else:
            return {"logits": logits}

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        model = cls(*model_args, **kwargs)
        state_dict = torch.load(f"{pretrained_model_name_or_path}/model.bin")
        model.load_state_dict(state_dict)
        return model

    def save_pretrained(self, save_directory):
        self.to(torch.device("cpu"))
        torch.save(self.state_dict(), f"{save_directory}/model.bin")

    def predict(self, inputs, device):
        device = device or self.device
        with torch.no_grad():
            inputs = inputs.to(device)
            out = self(inputs, None)
            return out["logits"].flatten()

In [5]:
model = CustomModelForRegression()

In [6]:
model

CustomModelForRegression(
  (net): Sequential(
    (0): Linear(in_features=2, out_features=1, bias=True)
  )
)

In [7]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

In [8]:
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=20,
    logging_strategy="epoch",
    per_device_train_batch_size=512,
)

trainer = Trainer(
    model=model,  # 自定义模型实例
    args=training_args,  # 训练参数
    train_dataset=data,
    optimizers=(optimizer, None),  # 传递优化器
)

trainer.train()  # 开始训练

Step,Training Loss
2,31.7821
4,21.7868
6,14.0839
8,8.2472
10,4.171
12,1.6363
14,0.3258
16,0.0086
18,0.0003
20,0.0001


TrainOutput(global_step=40, training_loss=4.102151058428717, metrics={'train_runtime': 0.0821, 'train_samples_per_second': 243729.476, 'train_steps_per_second': 487.459, 'total_flos': 0.0, 'train_loss': 4.102151058428717, 'epoch': 20.0})

In [9]:
model.save_pretrained("./model/")

In [10]:
model.from_pretrained("./model/")

  state_dict = torch.load(f"{pretrained_model_name_or_path}/model.bin")


CustomModelForRegression(
  (net): Sequential(
    (0): Linear(in_features=2, out_features=1, bias=True)
  )
)

In [11]:
print(model.net[0].weight, "\n", model.net[0].bias)

Parameter containing:
tensor([[ 1.9996, -3.4005]], requires_grad=True) 
 Parameter containing:
tensor([4.2004], requires_grad=True)


In [12]:
model.predict(torch.tensor([[2.0, 3.0], [6.0, 7.0]]), torch.device("cpu"))

tensor([-2.0019, -7.6055])