In [1]:
%matplotlib inline
import random
import torch
from d2l import torch as d2l

In [8]:
class SyntheticRegressionData(d2l.DataModule):
    """Synthetic dataset for linear regression.

    Features are sampled from a standard normal distribution and labels are
    produced by the linear model ``y = X w + b + eps``, where ``eps`` is
    standard-normal noise multiplied by ``noise``.
    """

    def __init__(self, w, b, noise=0.01, num_train=1000, num_val=1000,
                 batch_size=32):
        """Generate ``num_train + num_val`` examples.

        Args:
            w: Tensor of ground-truth weights; ``len(w)`` sets the number of
                feature columns.
            b: Ground-truth bias added to every label.
            noise: Multiplier applied to the standard-normal label noise.
            num_train: Number of training examples.
            num_val: Number of validation examples.
            batch_size: Minibatch size read later by the data loaders.
        """
        super().__init__()
        # d2l helper (defined outside this notebook). Other methods read
        # self.num_train / self.num_val / self.batch_size, which are not
        # assigned anywhere else, so they are expected to be set here.
        # Kept ahead of the locals below in case the helper inspects the
        # caller's local variables -- TODO confirm against d2l.
        self.save_hyperparameters()
        total = num_train + num_val
        self.X = torch.randn(total, len(w))
        epsilon = torch.randn(total, 1) * noise
        # Column-vector weights give labels of shape (total, 1).
        self.y = self.X @ w.reshape(-1, 1) + b + epsilon

In [9]:
# Build the dataset with ground-truth weights [2, -3.4] and bias 4.2;
# every other constructor argument keeps its default.
data = SyntheticRegressionData(
    w=torch.tensor([2, -3.4]),
    b=4.2,
)

In [10]:
# Show the first generated example: its feature row and its label.
print(f"features: {data.X[0]},\nlabel: {data.y[0]}")

features: tensor([-0.3655,  0.4268]),
label: tensor([2.0281])


In [11]:
@d2l.add_to_class(SyntheticRegressionData)
def get_dataloader(self, train):
    """Yield minibatches of ``(features, labels)`` from the stored tensors.

    Args:
        train: If true, iterate over example indices ``0 .. num_train - 1``
            in a freshly shuffled order; otherwise iterate over indices
            ``num_train .. num_train + num_val - 1`` in their stored order.

    Yields:
        A tuple ``(X, y)`` holding the rows of ``self.X`` and ``self.y``
        selected by up to ``batch_size`` indices. The final batch is shorter
        when ``batch_size`` does not divide the number of indices.
    """
    if train:
        indices = list(range(0, self.num_train))
        # Shuffle in place so each pass visits the training examples in a
        # different random order.
        random.shuffle(indices)
    else:
        indices = list(range(self.num_train, self.num_train + self.num_val))
    for i in range(0, len(indices), self.batch_size):
        # Slicing past the end of the list simply yields a shorter batch.
        batch_indices = torch.tensor(indices[i:i + self.batch_size])
        yield self.X[batch_indices], self.y[batch_indices]

In [13]:
@d2l.add_to_class(d2l.DataModule)
def get_tensor_loader(self, tensors, train, indices=slice(0, None)):
    """Wrap a selection of in-memory tensors in a PyTorch ``DataLoader``.

    Args:
        tensors: Iterable of indexable tensors; each one is indexed with
            ``indices`` before being placed in the dataset.
        train: Passed to the loader as ``shuffle``, so batches are shuffled
            only when this is true.
        indices: Index applied to every tensor (default: all rows).

    Returns:
        A ``torch.utils.data.DataLoader`` over the selected rows that uses
        ``self.batch_size`` as its batch size.
    """
    selected = tuple(tensor[indices] for tensor in tensors)
    return torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(*selected),
        batch_size=self.batch_size,
        shuffle=train,
    )

@d2l.add_to_class(SyntheticRegressionData)
def get_dataloader(self, train):
    """Return a ``DataLoader`` over the training or validation examples.

    Args:
        train: If true, load the first ``num_train`` examples (shuffled by
            the tensor loader); otherwise load every example from index
            ``num_train`` onward, in order.

    Returns:
        The loader built by ``get_tensor_loader`` for ``(self.X, self.y)``.
    """
    # The validation split starts where the training split ends, so the
    # boundary is num_train.
    i = slice(0, self.num_train) if train else slice(self.num_train, None)
    # Call the helper under the name it is registered with on
    # d2l.DataModule in this notebook.
    return self.get_tensor_loader((self.X, self.y), train, i)

In [14]:
# Pull a single minibatch of features and labels from the training loader.
# train_dataloader() is provided by d2l.DataModule (not shown here);
# presumably it delegates to get_dataloader(train=True) -- verify in d2l.
X, y = next(iter(data.train_dataloader()))

In [15]:
# Report the shapes of the minibatch fetched above (features, then labels).
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

X shape: torch.Size([32, 2])
y shape: torch.Size([32, 1])


In [16]:
# Number of minibatches in one pass over the training loader; with the
# default 1000 training examples and batch size 32 the recorded value is 32.
len(data.train_dataloader())

32