## Synthetic Regression Data

Synthetic data helps us evaluate the properties of our learning algorithms and confirm that our implementations work as expected.

For example, if we create data for which the correct parameters are known a priori, then we can check that our model can in fact recover them.

In [1]:
%matplotlib inline
import random
import torch
from d2l import torch as d2l

#### Generating the dataset

In [2]:
class SyntheticRegressionData(d2l.DataModule):  #@save
    """Synthetic linear-regression dataset: ``y = X w + b + noise``.

    ``X`` is drawn from a standard normal distribution and the additive
    noise is standard normal scaled by ``noise``.

    Args:
        w: Weight tensor; ``len(w)`` fixes the number of feature columns
            and it is viewed as a column vector for the matrix product.
        b: Bias added to every label.
        noise: Scale factor applied to the standard-normal noise term.
        num_train: Number of training examples.
        num_val: Number of validation examples.
        batch_size: Minibatch size (not used here directly; stored with
            the other hyperparameters).
    """

    def __init__(self, w, b, noise=0.01, num_train=1000, num_val=1000, batch_size=32):
        super().__init__()
        # NOTE(review): keep this call ahead of any new local variables --
        # presumably it records the constructor arguments as attributes
        # (other methods read self.num_train / self.batch_size); confirm
        # against d2l's implementation.
        self.save_hyperparameters()
        total = num_train + num_val
        # Features first, then noise, so the random draws happen in a
        # fixed order.
        self.X = torch.randn(total, len(w))
        epsilon = noise * torch.randn(total, 1)
        weights_col = w.view(-1, 1)
        self.y = torch.matmul(self.X, weights_col) + b + epsilon

In [10]:
# Build the dataset from known ground-truth parameters so a fitted model
# can later be compared against them.
data = SyntheticRegressionData(
    w=torch.tensor([2, -3.4]),
    b=4.2,
)

In [12]:
# Show the first example: one feature row and its label.
print(
    'features:', data.X[0],
    '\nlabel:', data.y[0],
)

features: tensor([0.8759, 2.9366]) 
label: tensor([-4.0145])


In [13]:
@d2l.add_to_class(SyntheticRegressionData)
def get_dataloader(self, train):
    """Yield ``(features, labels)`` minibatches as a generator.

    Args:
        train: If true, iterate over the first ``num_train`` examples in
            a freshly shuffled order; otherwise iterate over the
            following ``num_val`` examples in their stored order.

    Yields:
        A pair of tensors indexed from ``self.X`` and ``self.y``. The
        final batch is shorter when the example count is not a multiple
        of ``batch_size``.
    """
    if train:
        order = list(range(self.num_train))
        # Training examples are visited in random order.
        random.shuffle(order)
    else:
        first_val = self.num_train
        order = list(range(first_val, first_val + self.num_val))
    for start in range(0, len(order), self.batch_size):
        stop = start + self.batch_size
        picked = torch.tensor(order[start:stop])
        yield self.X[picked], self.y[picked]

In [15]:
# Fetch a single training minibatch and report the tensor shapes.
X, y = next(iter(data.train_dataloader()))
print(
    'X shape:', X.shape,
    '\ny shape:', y.shape,
)

X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])


In [16]:
# A more efficient data loader, built on PyTorch's TensorDataset and DataLoader

@d2l.add_to_class(d2l.DataModule)  #@save
def get_tensorloader(self, tensors, train, indices=slice(0, None)):
    """Wrap a selection of ``tensors`` in a PyTorch ``DataLoader``.

    Args:
        tensors: Iterable of tensors; each one is indexed with
            ``indices`` before being combined into a ``TensorDataset``.
        train: Passed as ``shuffle`` so that only training data is
            shuffled.
        indices: Index applied to every tensor; the default slice keeps
            all rows.

    Returns:
        A ``torch.utils.data.DataLoader`` with batch size
        ``self.batch_size``.
    """
    selected = tuple(t[indices] for t in tensors)
    return torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(*selected),
        self.batch_size,
        shuffle=train,
    )

@d2l.add_to_class(SyntheticRegressionData)  #@save
def get_dataloader(self, train):
    """Return a ``DataLoader`` over the training or validation rows.

    Args:
        train: If true, use rows ``[0, num_train)``; otherwise use every
            row from ``num_train`` onward. The flag is also forwarded to
            ``get_tensorloader``.
    """
    if train:
        rows = slice(0, self.num_train)
    else:
        rows = slice(self.num_train, None)
    return self.get_tensorloader((self.X, self.y), train, rows)

In [22]:
# Repeat the shape check with the DataLoader-based implementation, and
# also report how many minibatches the training loader produces.
X, y = next(iter(data.train_dataloader()))
print(
    'X shape:', X.shape,
    '\ny shape:', y.shape,
)
print(len(data.train_dataloader()))
X[0]

X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
32


tensor([-0.9407, -1.1003])