In [6]:
# Let's create synthetic data for regression
from utils import DataModule
import torch

class SyntheticRegressionData(DataModule):
    """Synthetic dataset for linear regression.

    Features are sampled from a standard normal distribution and labels are
    generated as ``y = X w + b + epsilon``, where ``epsilon`` is standard
    normal noise multiplied by ``noise``.

    Attributes set here:
        X: feature tensor of shape ``(num_train + num_val, len(w))``.
        y: label tensor of shape ``(num_train + num_val, 1)``.
    """

    def __init__(self,w,b,num_train=1000,num_val=1000,noise=0.01,batch_size=32):
        """Generate the features and noisy labels.

        Args:
            w: 1-D weight tensor; its length fixes the number of features.
            b: bias added to every label.
            num_train: number of training examples.
            num_val: number of validation examples.
            noise: multiplier applied to the standard-normal label noise.
            batch_size: minibatch size read by the data-loader methods.
        """
        super().__init__()
        # NOTE(review): save_hyperparameters is defined in utils; presumably it
        # stores the constructor arguments on self (num_train, num_val and
        # batch_size are later read as attributes) — confirm. It is kept ahead
        # of the local assignments below so only the arguments are in scope.
        self.save_hyperparameters()
        total = num_train + num_val
        # Draw the features before the noise so the random stream is consumed
        # in the same order on every run.
        self.X = torch.randn((total, len(w)))
        label_noise = noise * torch.randn((total, 1))
        weight_column = w.reshape(-1, 1)
        self.y = torch.mm(self.X, weight_column) + b + label_noise

In [7]:
# Build the dataset from the ground-truth parameters w = [2, -3.4], b = 4.2.
data = SyntheticRegressionData(
    w=torch.tensor([2, -3.4]),
    b=4.2,
)


In [8]:
# Show the first example: its feature row and the matching label.
print(
    'features:', data.X[0],
    '\nlabel:', data.y[0],
)


features: tensor([-1.1517,  0.8544]) 
label: tensor([-1.0021])


In [15]:
from utils import add_to_class
import random 

@add_to_class(SyntheticRegressionData)
def get_dataloader(self,train):
    """Yield minibatches of ``(features, labels)`` as a generator.

    Args:
        train: if truthy, iterate over rows ``0 .. num_train-1`` in a freshly
            shuffled order; otherwise iterate over rows
            ``num_train .. num_train+num_val-1`` in their stored order.

    Yields:
        Tuples ``(X_batch, y_batch)`` holding exactly ``batch_size`` rows each.
        Only full batches are produced: a trailing remainder smaller than
        ``batch_size`` is dropped.
    """
    if train:
        order = list(range(self.num_train))
        # Shuffle in place so each pass visits the training rows in a new order.
        random.shuffle(order)
    else:
        order = list(range(self.num_train, self.num_train + self.num_val))

    step = self.batch_size
    full_batches = len(order) // step
    for batch_no in range(full_batches):
        start = batch_no * step
        picked = torch.tensor(order[start:start + step])
        yield self.X[picked, :], self.y[picked, :]



In [16]:
# Take the first minibatch from the training loader and report its shapes.
X, y = next(iter(data.train_dataloader()))
print(
    'X shape:', X.shape,
    '\ny shape:', y.shape,
)

X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])


In [32]:
# A more concise implementation using torch.utils.data
@add_to_class(SyntheticRegressionData)
def get_tensorloader(self,tensors,train,indices = slice(0,None)):
    """Wrap a slice of the given tensors in a ``torch.utils.data.DataLoader``.

    Args:
        tensors: iterable of tensors (e.g. ``(X, y)``), each indexed with
            ``indices`` before being combined into one dataset.
        train: passed to the loader as ``shuffle``; truthy means the examples
            are reshuffled on each pass.
        indices: index applied to every tensor; the default selects all rows.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` of the selected rows, using
        ``self.batch_size`` as the batch size.
    """
    selected = tuple(t[indices] for t in tensors)
    dataset = torch.utils.data.TensorDataset(*selected)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=self.batch_size,
        shuffle=train,
    )
    return loader


@add_to_class(SyntheticRegressionData)
def get_dataloader(self,train):
    """Return a DataLoader over the training or the validation split.

    Args:
        train: if truthy, use rows ``[0, num_train)`` and shuffle them;
            otherwise use rows ``[num_train, end)`` without shuffling.

    Returns:
        The loader built by ``self.get_tensorloader`` for the chosen rows of
        ``(self.X, self.y)``.
    """
    # The validation slice must start at self.num_train: a bare `num_train`
    # is not defined in this scope and raised NameError for train=False.
    idxs = slice(0,self.num_train) if train else slice(self.num_train,None)
    return self.get_tensorloader((self.X,self.y),train,idxs)




In [33]:
# Iterate once over the training loader, printing the shapes of every minibatch.
for X,y in data.train_dataloader():
    print(
        'X shape:', X.shape,
        '\ny shape:', y.shape,
    )

X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])
... (remaining output truncated)

The last batch contains only 8 data points (1000 = 31 × 32 + 8). To drop this incomplete batch, pass `drop_last=True` to the `DataLoader`.

In [34]:
from utils import Module
class LinearRegressionScratch(Module):
    """Linear regression model with explicitly managed parameters.

    Attributes set here:
        W: weight tensor of shape ``(num_inputs, 1)``, drawn from a normal
            distribution with mean 0 and standard deviation ``sigma``;
            tracks gradients.
        b: bias tensor of shape ``(1,)``, initialised to zero; tracks gradients.
    """

    def __init__(self, num_inputs, lr, sigma=0.01):
        """Create the model parameters.

        The original signature omitted ``self``, so ``num_inputs`` was bound
        to the instance and the class could not be constructed as intended.

        Args:
            num_inputs: number of input features (rows of ``W``).
            lr: not used in this block; presumably the learning rate kept for
                the optimizer — confirm against utils.
            sigma: standard deviation of the normal weight initialisation.
        """
        super().__init__()
        # NOTE(review): save_hyperparameters is defined in utils; presumably it
        # stores the constructor arguments on self — confirm.
        self.save_hyperparameters()
        # Init parameters
        self.W = torch.normal(0,sigma,(num_inputs,1),requires_grad=True)
        self.b = torch.zeros(1, requires_grad=True)


@add_to_class(LinearRegressionScratch)
def forward(self, X):
    """Return the model output ``X W + b``.

    Args:
        X: 2-D input tensor whose second dimension matches the rows of
            ``self.W`` (required by ``torch.mm``).

    Returns:
        The matrix product of ``X`` and ``self.W`` with ``self.b`` added.
    """
    projected = torch.mm(X, self.W)
    return projected + self.b

@add_to_class(LinearRegressionScratch)
def loss(self, y_hat, y):
    """Return the mean of the halved squared errors.

    Args:
        y_hat: predicted values.
        y: target values, broadcast-compatible with ``y_hat``.

    Returns:
        The mean over all elements of ``(y_hat - y) ** 2 / 2``.
    """
    # The original returned `errors.mean()`, but no `errors` name exists; the
    # per-element loss is what must be averaged.
    squared_error = (y_hat - y) ** 2 / 2
    return squared_error.mean()
