In [48]:
%matplotlib inline
import torch
import random

In [7]:
class LinearRegressionScratch:
    """
    The linear regression model implemented from scratch.

    Parameters
    ----------
    num_inputs : int
        Number of input features; the weight tensor has shape
        ``(num_inputs, 1)``.
    lr : float
        Learning rate. Stored on the instance as ``self.lr`` so that later
        code (e.g. an optimizer factory) can read it.
    sigma : float, default 0.01
        Standard deviation of the zero-mean normal distribution used to
        initialise the weights.

    Attributes
    ----------
    w : torch.Tensor
        Weights of shape ``(num_inputs, 1)``, created with
        ``requires_grad=True``.
    b : torch.Tensor
        Bias of shape ``(1,)``, initialised to zero, created with
        ``requires_grad=True``.
    """
    def __init__(self, num_inputs, lr, sigma = 0.01):
        # Keep the learning rate: it was previously accepted and then dropped.
        self.lr = lr
        self.w = torch.normal(0, sigma, (num_inputs,1), requires_grad = True)
        self.b = torch.zeros(1, requires_grad = True)

We must define our model, relating its inputs and parameters to the output. For our linear model we simply take the matrix-vector product of the input features $X$ and the model weights $w$, and add the offset $b$ to each example.

The product $Xw$ is a vector and $b$ is a scalar. Because of the broadcasting mechanism, when we add a vector and a scalar, the scalar is added to each component of the vector.

Updating our model requires taking the gradient of the loss function. We use the squared loss function. We will return the averaged loss value among all examples in the **minibatch**

The `configure_optimizers` method will return an instance of the SGD class. 

In [14]:
class LinearRegressionScratch:
    """
    The linear regression model implemented from scratch.

    Parameters
    ----------
    num_inputs : int
        Number of input features; the weight tensor has shape
        ``(num_inputs, 1)``.
    lr : float
        Learning rate handed to the optimizer built by
        ``configure_optimizers``.
    sigma : float, default 0.01
        Standard deviation of the zero-mean normal distribution used to
        initialise the weights.
    """
    def __init__(self, num_inputs, lr, sigma = 0.01):
        # configure_optimizers reads self.lr, so it must be stored here;
        # without this line that method raises AttributeError.
        self.lr = lr
        self.w = torch.normal(0, sigma, (num_inputs,1), requires_grad = True)
        self.b = torch.zeros(1, requires_grad = True)

    def forward(self, X):
        """Return ``X @ w + b``; the bias is broadcast over the rows of X."""
        return torch.matmul(X, self.w) + self.b

    def loss(self, y_hat, y):
        """
        Return the halved squared error averaged over all elements.

        NOTE: ``y_hat`` and ``y`` are combined with ordinary broadcasting, so
        they should have the same shape; the result is a scalar tensor.
        """
        l = (y_hat - y) ** 2 / 2
        return l.mean()

    def configure_optimizers(self):
        """Return an ``SGD`` optimizer over ``[w, b]`` using the stored ``lr``."""
        return SGD([self.w, self.b], self.lr)
                  

###  Optimization Algo

Linear regression has a closed-form solution; however, our goal here is to illustrate how to train more general neural networks, which requires minibatch SGD.

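- at each step, we take a minibatch, compute the gradient of the loss with respect to the parameters, and update each parameter by subtracting the learning rate times its gradient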

In [13]:
class SGD:
    """
    Minibatch stochastic gradient descent.

    Parameters
    ----------
    params : iterable of torch.Tensor
        Tensors to optimise. The iterable is copied into a list so that a
        one-shot iterator/generator can still be traversed on every call to
        ``step`` and ``zero_grad``.
    lr : float
        Learning rate (step size).
    """
    def __init__(self, params, lr):
        self.params = list(params)
        self.lr = lr

    def step(self):
        """Apply ``param <- param - lr * param.grad`` to every parameter."""
        # In-place updates on leaf tensors that require grad are rejected by
        # autograd unless gradient tracking is disabled for the update.
        with torch.no_grad():
            for param in self.params:
                if param.grad is None:
                    # No gradient has been accumulated for this tensor yet.
                    continue
                param -= self.lr * param.grad

    def zero_grad(self):
        """Reset accumulated gradients to zero in place, where they exist."""
        for param in self.params:
            if param.grad is not None:
                param.grad.zero_()
            

### Training Loop

- in each epoch, we iterate through the entire training dataset, passing once through every example (assuming the number of examples is divisible by batch size)
- in each **iteration** we grab a minibatch of training examples and compute the loss through the `training_step` method. We compute the gradients with respect to each parameter, then call the optimization algorithm to update the model parameters.

In [77]:
class SyntheticRegressionData:
    """
    Randomly generated features with noisy linear labels.

    Draws ``num_train + num_val`` standard-normal feature rows and builds
    labels as ``X @ w + b`` plus Gaussian noise scaled by ``noise``. The first
    ``num_train`` rows are the training split, the rest the validation split.
    """
    def __init__(self, w, b, noise=0.01, num_train=1000, num_val=1000,
                 batch_size=32):
        self.num_train, self.num_val = num_train, num_val
        self.batch_size = batch_size
        total = num_train + num_val
        # Features are drawn before the noise so the RNG stream is consumed
        # in a fixed order.
        self.X = torch.randn(total, len(w))
        label_noise = torch.randn(total, 1) * noise
        weight_column = w.reshape((-1, 1))
        self.y = torch.matmul(self.X, weight_column) + b + label_noise

    def get_dataloader(self, train):
        """
        Yield ``(X_batch, y_batch)`` pairs of at most ``batch_size`` rows.

        With ``train`` truthy the training rows are visited in an order
        shuffled by ``random.shuffle``; otherwise the validation rows are
        visited in their stored order.
        """
        first = 0 if train else self.num_train
        last = self.num_train if train else self.num_train + self.num_val
        order = list(range(first, last))
        if train:
            random.shuffle(order)
        for offset in range(0, len(order), self.batch_size):
            picked = torch.tensor(order[offset:offset + self.batch_size])
            yield self.X[picked], self.y[picked]
            


In [78]:
# Build a synthetic dataset from ground-truth weights [2, -3.4] and bias 4.2;
# noise level, split sizes and batch size use the constructor defaults.
data = SyntheticRegressionData(w= torch.tensor([2, -3.4]), b = 4.2 )

Training machine learning models often requires multiple passes over the data, grabbing one minibatch of the examples at a time. This data is used to update the model. 

The `get_dataloader` method takes a `train` flag and uses the stored batch size, matrix of features, and vector of labels to generate minibatches of size `batch_size`.

As such each minibatch consists of a tuple of features and labels. 

In [80]:
# Pull the first minibatch from the training loader (train=True) and show the
# shapes of its feature and label tensors.
X , y = next(iter(data.get_dataloader(True)))
print('X shape:', X.shape, '\ny shape:', y.shape)

X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])


In [81]:
class Trainer:
    """
    Minimal training-loop scaffold (CPU only).

    Parameters
    ----------
    max_epochs : int
        Number of passes over the training data; stored as ``self.max_epochs``.
    num_gpus : int
        Must be 0; GPU training is not supported yet.
    gradient_clip_val : float, default 0
        Stored as ``self.gradient_clip_val`` for use by a training loop.
    """
    def __init__(self, max_epochs, num_gpus, gradient_clip_val=0):
        assert num_gpus == 0, 'no GPU support yet'
        # Keep the hyperparameters; they were previously accepted and dropped.
        self.max_epochs = max_epochs
        self.gradient_clip_val = gradient_clip_val

    def prepare_data(self, data):
        """
        Fetch the train/validation loaders from ``data`` and count batches.

        ``data`` must provide ``train_dataloader()`` and ``val_dataloader()``.
        The returned loaders must support ``len()`` (a plain generator does
        not); ``val_dataloader()`` may return ``None``, in which case the
        validation batch count is 0.
        """
        self.train_dataloader = data.train_dataloader()
        self.val_dataloader = data.val_dataloader()
        self.num_train_batches = len(self.train_dataloader)
        self.num_val_batches = (len(self.val_dataloader)
                                if self.val_dataloader is not None else 0)

In [None]:
# update