# CHAPTER 3: Linear Neural Networks

An affine transformation of the input features consists of a linear transformation of the features (a weighted sum) combined
with a translation by a bias term. Models whose output prediction is determined by an affine transformation of the input features are linear models; the affine transformation is specified by the chosen weights and bias.

In [4]:
import torch
import random

In [5]:
def synthetic_data(w, b, num_examples):
    """Generate a synthetic dataset following y = Xw + b + noise.

    Features are sampled from a normal distribution with mean 0 and
    standard deviation 1. Gaussian noise with mean 0 and standard
    deviation 0.01 is added to the labels.

    Args:
        w: Weights of the generating model; ``len(w)`` sets the number of
            features. May be a tensor or any sequence accepted by
            ``torch.as_tensor``; it is cast to the dtype of the features.
        b: Bias added to every label.
        num_examples: Number of examples (rows) to generate.

    Returns:
        A tuple ``(X, y)``. ``X`` has shape ``(num_examples, len(w))`` and
        ``y`` is reshaped into a single column (``(num_examples, 1)`` for
        1-D weights).
    """
    X = torch.normal(0, 1, (num_examples, len(w)))
    # Align `w` with the feature dtype so lists and integer/double tensors
    # do not trigger a dtype mismatch in matmul.
    w = torch.as_tensor(w, dtype=X.dtype)
    y = torch.matmul(X, w) + b
    y += torch.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))

# Ground-truth parameters of the linear model used to generate the data.
true_w = torch.tensor([2, -3.4])
true_b = 4.2
# Draw 1000 synthetic (features, label) examples from that model.
features, labels = synthetic_data(true_w, true_b, 1000)
# Show the first example as a quick sanity check.
print('features:', features[0],'\nlabel:', labels[0])

features: tensor([1.2081, 0.1537]) 
label: tensor([6.1023])


In [8]:
def data_iter(batch_size, features, labels):
    """Yield minibatches of ``(features, labels)`` in random order.

    The example indices are shuffled once per call and then consumed in
    consecutive chunks of ``batch_size``; the final chunk may be smaller
    when the number of examples is not a multiple of ``batch_size``.
    """
    total = len(features)
    order = list(range(total))
    # Visit the examples in a random order rather than sequentially.
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Slicing past the end of the list simply yields a shorter chunk.
        chosen = torch.tensor(order[start:start + batch_size])
        yield features[chosen], labels[chosen]

In [9]:
batch_size = 10
# Peek at a single minibatch to check the iterator's output.
first_batch = next(data_iter(batch_size, features, labels), None)
if first_batch is not None:
    X, y = first_batch
    print(X, '\n', y)

tensor([[-0.7475,  1.8047],
        [-0.4459, -0.6795],
        [-0.5048,  1.0340],
        [-0.3866, -0.3515],
        [ 0.7413,  0.3133],
        [ 1.4544,  1.5210],
        [ 0.8863, -0.6586],
        [-0.0494, -1.1076],
        [ 0.4625,  0.6936],
        [ 2.5180, -0.3998]]) 
 tensor([[-3.4637],
        [ 5.5979],
        [-0.3302],
        [ 4.6179],
        [ 4.6279],
        [ 1.9334],
        [ 8.2053],
        [ 7.8500],
        [ 2.7761],
        [10.5994]])


In [10]:
# Initialize the weights (shape (2, 1)) with small random values drawn from a
# normal distribution with mean 0 and standard deviation 0.01, and the bias
# with zero. Both track gradients so they can be updated during training.
w = torch.normal(0, 0.01, size=(2,1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
print(w)

tensor([[-0.0026],
        [-0.0014]], requires_grad=True)


In [12]:
def linreg(X, w, b): #@save
    """Linear regression model: return ``Xw + b``."""
    weighted_sum = torch.matmul(X, w)
    return weighted_sum + b

In [13]:
def squared_loss(y_hat, y): #@save
    """Return the element-wise squared loss ``(y_hat - y)^2 / 2``.

    ``y`` is reshaped to the shape of ``y_hat`` before subtracting, so the
    result has the same shape as ``y_hat`` (no reduction is applied).
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2

In [15]:
def sgd(params, lr, batch_size): #@save
    """Minibatch stochastic gradient descent.

    Updates every parameter in place by ``lr * grad / batch_size`` and then
    resets its gradient to zero, ready for the next backward pass.

    Args:
        params: Iterable of tensors whose ``.grad`` has been populated by a
            backward pass.
        lr: Learning rate.
        batch_size: Divisor applied to the accumulated gradient (the
            gradient of a summed minibatch loss is averaged this way).
    """
    # The update itself must not be recorded in the autograd graph.
    with torch.no_grad():
        for param in params:
            # A parameter that did not take part in the loss has no
            # gradient; leave it untouched instead of raising a TypeError.
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size
            param.grad.zero_()

In [16]:
# Training hyperparameters and the model/loss used by the loop below.
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss
for epoch in range(num_epochs):
    # One pass over the dataset in shuffled minibatches.
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y) # Per-example minibatch loss for `X` and `y`
        # `l` is not a scalar, so sum it before computing the gradient
        # with respect to [`w`, `b`]
        l.sum().backward()
        # Update parameters using their gradient; `sgd` divides the summed
        # gradient by `batch_size` and zeroes it afterwards
        sgd([w, b], lr, batch_size)
    # Report the mean loss over the full dataset without tracking gradients.
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

epoch 1, loss 0.026966
epoch 2, loss 0.000091
epoch 3, loss 0.000045


In [17]:
# Compare the learned parameters against the ground truth used to
# generate the data.
w_error = true_w - w.reshape(true_w.shape)
b_error = true_b - b
print(f'error in estimating w: {w_error}')
print(f'error in estimating b: {b_error}')

error in estimating w: tensor([-0.0003, -0.0004], grad_fn=<SubBackward0>)
error in estimating b: tensor([0.0007], grad_fn=<RsubBackward1>)
