Source: [D2L section 3.2](https://d2l.ai/chapter_linear-networks/linear-regression-scratch.html)


In [None]:
#!pip install d2l==0.17.0
%matplotlib inline
import random
import numpy as np
import tensorflow as tf
# from d2l import tensorflow as d2l
# for easier reading np
np.set_printoptions(precision=4,suppress=True)

**Generating the Dataset**

In [None]:
def synthetic_data(w, b, num_examples):
    """Generate a synthetic linear-regression dataset, y = Xw + b + noise.

    Args:
        w: Weight tensor; its first dimension sets the number of features
            and it is flattened into a single column before multiplying.
        b: Bias added to every label.
        num_examples: Number of rows (examples) to generate.

    Returns:
        A pair ``(X, y)``: ``X`` has shape ``(num_examples, w.shape[0])`` with
        standard-normal entries, and ``y`` has shape ``(num_examples, 1)``.
    """
    num_features = w.shape[0]
    # Features are drawn from a standard normal distribution.
    X = tf.random.normal(shape=(num_examples, num_features))
    weight_column = tf.reshape(w, (-1, 1))
    clean_labels = tf.matmul(X, weight_column) + b
    # Perturb the labels with small Gaussian noise (standard deviation 0.01).
    noise = tf.random.normal(shape=clean_labels.shape, stddev=0.01)
    return X, tf.reshape(clean_labels + noise, (-1, 1))

In [None]:
# Ground-truth parameters of the linear model that generates the data.
true_w = tf.constant([2, -3.4])
true_b = 4.2
# Draw 1000 synthetic examples from y = Xw + b + noise.
features, labels = synthetic_data(true_w, true_b, 1000)

In [None]:
# Show the first example: its feature row and the matching label.
print('features:', features[0], '\nlabel:', labels[0])
# NOTE: the scatter plot below is disabled because it relies on the `d2l`
# package, whose import is commented out in the first cell.
# d2l.set_figsize()
# The semicolon is for displaying the plot only
# d2l.plt.scatter(features[:, (1)].numpy(), labels.numpy(), 1);

features: tf.Tensor([-0.2746 -0.2678], shape=(2,), dtype=float32) 
label: tf.Tensor([4.5744], shape=(1,), dtype=float32)


**Reading the Dataset**

In [None]:
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of ``(features, labels)``.

    The example indices are shuffled once per call and then consumed in
    consecutive chunks of ``batch_size``; the final chunk is shorter when the
    number of examples is not a multiple of ``batch_size``.

    Args:
        batch_size: Number of examples per minibatch.
        features: Indexable collection of examples; its length sets the
            number of examples.
        labels: Labels aligned with ``features`` along the first axis.

    Yields:
        ``(batch_features, batch_labels)`` gathered at the same random indices.
    """
    total = len(features)
    order = list(range(total))
    # Visit the examples in a fresh random order on every pass.
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Slicing stops at the end of the list, so the last batch is simply
        # shorter when fewer than `batch_size` examples remain.
        batch_idx = tf.constant(order[start:start + batch_size])
        batch_features = tf.gather(features, batch_idx)
        batch_labels = tf.gather(labels, batch_idx)
        yield batch_features, batch_labels

In [None]:
# Number of examples per minibatch; reused by the training loop.
batch_size = 10

# Peek at a single minibatch: print the first (features, labels) pair that the
# iterator yields, then stop.
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break

tf.Tensor(
[[-0.2951  1.1452]
 [ 1.7754  0.7784]
 [-1.2155 -0.8912]
 [-1.4991  0.3303]
 [-0.6442 -0.3912]
 [ 0.1752 -1.0567]
 [ 0.3709  1.5473]
 [-0.6878 -1.4764]
 [-1.3002 -1.0613]
 [ 1.2171 -1.4363]], shape=(10, 2), dtype=float32) 
 tf.Tensor(
[[-0.2941]
 [ 5.0931]
 [ 4.7934]
 [ 0.0872]
 [ 4.2347]
 [ 8.149 ]
 [-0.3017]
 [ 7.8397]
 [ 5.1994]
 [11.5042]], shape=(10, 1), dtype=float32)


**Initializing Model Parameters**

In [None]:
# Weights: a (2, 1) column drawn from a normal distribution with mean 0 and
# standard deviation 0.01.
w = tf.Variable(tf.random.normal(shape=(2, 1), mean=0, stddev=0.01),
                trainable=True)
# Bias: a shape-(1,) tensor initialized to zero.
b = tf.Variable(tf.zeros(1), trainable=True)

**Defining the Model**

In [None]:
def linreg(X, w, b):
    """Linear regression model: return ``Xw + b``.

    Args:
        X: Feature matrix, one example per row.
        w: Weight matrix multiplied on the right of ``X``.
        b: Bias added to the matrix product.
    """
    weighted_sum = tf.matmul(X, w)
    return weighted_sum + b

**Defining the Loss Function**

In [None]:
def squared_loss(y_hat, y):
    """Element-wise squared loss, ``(y_hat - y)^2 / 2``.

    ``y`` is reshaped to the shape of ``y_hat`` before subtracting, so the
    result has the same shape as ``y_hat`` (one loss value per prediction).
    """
    targets = tf.reshape(y, y_hat.shape)
    residual = y_hat - targets
    return residual**2 / 2

**Defining the Optimization Algorithm**

In [None]:
def sgd(params, grads, lr, batch_size):
    """Apply one minibatch stochastic gradient descent step in place.

    Args:
        params: Parameters to update; each must provide ``assign_sub``.
        grads: Gradients paired positionally with ``params``.
        lr: Learning rate.
        batch_size: Divisor applied to every gradient before the update.
    """
    for variable, gradient in zip(params, grads):
        # Scale the gradient by the learning rate and the batch size.
        step = lr * gradient / batch_size
        variable.assign_sub(step)

**Training**

In [None]:
# Learning rate used for every SGD step.
lr = 0.03
# Number of full passes over the training data.
num_epochs = 3
# Aliases so the training loop is written against generic `net` / `loss` names.
net = linreg
loss = squared_loss

In [None]:
# Train for `num_epochs` passes over the dataset, one SGD step per minibatch.
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        # Record the forward pass on the tape so it can be differentiated.
        with tf.GradientTape() as g:
            l = loss(net(X, w, b), y)  # Minibatch loss in `X` and `y`
        # Compute gradient on l with respect to [`w`, `b`]
        # `l` is not reduced to a scalar here; for a non-scalar target the
        # tape differentiates the sum of its elements (documented TensorFlow
        # behavior), and `sgd` then divides by `batch_size`.
        dw, db = g.gradient(l, [w, b])
        # Update parameters using their gradient
        sgd([w, b], [dw, db], lr, batch_size)
    # After each epoch, report the mean loss over the whole dataset.
    train_l = loss(net(features, w, b), labels)
    print(f'epoch {epoch + 1}, loss {float(tf.reduce_mean(train_l)):f}')

epoch 1, loss 0.039831
epoch 2, loss 0.000153
epoch 3, loss 0.000049


In [None]:
# Compare the learned parameters with the ground truth used to generate the
# data. `w` is reshaped to `true_w`'s shape so the subtraction is element-wise.
print(f'error in estimating w: {true_w - tf.reshape(w, true_w.shape)}')
print(f'error in estimating b: {true_b - b}')

error in estimating w: [ 0.0008 -0.0011]
error in estimating b: [-0.0003]
