In [3]:
!pip install mxnet

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mxnet
  Downloading mxnet-1.9.1-py3-none-manylinux2014_x86_64.whl (49.1 MB)
[K     |████████████████████████████████| 49.1 MB 1.3 MB/s 
[?25hCollecting graphviz<0.9.0,>=0.8.1
  Downloading graphviz-0.8.4-py2.py3-none-any.whl (16 kB)
Installing collected packages: graphviz, mxnet
  Attempting uninstall: graphviz
    Found existing installation: graphviz 0.10.1
    Uninstalling graphviz-0.10.1:
      Successfully uninstalled graphviz-0.10.1
Successfully installed graphviz-0.8.4 mxnet-1.9.1


In [33]:
%matplotlib inline
import mxnet 
from mxnet import autograd, np, npx
import random
# Switch MXNet to its NumPy-compatible array semantics; done once, before any
# arrays are created in the cells below.
npx.set_np()

## Generating Dataset

In [34]:
def synthetic_data(w, b, num_examples):
  """Build a noisy linear dataset following y = Xw + b + noise.

  Args:
    w: Weight vector; its length sets the number of feature columns.
    b: Bias added to every target.
    num_examples: Number of rows to generate.

  Returns:
    A pair `(X, y)`: `X` has shape `(num_examples, len(w))` with standard-normal
    entries, and `y` is a column of shape `(num_examples, 1)`.
  """
  num_features = len(w)
  X = np.random.normal(0, 1, (num_examples, num_features))
  clean_targets = np.dot(X, w) + b
  # Gaussian observation noise with standard deviation 0.01.
  noise = np.random.normal(0, 0.01, clean_targets.shape)
  noisy_targets = clean_targets + noise
  return X, noisy_targets.reshape((-1, 1))

In [35]:
# Ground-truth weights and bias used to generate the data.
true_w, true_b = np.array([2, -3.4]), 4.2
# Draw 1000 noisy examples from the ground-truth linear model.
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)

## Reading Dataset

In [41]:
def data_iter(batch_size, features, labels):
  """Yield shuffled minibatches of `(features, labels)`.

  Every example is visited exactly once per call, in random order. The last
  batch is shorter when `batch_size` does not divide the number of examples.

  Args:
    batch_size: Number of examples per minibatch; must be positive.
    features: Array of examples; `len(features)` gives the example count.
    labels: Array indexed with the same positions as `features`.

  Yields:
    Tuples `(features[batch_indices], labels[batch_indices])`.

  Raises:
    ValueError: If `batch_size` is not positive.
  """
  if batch_size <= 0:
    raise ValueError(f'batch_size must be positive, got {batch_size}')
  num_examples = len(features)
  indices = list(range(num_examples))
  # The examples are read at random, in no particular order
  random.shuffle(indices)
  for i in range(0, num_examples, batch_size):
    # Slicing clamps at the end of the list, so the final batch may be short.
    batch_indices = np.array(indices[i:i + batch_size])
    # Yield inside the loop so every batch is produced, not only the last one.
    yield features[batch_indices], labels[batch_indices]

## Initializing Model Parameters; Defining the Model, Loss Function, and Optimizer

In [42]:
# Weights start as small Gaussian noise (mean 0, std 0.01) with shape (2, 1);
# the bias starts at zero.
w = np.random.normal(0, 0.01, (2, 1))
b = np.zeros(1)
# Request gradient storage for both parameters so that `w.grad` / `b.grad`
# can be read by `sgd` after `backward()` is called in the training loop.
w.attach_grad()
b.attach_grad()

In [43]:
def linreg(X, w, b):
  """Linear regression forward pass: returns `Xw + b`.

  Args:
    X: Feature matrix.
    w: Weight array multiplied with `X` via `np.dot`.
    b: Bias added to the product (broadcast over rows).
  """
  weighted_sum = np.dot(X, w)
  return weighted_sum + b

def squared_loss(y_hat, y):
  """Element-wise halved squared error between predictions and targets.

  `y` is reshaped to the shape of `y_hat` before subtracting, so the result
  has the same shape as `y_hat`.
  """
  targets = y.reshape(y_hat.shape)
  residual = y_hat - targets
  return residual**2 / 2

def sgd(params, lr, batch_size):
  """Apply one minibatch SGD step to every parameter, in place.

  Args:
    params: Iterable of arrays, each exposing its gradient as `.grad`.
    lr: Learning rate.
    batch_size: Divisor applied to the gradient before the update.
  """
  for p in params:
    step = lr * p.grad / batch_size
    # Slice assignment overwrites the existing array instead of rebinding it.
    p[:] = p - step

## Training

In [44]:
# Learning rate passed to `sgd`.
lr = 0.03
# Number of passes over the dataset.
num_epochs = 300
# Aliases so the training loop does not depend on a specific model or loss.
net = linreg
loss = squared_loss
# Equal to the 1000 generated examples, so each epoch is one full-batch update.
batch_size= 1000

In [45]:
for epoch in range(num_epochs):
  for X, y in data_iter(batch_size, features, labels):
    # Run the forward pass under `autograd.record()` so that `backward()`
    # can compute gradients for it.
    with autograd.record():
      l = loss(net(X, w, b), y) # Minibatch loss in `X` and `y`
    # Because `l` has a shape (`batch_size`, 1) and is not a scalar
    # variable, the elements in `l` are added together to obtain a new
    # variable, on which gradients with respect to [`w`, `b`] are computed
    l.backward()
    sgd([w, b], lr, batch_size) # Update parameters using their gradient
  # Loss over the full dataset with the current parameters, evaluated after
  # every epoch (outside the recorded scope).
  train_l = loss(net(features, w, b), labels)
  # Report only every 30th epoch to keep the output short.
  if epoch%30 == 0:
    print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

epoch 1, loss 15.166262
epoch 31, loss 2.565142
epoch 61, loss 0.434754
epoch 91, loss 0.073871
epoch 121, loss 0.012613
epoch 151, loss 0.002192
epoch 181, loss 0.000416
epoch 211, loss 0.000112
epoch 241, loss 0.000060
epoch 271, loss 0.000051
