In [1]:
from mxnet import np, npx
from d2l import mxnet as d2l
from mxnet.gluon.loss import L2Loss
import mxnet.gluon.nn as nn
from mxnet.gluon.trainer import Trainer
from mxnet import init
from mxnet import autograd
from pdb import set_trace
npx.set_np()

## Create Dataset

In [2]:
# Synthetic linear-regression problem with as many features (p) as samples (n).
n = p = 200
# Design matrix, ground-truth weight column vector and scalar bias, all drawn
# from np.random.normal with its default parameters.
X = np.random.normal(size=(n, p))
w = np.random.normal(size=(p, 1))
b = np.random.normal()
# Noise-free targets: y = Xw + b, one row per sample.
y = np.dot(X, w) + b

In [3]:
# Randomly partition the sample indices into a train part and a test part.
dataset_size = len(X)
train_size_fraction = 0.1
# np.random.shuffle permutes its argument IN PLACE and returns None, so the
# index array has to be created first and shuffled in a separate statement;
# assigning the call's return value would leave shuffled_indices as None.
shuffled_indices = np.array(range(dataset_size))
np.random.shuffle(shuffled_indices)
# Number of samples that go to the training split (computed once, used twice).
train_size = int(train_size_fraction * dataset_size)
train_indices = shuffled_indices[:train_size]
test_indices = shuffled_indices[train_size:]

In [4]:
# Each split is stored as a (features, labels) pair selected by row index.
train_ds = (X[train_indices, :], y[train_indices])
test_ds = (X[test_indices, :], y[test_indices])

In [5]:
def get_iterator(ds, batch_size=100):
    """Yield consecutive mini-batches from a ``(features, labels)`` pair.

    Batches are taken in storage order (no shuffling). The final batch is
    shorter than ``batch_size`` when the dataset size is not a multiple of it.

    Args:
        ds: Two-element sequence ``(features, labels)``. Both elements must
            support slicing along their first axis; the dataset size is taken
            from ``len(ds[0])``.
        batch_size: Maximum number of samples per batch. Must be positive.

    Yields:
        Tuples ``(features_batch, labels_batch)``.

    Raises:
        ValueError: If ``batch_size`` is not positive. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if batch_size <= 0:
        raise ValueError(f'batch_size must be positive, got {batch_size}')
    features, labels = ds[0], ds[1]
    ds_size = len(features)
    for start_idx in range(0, ds_size, batch_size):
        # A slice selects the same contiguous rows as the equivalent range of
        # indices, but also works on plain Python sequences and does not
        # require advanced (fancy) indexing support from the container.
        batch = slice(start_idx, min(start_idx + batch_size, ds_size))
        yield features[batch], labels[batch]

In [6]:
# get_iterator is a generator function, so each of these objects can be
# iterated over only once.
train_dl = get_iterator(train_ds)
test_dl = get_iterator(test_ds)

## Define Network

In [17]:
# Linear model: a Sequential container holding a single Dense layer with one
# output unit.
model = nn.Sequential()
# The layer is added inside the container's name scope.
# NOTE(review): presumably this prefixes the layer's parameter names, which
# the name-based '.*bias' lookup in the training cell relies on — confirm.
with model.name_scope():
    model.add(nn.Dense(1))
# Parameters are initialised with init.Normal(sigma=0.01).
model.initialize(init.Normal(sigma=0.01))

## Define Loss

In [10]:
def loss_fn(yhat, y):
    """Evaluate Gluon's ``L2Loss`` on predictions ``yhat`` against targets ``y``.

    A new ``L2Loss`` object is constructed on every call and applied
    immediately; its result is returned unchanged.
    """
    criterion = L2Loss()
    return criterion(yhat, y)

## Define Optimization Loop

In [11]:
def get_ds_loss(ds, model):
    """Return the mean squared error of ``model`` over the dataset ``ds``.

    The dataset is traversed batch by batch with ``get_iterator`` (default
    batch size). Squared residuals are summed over all batches and the total
    is divided by the number of samples seen.

    Args:
        ds: ``(features, labels)`` pair accepted by ``get_iterator``.
        model: Callable mapping a feature batch to predictions.

    Returns:
        Sum of squared residuals divided by the sample count.
    """
    squared_error_total = 0
    sample_count = 0
    for features, labels in get_iterator(ds):
        residual = model(features) - labels
        squared_error_total += (residual ** 2).sum()
        sample_count += labels.shape[0]
    return squared_error_total / sample_count

In [21]:
# Training configuration: number of passes over the training split, SGD
# learning rate and weight-decay coefficient.
num_epochs = 100
lr = 0.003
wd = 3
# Flag used to print the batch size only once, on the very first mini-batch.
first = True
trainer = Trainer(model.collect_params(), 'sgd', {'learning_rate': lr, 'wd': wd})
# Set wd_mult to 0 on every parameter whose name matches '.*bias'.
# NOTE(review): presumably this exempts the bias from weight decay — confirm
# against the MXNet optimizer documentation.
model.collect_params('.*bias').setattr('wd_mult', 0)
for epoch in range(num_epochs):
    # get_iterator is a generator, so a fresh one is created for each epoch.
    for features, labels in get_iterator(train_ds):
        batch_size = features.shape[0]
        if first:
            print(f'Batch size={batch_size}')
            first = False
        # Forward pass and loss are computed under autograd.record() so that
        # backward() can be called on the result.
        with autograd.record():
            yhat = model(features)
            loss = loss_fn(yhat, labels)
        loss.backward()
        # The actual number of samples in this batch is passed to the trainer.
        trainer.step(batch_size)
    # Report the loss on both splits after every epoch.
    print(f'Epoch: {epoch}, train_loss: {get_ds_loss(train_ds, model)}, test_loss: {get_ds_loss(test_ds, model)}')

Batch size=20
Epoch: 0, train_loss: 11.905947, test_loss: 214.86436
Epoch: 1, train_loss: 11.9028, test_loss: 214.86375
Epoch: 2, train_loss: 11.899744, test_loss: 214.86314
Epoch: 3, train_loss: 11.896772, test_loss: 214.86263
Epoch: 4, train_loss: 11.893879, test_loss: 214.86209
Epoch: 5, train_loss: 11.891066, test_loss: 214.86159
Epoch: 6, train_loss: 11.88833, test_loss: 214.86111
Epoch: 7, train_loss: 11.885667, test_loss: 214.86067
Epoch: 8, train_loss: 11.883078, test_loss: 214.86029
Epoch: 9, train_loss: 11.88056, test_loss: 214.8599
Epoch: 10, train_loss: 11.878111, test_loss: 214.85953
Epoch: 11, train_loss: 11.8757305, test_loss: 214.8592
Epoch: 12, train_loss: 11.873411, test_loss: 214.85887
Epoch: 13, train_loss: 11.871157, test_loss: 214.8586
Epoch: 14, train_loss: 11.868963, test_loss: 214.8583
Epoch: 15, train_loss: 11.866831, test_loss: 214.85805
Epoch: 16, train_loss: 11.864754, test_loss: 214.85785
Epoch: 17, train_loss: 11.862736, test_loss: 214.85764
Epoch: 18, tr

In [13]:
# Sanity check: shape of the feature array in the first training mini-batch.
next(get_iterator(train_ds))[0].shape

(20, 200)