In [187]:
import numpy as np
from sklearn.preprocessing import StandardScaler

In [1]:
import pickle

with open('msd_full.pickle', 'rb') as f:
    data = pickle.load(f)

In [188]:
X_train = np.array(data['X_train'])
y_train = np.array(data['Y_train'])
X_test = np.array(data['X_test'])
y_test = np.array(data['Y_test'])

X_train

array([[  53.39967,   56.67781,   55.63508, ...,    1.3159 ,    2.37448,
           1.85999],
       [  42.83464,  -33.90478,   38.97704, ...,    9.72827, -175.51446,
         -15.8389 ],
       [  49.88591,   23.30563,   54.79012, ...,    1.94152,  101.27241,
           8.99355],
       ...,
       [  48.69997,   72.78532,   27.79159, ...,   -1.70515,   39.9382 ,
           4.83517],
       [  49.45505,   56.51035,   15.80259, ...,  -10.0645 ,   42.57409,
         -12.3541 ],
       [  40.31555,  -29.88336,    8.87396, ...,    5.81771,  -50.36502,
          -8.57407]])

In [195]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [196]:
from sklearn.model_selection import train_test_split

X_subtrain, X_validation, y_subtrain, y_validation = train_test_split(X_train, y_train, test_size=0.1)

# Q1 (90%):
Train and tune the models listed above. Report test RMSE for each model setting.   
# OLS

In [197]:
X_train[0:10000]

array([[ 1.64730436,  1.07368824,  1.33234846, ..., -0.23489897,
        -0.0939435 ,  0.02434362],
       [-0.09061289, -0.68133197,  0.8599771 , ...,  0.39661381,
        -1.05632047, -0.77849533],
       [ 1.06930088,  0.42710869,  1.30838796, ..., -0.18793396,
         0.44109309,  0.34792887],
       ...,
       [ 0.74586166,  1.17827461,  0.69388217, ..., -0.53936303,
        -0.6605043 , -0.3324114 ],
       [ 0.64885934, -0.24098716,  0.05695651, ..., -1.08791946,
        -1.07260746, -3.00830262],
       [ 1.13937502,  0.07639194,  0.39806435, ..., -0.27280759,
         0.09099401, -0.15508672]])

In [199]:
from sklearn.linear_model import LinearRegression

reg = LinearRegression().fit(X_subtrain[0:10000], y_subtrain[0:10000])
y_pred = reg.predict(X_test)
rmse = np.sqrt(((y_pred - y_test) ** 2).mean())

rmse

9.560350209959099

# MLP_0_dm

## Loading package and data

In [204]:
import d2l
from mxnet import autograd, gluon, np, npx
npx.set_np()

In [343]:
def load_array(data_arrays, batch_size, is_train=False):
    """Construct a Gluon data loader"""
    dataset = gluon.data.ArrayDataset(*data_arrays)
    return gluon.data.DataLoader(dataset, batch_size, shuffle=is_train)

In [302]:
features = X_subtrain[0:10000].astype('float32')
labels = (y_subtrain[0:10000] - y_train.mean()).astype('float32')

batch_size = 10000
data_iter = load_array((features, labels), batch_size)

In [222]:
from mxnet.gluon import nn
from mxnet.gluon import loss as gloss
from mxnet import init
from mxnet import gluon

## Hyperparameter tuning

In [319]:
rates = [0.1, 0.05, 0.01]
for rate in rates:
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))
    loss = gloss.L2Loss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': rate})

    num_epochs = 3000
    for epoch in range(1, num_epochs + 1):
        for X, y in data_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
        l = loss(net(np.array(features)), y)
    
    print('epoch %d, loss: %f' % (epoch, l.mean().asnumpy()))
    print(" , rate = ", rate)
    
    pred = net(np.array(X_validation).astype('float32'))
    print("rmse: ", np.sqrt(((pred.reshape(-1,) - np.array(y_validation - y_train.mean())) ** 2).mean()))

epoch 3000, loss: 77.469025
 , rate =  0.1
rmse:  9.594734078489857
epoch 3000, loss: 76.727158
 , rate =  0.05
rmse:  9.594735320798623
epoch 3000, loss: 76.441711
 , rate =  0.01
rmse:  9.59521700024044


### We choose learing rate at 0.1 for our training.
## Training and testing

In [320]:
net = nn.Sequential()
net.add(nn.Dense(1))
net.initialize(init.Normal(sigma=0.01))
loss = gloss.L2Loss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01})

num_epochs = 3000
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        trainer.step(batch_size)
    l = loss(net(np.array(features)), y)
    
print('epoch %d, loss: %f' % (epoch, l.mean().asnumpy()))
print(" , rate = ", rate)

y_pred = net(np.array(X_test).astype('float32'))
rmse = np.sqrt(((y_pred.reshape(-1,) - np.array(y_test - y_train.mean())) ** 2).mean())

print(rmse)

epoch 3000, loss: 77.246964
 , rate =  0.01
9.56082492348468


# MLP_1_dm

In [303]:
features = X_subtrain[0:10000].astype('float32')
labels = (y_subtrain[0:10000] - y_train.mean()).astype('float32')

batch_size = 10000
data_iter = load_array((features, labels), batch_size)

In [322]:
rates = [0.1, 0.05, 0.01, 0.005]

for rate in rates:
    net = nn.Sequential()
    net.add(nn.Dense(45, activation='relu'),
            nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))

    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': rate})

    for epoch in range(3000):
        train_loss= 0.
        for X, y in data_iter:
            # forward + backward
            with autograd.record():
                output = net(X)
                L2loss = gloss.L2Loss()
                loss = L2loss(output, y)
            loss.backward()
            # update parameters
            trainer.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean()
    print("Epoch %d: loss %.3f" % (epoch, train_loss))
    print(" , rate = ", rate)
    
    pred = net(np.array(X_validation).astype('float32'))
    print("rmse: ", np.sqrt(((pred.reshape(-1,) - np.array(y_validation - y_train.mean())) ** 2).mean()))

Epoch 2999: loss 22.214
 , rate =  0.1
rmse:  11.788026028807177
Epoch 2999: loss 25.003
 , rate =  0.05
rmse:  11.365048931062686
Epoch 2999: loss 26.312
 , rate =  0.01
rmse:  10.141809658083018
Epoch 2999: loss 31.847
 , rate =  0.005
rmse:  9.484435148499303


### We choose learing rate at 0.005 for our training.
## Training and Testing

In [323]:
net = nn.Sequential()
net.add(nn.Dense(45, activation='relu'),
        nn.Dense(1))
net.initialize(init.Normal(sigma=0.01))

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.005})

for epoch in range(3000):
    train_loss= 0.
    for X, y in data_iter:
        # forward + backward
        with autograd.record():
            output = net(X)
            L2loss = gloss.L2Loss()
            loss = L2loss(output, y)
        loss.backward()
        # update parameters
        trainer.step(batch_size)
        # calculate training metrics
        train_loss += loss.mean()
print("Epoch %d: loss %.3f" % (epoch, train_loss))

Epoch 2999: loss 32.006


In [325]:
y_pred = net(np.array(X_test).astype('float32'))
rmse = np.sqrt(((y_pred.reshape(-1,) - np.array(y_test - y_train.mean())) ** 2).mean())

print(rmse)

9.51705727318843


# MLP_2_dm

In [391]:
features = X_subtrain[0:10000].astype('float32')
labels = (y_subtrain[0:10000] - y_train.mean()).astype('float32')

batch_size = 10000
data_iter = load_array((features, labels), batch_size)

# Hyperparameter tuning

In [392]:
rates = [0.1, 0.05, 0.01, 0.005]

for rate in rates:
    net = nn.Sequential()
    net.add(nn.Dense(45, activation='relu'),
            nn.Dense(45, activation='relu'),
            nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))

    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': rate})

    for epoch in range(3000):
        train_loss= 0.
        for X, y in data_iter:
            # forward + backward
            with autograd.record():
                output = net(X)
                L2loss = gloss.L2Loss()
                loss = L2loss(output, y)
            loss.backward()
            # update parameters
            trainer.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean()
    print("Epoch %d: loss %.3f" % (epoch, train_loss))
    print(" , rate = ", rate)
    
    pred = net(np.array(X_validation).astype('float32'))
    print("rmse: ", np.sqrt(((pred.reshape(-1,) - np.array(y_validation - y_train.mean())) ** 2).mean()))

Epoch 2999: loss 8.068
 , rate =  0.1
rmse:  12.771718529410819
Epoch 2999: loss 8.314
 , rate =  0.05
rmse:  11.519813252632463
Epoch 2999: loss 16.177
 , rate =  0.01
rmse:  11.689718268337575
Epoch 2999: loss 18.413
 , rate =  0.005
rmse:  10.112713427602086


### We choose learing rate at 0.005 for our training.
## Training and Testing

In [393]:
net = nn.Sequential()
net.add(nn.Dense(45, activation='relu'),
        nn.Dense(45, activation='relu'),
        nn.Dense(1))
net.initialize(init.Normal(sigma=0.01))

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.005})

for epoch in range(3000):
    train_loss= 0.
    for X, y in data_iter:
        # forward + backward
        with autograd.record():
            output = net(X)
            L2loss = gloss.L2Loss()
            loss = L2loss(output, y)
        loss.backward()
        # update parameters
        trainer.step(batch_size)
        # calculate training metrics
        train_loss += loss.mean()
print("Epoch %d: loss %.3f" % (epoch, train_loss))

Epoch 2999: loss 18.397


In [394]:
y_pred = net(np.array(X_test).astype('float32'))
rmse = np.sqrt(((y_pred.reshape(-1,) - np.array(y_test - y_train.mean())) ** 2).mean())

print(rmse)

10.780411714542982


# MLP_2_dm_L2

In [353]:
features = X_subtrain[0:10000].astype('float32')
labels = (y_subtrain[0:10000] - y_train.mean()).astype('float32')

batch_size = 10000
data_iter = load_array((features, labels), batch_size)

# Hyperparameter tuning

In [374]:
wds = [5, 1, 0.5, 0]

for wd in wds:
    net = nn.Sequential()
    net.add(nn.Dense(45, activation='relu'),
            nn.Dense(45, activation='relu'),
            nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))

    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.005, 'wd': wd})

    for epoch in range(3000):
        train_loss= 0.
        for X, y in data_iter:
            # forward + backward
            with autograd.record():
                output = net(X)
                L2loss = gloss.L2Loss()
                loss = L2loss(output, y)
            loss.backward()
            # update parameters
            trainer.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean()
    print("Epoch %d: loss %.3f" % (epoch, train_loss))
    print(" , wd lambda = ", wd)
    
    pred = net(np.array(X_validation).astype('float32'))
    print("rmse: ", np.sqrt(((pred.reshape(-1,) - np.array(y_validation - y_train.mean())) ** 2).mean()))

Epoch 2999: loss 60.907
 , wd lambda =  5
rmse:  10.929705132185605
Epoch 2999: loss 60.906
 , wd lambda =  1
rmse:  10.929751922515324
Epoch 2999: loss 60.906
 , wd lambda =  0.5
rmse:  10.929792581386222
Epoch 2999: loss 9.654
 , wd lambda =  0
rmse:  12.203502508993948


### We choose weight-decay lambda at 5 for our training.
## Training and Testing

In [395]:
wd = 5

net = nn.Sequential()
net.add(nn.Dense(45, activation='relu'),
        nn.Dense(45, activation='relu'),
        nn.Dense(1))
net.initialize(init.Normal(sigma=0.01))

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.005, 'wd': wd})
net.collect_params('.*bias').setattr('wd_mult', 0)

for epoch in range(3000):
    train_loss= 0.
    for X, y in data_iter:
        # forward + backward
        with autograd.record():
            output = net(X)
            L2loss = gloss.L2Loss()
            loss = L2loss(output, y)
        loss.backward()
        # update parameters
        trainer.step(batch_size)
        # calculate training metrics
        train_loss += loss.mean()
print("Epoch %d: loss %.3f" % (epoch, train_loss))

Epoch 2999: loss 60.906


In [396]:
y_pred = net(np.array(X_test).astype('float32'))
rmse = np.sqrt(((y_pred.reshape(-1,) - np.array(y_test - y_train.mean())) ** 2).mean())

print(rmse)

10.853352511451803


# MLP_2_dm_dropout	

In [377]:
features = X_subtrain[0:10000].astype('float32')
labels = (y_subtrain[0:10000] - y_train.mean()).astype('float32')

batch_size = 10000
data_iter = load_array((features, labels), batch_size)

In [382]:
net = nn.Sequential()
net.add(nn.Dense(45, activation='relu'),
        nn.Dropout(0.5),
        nn.Dense(45, activation='relu'),
        nn.Dropout(0.5),
        nn.Dense(1))
net.initialize(init.Normal(sigma=0.01))

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.005})
net.collect_params('.*bias').setattr('wd_mult', 0)

for epoch in range(3000):
    train_loss= 0.
    for X, y in data_iter:
        # forward + backward
        with autograd.record():
            output = net(X)
            L2loss = gloss.L2Loss()
            loss = L2loss(output, y)
        loss.backward()
        # update parameters
        trainer.step(batch_size)
        # calculate training metrics
        train_loss += loss.mean()
print("Epoch %d: loss %.3f" % (epoch, train_loss))

Epoch 2999: loss 28.869


In [383]:
y_pred = net(np.array(X_test).astype('float32'))
rmse = np.sqrt(((y_pred.reshape(-1,) - np.array(y_test - y_train.mean())) ** 2).mean())

print(rmse)

9.694782538082917


# MLP_2_ykeep

In [413]:
features = X_subtrain[0:10000].astype('float32')
labels = (y_subtrain[0:10000]).astype('float32')

batch_size = 10000
data_iter = load_array((features, labels), batch_size)

# Hyperparameter tuning

In [422]:
rates = [1, 0.5, 0.1, 0.05]

for rate in rates:
    net = nn.Sequential()
    net.add(nn.Dense(45, activation='relu'),
            nn.Dense(45, activation='relu'),
            nn.Dense(1))
    net.initialize(init.Normal(sigma=0.1))

    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': rate})

    for epoch in range(3000):
        train_loss= 0.
        for X, y in data_iter:
            # forward + backward
            with autograd.record():
                output = net(X)
                L2loss = gloss.L2Loss()
                loss = L2loss(output, y)
            loss.backward()
            # update parameters
            trainer.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean()
    print("Epoch %d: loss %.3f" % (epoch, train_loss))
    print(" , rate = ", rate)
    
    pred = net(np.array(X_validation).astype('float32'))
    print("rmse: ", np.sqrt(((pred.reshape(-1,) - np.array(y_validation)) ** 2).mean()))

Epoch 2999: loss 60.906
 , rate =  1
rmse:  10.929908131989963
Epoch 2999: loss 60.906
 , rate =  0.5
rmse:  10.929908887816316
Epoch 2999: loss 60.906
 , rate =  0.1
rmse:  10.929911924754585
Epoch 2999: loss 60.906
 , rate =  0.05
rmse:  10.929915751601321


### We choose rate at 1 for our training.
## Training and Testing

In [423]:
net = nn.Sequential()
net.add(nn.Dense(45, activation='relu'),
        nn.Dense(45, activation='relu'),
        nn.Dense(1))
net.initialize(init.Normal(sigma=0.1))

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 1})

for epoch in range(3000):
    train_loss= 0.
    for X, y in data_iter:
        # forward + backward
        with autograd.record():
            output = net(X)
            L2loss = gloss.L2Loss()
            loss = L2loss(output, y)
        loss.backward()
        # update parameters
        trainer.step(batch_size)
        # calculate training metrics
        train_loss += loss.mean()
print("Epoch %d: loss %.3f" % (epoch, train_loss))

Epoch 2999: loss 60.906


In [424]:
y_pred = net(np.array(X_test).astype('float32'))
rmse = np.sqrt(((y_pred.reshape(-1,) - np.array(y_test)) ** 2).mean())

print(rmse)

10.853351746156997


# MLP_2_ykeep_L2

In [433]:
features = X_subtrain[0:10000].astype('float32')
labels = (y_subtrain[0:10000]).astype('float32')

batch_size = 10000
data_iter = load_array((features, labels), batch_size)

# Hyperparameter tuning

In [430]:
wds = [1, 0.5, 0]

for wd in wds:
    net = nn.Sequential()
    net.add(nn.Dense(45, activation='relu'),
            nn.Dense(45, activation='relu'),
            nn.Dense(1))
    net.initialize(init.Normal(sigma=0.1))

    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1, 'wd': wd})

    for epoch in range(3000):
        train_loss= 0.
        for X, y in data_iter:
            # forward + backward
            with autograd.record():
                output = net(X)
                L2loss = gloss.L2Loss()
                loss = L2loss(output, y)
            loss.backward()
            # update parameters
            trainer.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean()
    print("Epoch %d: loss %.3f" % (epoch, train_loss))
    print(" , wd lambda = ", wd)
    
    pred = net(np.array(X_validation).astype('float32'))
    print("rmse: ", np.sqrt(((pred.reshape(-1,) - np.array(y_validation)) ** 2).mean()))

Epoch 2999: loss 499220.750
 , wd lambda =  1
rmse:  999.286933469116
Epoch 2999: loss 221909.938
 , wd lambda =  0.5
rmse:  666.2639573799345
Epoch 2999: loss 60.906
 , wd lambda =  0
rmse:  10.929911924754585


In [437]:
wds = [0.01, 0.005, 0.001]

for wd in wds:
    net = nn.Sequential()
    net.add(nn.Dense(45, activation='relu'),
            nn.Dense(45, activation='relu'),
            nn.Dense(1))
    net.initialize(init.Normal(sigma=0.1))

    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1, 'wd': wd})

    for epoch in range(3000):
        train_loss= 0.
        for X, y in data_iter:
            # forward + backward
            with autograd.record():
                output = net(X)
                L2loss = gloss.L2Loss()
                loss = L2loss(output, y)
            loss.backward()
            # update parameters
            trainer.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean()
    print("Epoch %d: loss %.3f" % (epoch, train_loss))
    print(" , wd lambda = ", wd)
    
    pred = net(np.array(X_validation).astype('float32'))
    print("rmse: ", np.sqrt(((pred.reshape(-1,) - np.array(y_validation)) ** 2).mean()))

Epoch 2999: loss 256.646
 , wd lambda =  0.01
rmse:  22.663183547949902
Epoch 2999: loss 110.332
 , wd lambda =  0.005
rmse:  14.820958140469005
Epoch 2999: loss 62.900
 , wd lambda =  0.001
rmse:  11.122982395885943


### We choose weight-decay lambda at 0.001 for our training.
## Training and Testing

In [438]:
wd = 0.001

net = nn.Sequential()
net.add(nn.Dense(45, activation='relu'),
        nn.Dense(45, activation='relu'),
        nn.Dense(1))
net.initialize(init.Normal(sigma=0.1))

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1, 'wd': wd})
net.collect_params('.*bias').setattr('wd_mult', 0)

for epoch in range(3000):
    train_loss= 0.
    for X, y in data_iter:
        # forward + backward
        with autograd.record():
            output = net(X)
            L2loss = gloss.L2Loss()
            loss = L2loss(output, y)
        loss.backward()
        # update parameters
        trainer.step(batch_size)
        # calculate training metrics
        train_loss += loss.mean()
print("Epoch %d: loss %.3f" % (epoch, train_loss))

Epoch 2999: loss 60.906


In [439]:
y_pred = net(np.array(X_test).astype('float32'))
rmse = np.sqrt(((y_pred.reshape(-1,) - np.array(y_test)) ** 2).mean())

print(rmse)

10.85336171210052


# MLP_2_ykeep_dropout

In [448]:
features = X_subtrain[0:10000].astype('float32')
labels = (y_subtrain[0:10000]).astype('float32')

batch_size = 10000
data_iter = load_array((features, labels), batch_size)

In [454]:
net = nn.Sequential()
net.add(nn.Dense(45, activation='relu'),
        nn.Dropout(0.5),
        nn.Dense(45, activation='relu'),
        nn.Dropout(0.5),
        nn.Dense(1))
net.initialize(init.Normal(sigma=0.1))

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 1})
net.collect_params('.*bias').setattr('wd_mult', 0)

for epoch in range(3000):
    train_loss= 0.
    for X, y in data_iter:
        # forward + backward
        with autograd.record():
            output = net(X)
            L2loss = gloss.L2Loss()
            loss = L2loss(output, y)
        loss.backward()
        # update parameters
        trainer.step(batch_size)
        # calculate training metrics
        train_loss += loss.mean()
print("Epoch %d: loss %.3f" % (epoch, train_loss))

Epoch 2999: loss 60.906


In [455]:
y_pred = net(np.array(X_test).astype('float32'))
rmse = np.sqrt(((y_pred.reshape(-1,) - np.array(y_test)) ** 2).mean())

print(rmse)

10.853351746156997


# MLP_2_dm_dropout_full	

In [457]:
features = X_subtrain.astype('float32')
labels = (y_subtrain - y_train.mean()).astype('float32')

batch_size = len(X_subtrain)
data_iter = load_array((features, labels), batch_size)

# Hyperparameter tuning

In [466]:
rates = [0.5, 0.1, 0.05]    
for rate in rates:
    net = nn.Sequential()
    net.add(nn.Dense(45, activation='relu'),
            nn.Dropout(0.5),
            nn.Dense(45, activation='relu'),
            nn.Dropout(0.5),
            nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))

    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': rate})
    net.collect_params('.*bias').setattr('wd_mult', 0)

    for epoch in range(45):
        train_loss= 0.
        for X, y in data_iter:
            # forward + backward
            with autograd.record():
                output = net(X)
                L2loss = gloss.L2Loss()
                loss = L2loss(output, y)
            loss.backward()
            # update parameters
            trainer.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean()
    print("Epoch %d: loss %.3f" % (epoch, train_loss))
    print(" , rate = ", rate)
    
    pred = net(np.array(X_validation).astype('float32'))
    print("rmse: ", np.sqrt(((pred.reshape(-1,) - np.array(y_validation - y_train.mean())) ** 2).mean()))

Epoch 44: loss 59.852
 , rate =  0.5
rmse:  10.929698997234798
Epoch 44: loss 59.840
 , rate =  0.1
rmse:  10.928396746302868
Epoch 44: loss 59.850
 , rate =  0.05
rmse:  10.929526493338702


In [468]:
sigmas = [0.05, 0.01, 0.005]    
for sigma in sigmas:
    net = nn.Sequential()
    net.add(nn.Dense(45, activation='relu'),
            nn.Dropout(0.5),
            nn.Dense(45, activation='relu'),
            nn.Dropout(0.5),
            nn.Dense(1))
    net.initialize(init.Normal(sigma=sigma))

    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
    net.collect_params('.*bias').setattr('wd_mult', 0)

    for epoch in range(45):
        train_loss= 0.
        for X, y in data_iter:
            # forward + backward
            with autograd.record():
                output = net(X)
                L2loss = gloss.L2Loss()
                loss = L2loss(output, y)
            loss.backward()
            # update parameters
            trainer.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean()
    print("Epoch %d: loss %.3f" % (epoch, train_loss))
    print(" , sigma = ", sigma)
    
    pred = net(np.array(X_validation).astype('float32'))
    print("rmse: ", np.sqrt(((pred.reshape(-1,) - np.array(y_validation - y_train.mean())) ** 2).mean()))

Epoch 44: loss 58.475
 , sigma =  0.05
rmse:  10.447137595888048
Epoch 44: loss 59.831
 , sigma =  0.01
rmse:  10.927359071768528
Epoch 44: loss 59.851
 , sigma =  0.005
rmse:  10.929675632377949


## We choose learning rate at 0.1 for training and normal distribution sigma = 0.05 for initialization.

In [469]:
net = nn.Sequential()
net.add(nn.Dense(45, activation='relu'),
        nn.Dropout(0.5),
        nn.Dense(45, activation='relu'),
        nn.Dropout(0.5),
        nn.Dense(1))
net.initialize(init.Normal(sigma=0.05))

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
net.collect_params('.*bias').setattr('wd_mult', 0)

for epoch in range(45):
    train_loss= 0.
    for X, y in data_iter:
        # forward + backward
        with autograd.record():
            output = net(X)
            L2loss = gloss.L2Loss()
            loss = L2loss(output, y)
        loss.backward()
        # update parameters
        trainer.step(batch_size)
        # calculate training metrics
        train_loss += loss.mean()
print("Epoch %d: loss %.3f" % (epoch, train_loss))

Epoch 44: loss 51.525


In [470]:
y_pred = net(np.array(X_test).astype('float32'))
rmse = np.sqrt(((y_pred.reshape(-1,) - np.array(y_test - y_train.mean())) ** 2).mean())

print(rmse)

9.688246102835802


# Q2 (15%):
Summarize test RMSE in one table. Discuss your findings.

In [472]:
import pandas as pd

rmses = [9.560350209959099,  9.56082492348468, 9.51705727318843, 10.780411714542982, 10.853352511451803,
         9.694782538082917, 10.853351746156997, 10.85336171210052, 10.853351746156997, 9.688246102835802]

cases = ["OLS", "MLP_0_dm", "MLP_1_dm", "MLP_2_dm", "MLP_2_dm_L2", "MLP_2_dm_dropout", 
        "MLP_2_ykeep", "MLP_2_ykeep_L2", "MLP_2_ykeep_dropout", "MLP_2_dm_dropout_full"]

table = pd.DataFrame([cases, rmses]).T
table.columns = ["Case", "RMSE"]
table

Unnamed: 0,Case,RMSE
0,OLS,9.56035
1,MLP_0_dm,9.56082
2,MLP_1_dm,9.51706
3,MLP_2_dm,10.7804
4,MLP_2_dm_L2,10.8534
5,MLP_2_dm_dropout,9.69478
6,MLP_2_ykeep,10.8534
7,MLP_2_ykeep_L2,10.8534
8,MLP_2_ykeep_dropout,10.8534
9,MLP_2_dm_dropout_full,9.68825


* 效果最好的為單層的MLP，OLS次之
* 對Y進行的De-mean效果好於未De-mean的版本
* 在有De-mean的情況下：
  + 使用Weight-decay會降低效果，可能因為Model不夠powerful
  + 使用Dropout則顯著提升效果
* 未De-mean的話則效果不顯著
* 在De-mean及Drop-out的情況下，使用全部的資料效果好於只使用前10000筆