## Q1

In [1]:
import numpy as np
import pickle
import mxnet as mx

In [25]:
from sklearn.preprocessing import StandardScaler

In [32]:
from mxnet import gluon, init, npx, autograd
from mxnet.gluon import nn
from mxnet.gluon.data.dataset import ArrayDataset
from mxnet.gluon.data import DataLoader
from mxnet.gluon.loss import L2Loss

In [7]:
file = open('msd_full.pickle', 'rb')
raw_data = pickle.load(file)

In [40]:
x_train = raw_data['X_train'].astype('float32')
y_train = raw_data['Y_train'].astype('float32')
x_test = raw_data['X_test'].astype('float32')
y_test = raw_data['Y_test'].astype('float32')

In [41]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [42]:
x_train_10000 = x_train[0:10000]
y_train_10000 = y_train[0:10000]
x_subtrain = x_train[0:int(len(x_train)*0.9)]
y_subtrain = y_train[0:int(len(y_train)*0.9)]
x_tune = x_train[int(len(x_train)*0.9):]
y_tune = y_train[int(len(y_train)*0.9):]

In [51]:
y_train_mean = y_train.mean()

### 1. OLS
- y: keep
- num of obs.: 10,000
- model: linear regression
- regularization: none

In [84]:
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [86]:
regressor = LinearRegression()  
regressor.fit(x_train_10000, y_train_10000)
y_pred = regressor.predict(x_test)
print('test rmse', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

test rmse 9.550725


### 2. MLP_0_dm
- y: de-mean
- num of obs: 10,000
- model: MLP, no hidden layers
- regularization: None

In [89]:
ctx =  mx.gpu() if mx.context.num_gpus() else mx.cpu()
y_train_demean = y_train_10000 - y_train_mean
y_tune_demean = y_tune - y_train_mean
y_test_demean = y_test - y_train_mean
y_train_fortest = y_train - y_train_mean

dataset = ArrayDataset(x_train_10000, y_train_demean)
train_data_loader = mx.gluon.data.DataLoader(dataset, batch_size = 512)
dataset_tune = ArrayDataset(x_tune, y_tune_demean)
tune_data_loader = mx.gluon.data.DataLoader(dataset_tune, batch_size = 512)

## for test
dataset_fortest = ArrayDataset(x_train, y_train_fortest)
train_dara_loader_fortest = mx.gluon.data.DataLoader(dataset_fortest, batch_size = 512)
dataset_test = ArrayDataset(x_test, y_test_demean)
test_data_loader = mx.gluon.data.DataLoader(dataset_test, batch_size = 512)

In [90]:
# construct and initialize network.
net = nn.Sequential()
net.add(nn.Dense(1))

# define loss and trainer.
criterion = gluon.loss.L2Loss()

epochs = 1000
lr = [0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]

In [91]:
result_lr = {}

for index in lr:
    net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': index})
    train_history = []
    
    for epoch in range(epochs):
        cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
        training_samples = 0
        for x, y in train_data_loader:
            with autograd.record():
                y_train_pred = net(x)
                loss = criterion(y_train_pred, y)
            loss.backward()
            trainer.step(512)
            cumulative_train_loss += loss.sum()
            training_samples += x.shape[0]
        train_loss = cumulative_train_loss.asscalar()/training_samples
        train_rmse = np.sqrt(train_loss)

        # validation loop
        cumulative_valid_loss = mx.nd.zeros(1, ctx)
        valid_samples = 0
        for x, y in tune_data_loader:
            y_tune_pred = net(x)
            loss = criterion(y_tune_pred, y)
            cumulative_valid_loss += loss.sum()
            valid_samples += x.shape[0]
        valid_loss = cumulative_valid_loss.asscalar()/valid_samples
        valid_rmse = np.sqrt(valid_loss)
        train_history.append(valid_rmse)
        if len(train_history) > 50:
            if train_history[-51] < min(train_history[-50:]):
                print("Early Stopped", len(train_history))
                break
        
    result_lr[index] = valid_rmse
            
## use the best lr for testing
best_lr, _ = sorted(result_lr.items(), key = lambda item: item[1])[0]
print("best lr:", best_lr)

##########

net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': best_lr})
train_history = []

for epoch in range(epochs):
    cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
    training_samples = 0
    for x, y in train_dara_loader_fortest:
        with autograd.record():
            y_train_pred = net(x)
            loss = criterion(y_train_pred, y)
        loss.backward()
        trainer.step(512)
        cumulative_train_loss += loss.sum()
        training_samples += x.shape[0]
    train_loss = cumulative_train_loss.asscalar()/training_samples
    train_rmse = np.sqrt(train_loss)

    # test loop
    cumulative_test_loss = mx.nd.zeros(1, ctx)
    test_samples = 0
    for x, y in test_data_loader:
        y_test_pred = net(x)
        loss = criterion(y_test_pred, y)
        cumulative_test_loss += loss.sum()
        test_samples += x.shape[0]
    test_loss = cumulative_test_loss.asscalar()/test_samples
    test_rmse = np.sqrt(test_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
    train_history.append(test_rmse)
    if len(train_history) > 50:
        if train_history[-51] < min(train_history[-50:]):
            print("Early Stopped", len(train_history))
            break
print('test rmse:', test_rmse)

best lr: 0.05
test rmse: 6.735885451678123


### 3. MLP_1_dm
- y: de-mean
- num of obs: 10,000
- model: MLP, one hidden layer + ReLU
- regularization: None

In [92]:
y_train_demean = y_train_10000 - y_train_mean
y_tune_demean = y_tune - y_train_mean
y_test_demean = y_test - y_train_mean
y_train_fortest = y_train - y_train_mean

dataset = ArrayDataset(x_train_10000, y_train_demean)
train_data_loader = mx.gluon.data.DataLoader(dataset, batch_size = 512)
dataset_tune = ArrayDataset(x_tune, y_tune_demean)
tune_data_loader = mx.gluon.data.DataLoader(dataset_tune, batch_size = 512)

## for test
dataset_fortest = ArrayDataset(x_train, y_train_fortest)
train_dara_loader_fortest = mx.gluon.data.DataLoader(dataset_fortest, batch_size = 512)
dataset_test = ArrayDataset(x_test, y_test_demean)
test_data_loader = mx.gluon.data.DataLoader(dataset_test, batch_size = 512)

In [93]:
# construct and initialize network.
ctx =  mx.gpu() if mx.context.num_gpus() else mx.cpu()

net = nn.Sequential()
net.add(nn.Dense(45, activation="relu"))
net.add(nn.Dense(1, activation="relu"))

# define loss and trainer.
criterion = gluon.loss.L2Loss()

# epochs = 1000
# lr = [0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]

In [94]:
result_lr = {}

for index in lr:
    net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': index})
    train_history = []
    
    for epoch in range(epochs):
        cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
        training_samples = 0
        for x, y in train_data_loader:
            with autograd.record():
                y_train_pred = net(x)
                loss = criterion(y_train_pred, y)
            loss.backward()
            trainer.step(512)
            cumulative_train_loss += loss.sum()
            training_samples += x.shape[0]
        train_loss = cumulative_train_loss.asscalar()/training_samples
        train_rmse = np.sqrt(train_loss)

        # validation loop
        cumulative_valid_loss = mx.nd.zeros(1, ctx)
        valid_samples = 0
        for x, y in tune_data_loader:
            y_tune_pred = net(x)
            loss = criterion(y_tune_pred, y)
            cumulative_valid_loss += loss.sum()
            valid_samples += x.shape[0]
        valid_loss = cumulative_valid_loss.asscalar()/valid_samples
        valid_rmse = np.sqrt(valid_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
        train_history.append(valid_rmse)
        if len(train_history) > 50:
            if train_history[-51] < min(train_history[-50:]):
                print("Early Stopped", len(train_history))
                break
        
    result_lr[index] = valid_rmse
            
## use the best lr for testing
best_lr, _ = sorted(result_lr.items(), key = lambda item: item[1])[0]
print("best lr:", best_lr)

##########

net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': best_lr})
train_history = []

for epoch in range(epochs):
    cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
    training_samples = 0
    for x, y in train_dara_loader_fortest:
        with autograd.record():
            y_train_pred = net(x)
            loss = criterion(y_train_pred, y)
        loss.backward()
        trainer.step(512)
        cumulative_train_loss += loss.sum()
        training_samples += x.shape[0]
    train_loss = cumulative_train_loss.asscalar()/training_samples
    train_rmse = np.sqrt(train_loss)

    # test loop
    cumulative_test_loss = mx.nd.zeros(1, ctx)
    test_samples = 0
    for x, y in test_data_loader:
        y_test_pred = net(x)
        loss = criterion(y_test_pred, y)
        cumulative_test_loss += loss.sum()
        test_samples += x.shape[0]
    test_loss = cumulative_test_loss.asscalar()/test_samples
    test_rmse = np.sqrt(test_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
    train_history.append(test_rmse)
    if len(train_history) > 50:
        if train_history[-51] < min(train_history[-50:]):
            print("Early Stopped", len(train_history))
            break
print('test rmse:', test_rmse)

best lr: 0.05
test rmse: 7.171724924209921


### 4. MLP_2_dm
- y: de-mean
- num of obs: 10,000
- model: MLP, two hidden layers + ReLu
- regularization: None

In [95]:
y_train_demean = y_train_10000 - y_train_mean
y_tune_demean = y_tune - y_train_mean
y_test_demean = y_test - y_train_mean
y_train_fortest = y_train - y_train_mean

dataset = ArrayDataset(x_train_10000, y_train_demean)
train_data_loader = mx.gluon.data.DataLoader(dataset, batch_size = 512)
dataset_tune = ArrayDataset(x_tune, y_tune_demean)
tune_data_loader = mx.gluon.data.DataLoader(dataset_tune, batch_size = 512)

## for test
dataset_fortest = ArrayDataset(x_train, y_train_fortest)
train_dara_loader_fortest = mx.gluon.data.DataLoader(dataset_fortest, batch_size = 512)
dataset_test = ArrayDataset(x_test, y_test_demean)
test_data_loader = mx.gluon.data.DataLoader(dataset_test, batch_size = 512)

In [96]:
# construct and initialize network.
ctx =  mx.gpu() if mx.context.num_gpus() else mx.cpu()

net = nn.Sequential()
net.add(nn.Dense(45, activation="relu"))
net.add(nn.Dense(45, activation="relu"))
net.add(nn.Dense(1, activation="relu"))

# define loss and trainer.
criterion = gluon.loss.L2Loss()

# epochs = 1000
# lr = [0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]

In [97]:
result_lr = {}

for index in lr:
    net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': index})
    train_history = []
    
    for epoch in range(epochs):
        cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
        training_samples = 0
        for x, y in train_data_loader:
            with autograd.record():
                y_train_pred = net(x)
                loss = criterion(y_train_pred, y)
            loss.backward()
            trainer.step(512)
            cumulative_train_loss += loss.sum()
            training_samples += x.shape[0]
        train_loss = cumulative_train_loss.asscalar()/training_samples
        train_rmse = np.sqrt(train_loss)

        # validation loop
        cumulative_valid_loss = mx.nd.zeros(1, ctx)
        valid_samples = 0
        for x, y in tune_data_loader:
            y_tune_pred = net(x)
            loss = criterion(y_tune_pred, y)
            cumulative_valid_loss += loss.sum()
            valid_samples += x.shape[0]
        valid_loss = cumulative_valid_loss.asscalar()/valid_samples
        valid_rmse = np.sqrt(valid_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
        train_history.append(valid_rmse)
        if len(train_history) > 50:
            if train_history[-51] < min(train_history[-50:]):
                print("Early Stopped", len(train_history))
                break
        
    result_lr[index] = valid_rmse
            
## use the best lr for testing
best_lr, _ = sorted(result_lr.items(), key = lambda item: item[1])[0]
print("best lr:", best_lr)

##########

net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': best_lr})
train_history = []

for epoch in range(epochs):
    cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
    training_samples = 0
    for x, y in train_dara_loader_fortest:
        with autograd.record():
            y_train_pred = net(x)
            loss = criterion(y_train_pred, y)
        loss.backward()
        trainer.step(512)
        cumulative_train_loss += loss.sum()
        training_samples += x.shape[0]
    train_loss = cumulative_train_loss.asscalar()/training_samples
    train_rmse = np.sqrt(train_loss)

    # test loop
    cumulative_test_loss = mx.nd.zeros(1, ctx)
    test_samples = 0
    for x, y in test_data_loader:
        y_test_pred = net(x)
        loss = criterion(y_test_pred, y)
        cumulative_test_loss += loss.sum()
        test_samples += x.shape[0]
    test_loss = cumulative_test_loss.asscalar()/test_samples
    test_rmse = np.sqrt(test_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
    train_history.append(test_rmse)
    if len(train_history) > 50:
        if train_history[-51] < min(train_history[-50:]):
            print("Early Stopped", len(train_history))
            break
print('test rmse:', test_rmse)

best lr: 0.05
test rmse: 7.673851781349119


### 5. MLP_2_dm_L2
- y: de-mean
- num of obs: 10,000
- model: MLP, two hidden layers + ReLu
- regularization: Weight Decay (L2)

In [98]:
y_train_demean = y_train_10000 - y_train_mean
y_tune_demean = y_tune - y_train_mean
y_test_demean = y_test - y_train_mean
y_train_fortest = y_train - y_train_mean

dataset = ArrayDataset(x_train_10000, y_train_demean)
train_data_loader = mx.gluon.data.DataLoader(dataset, batch_size = 512)
dataset_tune = ArrayDataset(x_tune, y_tune_demean)
tune_data_loader = mx.gluon.data.DataLoader(dataset_tune, batch_size = 512)

## for test
dataset_fortest = ArrayDataset(x_train, y_train_fortest)
train_dara_loader_fortest = mx.gluon.data.DataLoader(dataset_fortest, batch_size = 512)
dataset_test = ArrayDataset(x_test, y_test_demean)
test_data_loader = mx.gluon.data.DataLoader(dataset_test, batch_size = 512)

In [103]:
# construct and initialize network.
ctx =  mx.gpu() if mx.context.num_gpus() else mx.cpu()

net = nn.Sequential()
net.add(nn.Dense(45, activation="relu"))
net.add(nn.Dense(45, activation="relu"))
net.add(nn.Dense(1, activation="relu"))
# define loss and trainer.
criterion = gluon.loss.L2Loss()

weight_decay = [0.1, 0.001, 0.0001]
# epochs = 1000
# lr = [0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]

In [104]:
result_lr = {}
result_wd = {}
for wd in weight_decay:
    for index in lr:
        net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
        trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': index, 'wd': wd})
        train_history = []

        for epoch in range(epochs):
            cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
            training_samples = 0
            for x, y in train_data_loader:
                with autograd.record():
                    y_train_pred = net(x)
                    loss = criterion(y_train_pred, y)
                loss.backward()
                trainer.step(512)
                cumulative_train_loss += loss.sum()
                training_samples += x.shape[0]
            train_loss = cumulative_train_loss.asscalar()/training_samples
            train_rmse = np.sqrt(train_loss)

            # validation loop
            cumulative_valid_loss = mx.nd.zeros(1, ctx)
            valid_samples = 0
            for x, y in tune_data_loader:
                y_tune_pred = net(x)
                loss = criterion(y_tune_pred, y)
                cumulative_valid_loss += loss.sum()
                valid_samples += x.shape[0]
            valid_loss = cumulative_valid_loss.asscalar()/valid_samples
            valid_rmse = np.sqrt(valid_loss)
    #         if epoch%20 == 0:
    #             print('epoch:', epoch, 'loss:', valid_rmse)
            train_history.append(valid_rmse)
            if len(train_history) > 50:
                if train_history[-51] < min(train_history[-50:]):
                    print("Early Stopped", len(train_history))
                    break

        result_lr[index] = valid_rmse
        result_wd[wd] = valid_rmse
            
## use the best lr for testing
best_lr, _ = sorted(result_lr.items(), key = lambda item: item[1])[0]
best_wd, _ = sorted(result_wd.items(), key = lambda item: item[1])[0]

print("best lr:", best_lr)
print("best wd:", best_wd)

##########

net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': best_lr, 'wd': best_wd})
train_history = []

for epoch in range(epochs):
    cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
    training_samples = 0
    for x, y in train_dara_loader_fortest:
        with autograd.record():
            y_train_pred = net(x)
            loss = criterion(y_train_pred, y)
        loss.backward()
        trainer.step(512)
        cumulative_train_loss += loss.sum()
        training_samples += x.shape[0]
    train_loss = cumulative_train_loss.asscalar()/training_samples
    train_rmse = np.sqrt(train_loss)

    # test loop
    cumulative_test_loss = mx.nd.zeros(1, ctx)
    test_samples = 0
    for x, y in test_data_loader:
        y_test_pred = net(x)
        loss = criterion(y_test_pred, y)
        cumulative_test_loss += loss.sum()
        test_samples += x.shape[0]
    test_loss = cumulative_test_loss.asscalar()/test_samples
    test_rmse = np.sqrt(test_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
    train_history.append(test_rmse)
    if len(train_history) > 50:
        if train_history[-51] < min(train_history[-50:]):
            print("Early Stopped", len(train_history))
            break
print('test rmse:', test_rmse)

best lr: 0.001
best wd: 0.0001
test rmse: 7.237926156936364


### 6. MLP_2_dm_dropout
- y: de-mean
- num of obs: 10,000
- model: MLP, two hidden layers + ReLu
- regularization: Dropout

In [105]:
y_train_demean = y_train_10000 - y_train_mean
y_tune_demean = y_tune - y_train_mean
y_test_demean = y_test - y_train_mean
y_train_fortest = y_train - y_train_mean

dataset = ArrayDataset(x_train_10000, y_train_demean)
train_data_loader = mx.gluon.data.DataLoader(dataset, batch_size = 512)
dataset_tune = ArrayDataset(x_tune, y_tune_demean)
tune_data_loader = mx.gluon.data.DataLoader(dataset_tune, batch_size = 512)

## for test
dataset_fortest = ArrayDataset(x_train, y_train_fortest)
train_dara_loader_fortest = mx.gluon.data.DataLoader(dataset_fortest, batch_size = 512)
dataset_test = ArrayDataset(x_test, y_test_demean)
test_data_loader = mx.gluon.data.DataLoader(dataset_test, batch_size = 512)

In [106]:
# construct and initialize network.
ctx =  mx.gpu() if mx.context.num_gpus() else mx.cpu()

net = nn.Sequential()
net.add(nn.Dense(45, activation="relu"))
net.add(gluon.nn.Dropout(.5))
net.add(nn.Dense(45, activation="relu"))
net.add(gluon.nn.Dropout(.5))
net.add(nn.Dense(1, activation="relu"))
# define loss and trainer.
criterion = gluon.loss.L2Loss()

# epochs = 1000
# lr = [0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]

In [107]:
result_lr = {}

for index in lr:
    net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': index})
    train_history = []
    
    for epoch in range(epochs):
        cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
        training_samples = 0
        for x, y in train_data_loader:
            with autograd.record():
                y_train_pred = net(x)
                loss = criterion(y_train_pred, y)
            loss.backward()
            trainer.step(512)
            cumulative_train_loss += loss.sum()
            training_samples += x.shape[0]
        train_loss = cumulative_train_loss.asscalar()/training_samples
        train_rmse = np.sqrt(train_loss)

        # validation loop
        cumulative_valid_loss = mx.nd.zeros(1, ctx)
        valid_samples = 0
        for x, y in tune_data_loader:
            y_tune_pred = net(x)
            loss = criterion(y_tune_pred, y)
            cumulative_valid_loss += loss.sum()
            valid_samples += x.shape[0]
        valid_loss = cumulative_valid_loss.asscalar()/valid_samples
        valid_rmse = np.sqrt(valid_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
        train_history.append(valid_rmse)
        if len(train_history) > 50:
            if train_history[-51] < min(train_history[-50:]):
                print("Early Stopped", len(train_history))
                break
        
    result_lr[index] = valid_rmse
            
## use the best lr for testing
best_lr, _ = sorted(result_lr.items(), key = lambda item: item[1])[0]
print("best lr:", best_lr)

##########

net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': best_lr})
train_history = []

for epoch in range(epochs):
    cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
    training_samples = 0
    for x, y in train_dara_loader_fortest:
        with autograd.record():
            y_train_pred = net(x)
            loss = criterion(y_train_pred, y)
        loss.backward()
        trainer.step(512)
        cumulative_train_loss += loss.sum()
        training_samples += x.shape[0]
    train_loss = cumulative_train_loss.asscalar()/training_samples
    train_rmse = np.sqrt(train_loss)

    # test loop
    cumulative_test_loss = mx.nd.zeros(1, ctx)
    test_samples = 0
    for x, y in test_data_loader:
        y_test_pred = net(x)
        loss = criterion(y_test_pred, y)
        cumulative_test_loss += loss.sum()
        test_samples += x.shape[0]
    test_loss = cumulative_test_loss.asscalar()/test_samples
    test_rmse = np.sqrt(test_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
    train_history.append(test_rmse)
    if len(train_history) > 50:
        if train_history[-51] < min(train_history[-50:]):
            print("Early Stopped", len(train_history))
            break
print('test rmse:', test_rmse)

best lr: 0.0001
test rmse: 7.673851781349119


### 7. MLP_2_ykeep
- y: keep
- num of obs: 10,000
- model: MLP, two hidden layers + ReLu
- regularization: None

In [108]:
dataset = ArrayDataset(x_train_10000, y_train_10000)
train_data_loader = mx.gluon.data.DataLoader(dataset, batch_size = 512)
dataset_tune = ArrayDataset(x_tune, y_tune)
tune_data_loader = mx.gluon.data.DataLoader(dataset_tune, batch_size = 512)

## for test
dataset_fortest = ArrayDataset(x_train, y_train)
train_dara_loader_fortest = mx.gluon.data.DataLoader(dataset_fortest, batch_size = 512)
dataset_test = ArrayDataset(x_test, y_test)
test_data_loader = mx.gluon.data.DataLoader(dataset_test, batch_size = 512)

In [109]:
# construct and initialize network.
ctx =  mx.gpu() if mx.context.num_gpus() else mx.cpu()

net = nn.Sequential()
net.add(nn.Dense(45, activation="relu"))
net.add(nn.Dense(45, activation="relu"))
net.add(nn.Dense(1, activation="relu"))
# define loss and trainer.
criterion = gluon.loss.L2Loss()

# epochs = 1000
lr = [0.00001, 0.000005, 0.000001]

In [110]:
result_lr = {}

for index in lr:
    net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': index})
    train_history = []
    
    for epoch in range(epochs):
        cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
        training_samples = 0
        for x, y in train_data_loader:
            with autograd.record():
                y_train_pred = net(x)
                loss = criterion(y_train_pred, y)
            loss.backward()
            trainer.step(512)
            cumulative_train_loss += loss.sum()
            training_samples += x.shape[0]
        train_loss = cumulative_train_loss.asscalar()/training_samples
        train_rmse = np.sqrt(train_loss)

        # validation loop
        cumulative_valid_loss = mx.nd.zeros(1, ctx)
        valid_samples = 0
        for x, y in tune_data_loader:
            y_tune_pred = net(x)
            loss = criterion(y_tune_pred, y)
            cumulative_valid_loss += loss.sum()
            valid_samples += x.shape[0]
        valid_loss = cumulative_valid_loss.asscalar()/valid_samples
        valid_rmse = np.sqrt(valid_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
        train_history.append(valid_rmse)
        if len(train_history) > 50:
            if train_history[-51] < min(train_history[-50:]):
                print("Early Stopped", len(train_history))
                break
        
    result_lr[index] = valid_rmse
            
## use the best lr for testing
best_lr, _ = sorted(result_lr.items(), key = lambda item: item[1])[0]
print("best lr:", best_lr)

##########

net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': best_lr})
train_history = []

for epoch in range(epochs):
    cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
    training_samples = 0
    for x, y in train_dara_loader_fortest:
        with autograd.record():
            y_train_pred = net(x)
            loss = criterion(y_train_pred, y)
        loss.backward()
        trainer.step(512)
        cumulative_train_loss += loss.sum()
        training_samples += x.shape[0]
    train_loss = cumulative_train_loss.asscalar()/training_samples
    train_rmse = np.sqrt(train_loss)

    # test loop
    cumulative_test_loss = mx.nd.zeros(1, ctx)
    test_samples = 0
    for x, y in test_data_loader:
        y_test_pred = net(x)
        loss = criterion(y_test_pred, y)
        cumulative_test_loss += loss.sum()
        test_samples += x.shape[0]
    test_loss = cumulative_test_loss.asscalar()/test_samples
    test_rmse = np.sqrt(test_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
    train_history.append(test_rmse)
    if len(train_history) > 50:
        if train_history[-51] < min(train_history[-50:]):
            print("Early Stopped", len(train_history))
            break
print('test rmse:', test_rmse)

best lr: 1e-05
test rmse: 8.305737185328523


### 8. MLP_2_ykeep_L2
- y: keep
- num of obs: 10,000
- model: MLP, two hidden layers + ReLu
- regularization: Weight Decay (L2)

In [111]:
dataset = ArrayDataset(x_train_10000, y_train_10000)
train_data_loader = mx.gluon.data.DataLoader(dataset, batch_size = 512)
dataset_tune = ArrayDataset(x_tune, y_tune)
tune_data_loader = mx.gluon.data.DataLoader(dataset_tune, batch_size = 512)

## for test
dataset_fortest = ArrayDataset(x_train, y_train)
train_dara_loader_fortest = mx.gluon.data.DataLoader(dataset_fortest, batch_size = 512)
dataset_test = ArrayDataset(x_test, y_test)
test_data_loader = mx.gluon.data.DataLoader(dataset_test, batch_size = 512)

In [112]:
# construct and initialize network.
ctx =  mx.gpu() if mx.context.num_gpus() else mx.cpu()

net = nn.Sequential()
net.add(nn.Dense(45, activation="relu"))
net.add(nn.Dense(45, activation="relu"))
net.add(nn.Dense(1, activation="relu"))
# define loss and trainer.
criterion = gluon.loss.L2Loss()

# epochs = 1000
lr = [0.00001, 0.000005, 0.000001]

In [113]:
result_lr = {}
result_wd = {}

for wd in weight_decay:
    for index in lr:
        net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
        trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': index, 'wd': wd})
        train_history = []

        for epoch in range(epochs):
            cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
            training_samples = 0
            for x, y in train_data_loader:
                with autograd.record():
                    y_train_pred = net(x)
                    loss = criterion(y_train_pred, y)
                loss.backward()
                trainer.step(512)
                cumulative_train_loss += loss.sum()
                training_samples += x.shape[0]
            train_loss = cumulative_train_loss.asscalar()/training_samples
            train_rmse = np.sqrt(train_loss)

            # validation loop
            cumulative_valid_loss = mx.nd.zeros(1, ctx)
            valid_samples = 0
            for x, y in tune_data_loader:
                y_tune_pred = net(x)
                loss = criterion(y_tune_pred, y)
                cumulative_valid_loss += loss.sum()
                valid_samples += x.shape[0]
            valid_loss = cumulative_valid_loss.asscalar()/valid_samples
            valid_rmse = np.sqrt(valid_loss)
    #         if epoch%20 == 0:
    #             print('epoch:', epoch, 'loss:', valid_rmse)
            train_history.append(valid_rmse)
            if len(train_history) > 50:
                if train_history[-51] < min(train_history[-50:]):
                    print("Early Stopped", len(train_history))
                    break

        result_lr[index] = valid_rmse
        result_wd[wd] = valid_rmse
            
## use the best lr for testing
best_lr, _ = sorted(result_lr.items(), key = lambda item: item[1])[0]
best_wd, _ = sorted(result_wd.items(), key = lambda item: item[1])[0]

print("best lr:", best_lr)
print("best wd:", best_wd)

##########

net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': best_lr, 'wd': best_wd})
train_history = []

for epoch in range(epochs):
    cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
    training_samples = 0
    for x, y in train_dara_loader_fortest:
        with autograd.record():
            y_train_pred = net(x)
            loss = criterion(y_train_pred, y)
        loss.backward()
        trainer.step(512)
        cumulative_train_loss += loss.sum()
        training_samples += x.shape[0]
    train_loss = cumulative_train_loss.asscalar()/training_samples
    train_rmse = np.sqrt(train_loss)

    # test loop
    cumulative_test_loss = mx.nd.zeros(1, ctx)
    test_samples = 0
    for x, y in test_data_loader:
        y_test_pred = net(x)
        loss = criterion(y_test_pred, y)
        cumulative_test_loss += loss.sum()
        test_samples += x.shape[0]
    test_loss = cumulative_test_loss.asscalar()/test_samples
    test_rmse = np.sqrt(test_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
    train_history.append(test_rmse)
    if len(train_history) > 50:
        if train_history[-51] < min(train_history[-50:]):
            print("Early Stopped", len(train_history))
            break
print('test rmse:', test_rmse)

best lr: 1e-05
best wd: 0.001
test rmse: 8.146696957364185


### 9. MLP_2_ykeep_dropout
- y: keep
- num of obs: 10,000
- model: MLP, two hidden layers + ReLu
- regularization: Dropout

In [114]:
dataset = ArrayDataset(x_train_10000, y_train_10000)
train_data_loader = mx.gluon.data.DataLoader(dataset, batch_size = 512)
dataset_tune = ArrayDataset(x_tune, y_tune)
tune_data_loader = mx.gluon.data.DataLoader(dataset_tune, batch_size = 512)

## for test
dataset_fortest = ArrayDataset(x_train, y_train)
train_dara_loader_fortest = mx.gluon.data.DataLoader(dataset_fortest, batch_size = 512)
dataset_test = ArrayDataset(x_test, y_test)
test_data_loader = mx.gluon.data.DataLoader(dataset_test, batch_size = 512)

In [115]:
# construct and initialize network.
ctx =  mx.gpu() if mx.context.num_gpus() else mx.cpu()

net = nn.Sequential()
net.add(nn.Dense(45, activation="relu"))
net.add(gluon.nn.Dropout(.5))
net.add(nn.Dense(45, activation="relu"))
net.add(gluon.nn.Dropout(.5))
net.add(nn.Dense(1, activation="relu"))
# define loss and trainer.
criterion = gluon.loss.L2Loss()

# epochs = 1000
lr = [0.00001, 0.000005, 0.000001]

In [116]:
result_lr = {}

for index in lr:
    net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': index})
    train_history = []
    
    for epoch in range(epochs):
        cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
        training_samples = 0
        for x, y in train_data_loader:
            with autograd.record():
                y_train_pred = net(x)
                loss = criterion(y_train_pred, y)
            loss.backward()
            trainer.step(512)
            cumulative_train_loss += loss.sum()
            training_samples += x.shape[0]
        train_loss = cumulative_train_loss.asscalar()/training_samples
        train_rmse = np.sqrt(train_loss)

        # validation loop
        cumulative_valid_loss = mx.nd.zeros(1, ctx)
        valid_samples = 0
        for x, y in tune_data_loader:
            y_tune_pred = net(x)
            loss = criterion(y_tune_pred, y)
            cumulative_valid_loss += loss.sum()
            valid_samples += x.shape[0]
        valid_loss = cumulative_valid_loss.asscalar()/valid_samples
        valid_rmse = np.sqrt(valid_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
        train_history.append(valid_rmse)
        if len(train_history) > 50:
            if train_history[-51] < min(train_history[-50:]):
                print("Early Stopped", len(train_history))
                break
        
    result_lr[index] = valid_rmse
            
## use the best lr for testing
best_lr, _ = sorted(result_lr.items(), key = lambda item: item[1])[0]
print("best lr:", best_lr)

##########

net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': best_lr})
train_history = []

for epoch in range(epochs):
    cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
    training_samples = 0
    for x, y in train_dara_loader_fortest:
        with autograd.record():
            y_train_pred = net(x)
            loss = criterion(y_train_pred, y)
        loss.backward()
        trainer.step(512)
        cumulative_train_loss += loss.sum()
        training_samples += x.shape[0]
    train_loss = cumulative_train_loss.asscalar()/training_samples
    train_rmse = np.sqrt(train_loss)

    # test loop
    cumulative_test_loss = mx.nd.zeros(1, ctx)
    test_samples = 0
    for x, y in test_data_loader:
        y_test_pred = net(x)
        loss = criterion(y_test_pred, y)
        cumulative_test_loss += loss.sum()
        test_samples += x.shape[0]
    test_loss = cumulative_test_loss.asscalar()/test_samples
    test_rmse = np.sqrt(test_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
    train_history.append(test_rmse)
    if len(train_history) > 50:
        if train_history[-51] < min(train_history[-50:]):
            print("Early Stopped", len(train_history))
            break
print('test rmse:', test_rmse)

best lr: 1e-05
test rmse: 38.74574320742499


### 10. MLP_2_dm_dropout_full
- y: de-mean
- num of obs: All
- model: MLP, two hidden layers + ReLu
- regularization: Dropout

In [117]:
y_train_demean = y_subtrain - y_train_mean
y_tune_demean = y_tune - y_train_mean
y_test_demean = y_test - y_train_mean
y_train_fortest = y_train - y_train_mean

dataset = ArrayDataset(x_subtrain, y_train_demean)
train_data_loader = mx.gluon.data.DataLoader(dataset, batch_size = 512)
dataset_tune = ArrayDataset(x_tune, y_tune_demean)
tune_data_loader = mx.gluon.data.DataLoader(dataset_tune, batch_size = 512)

## for test
dataset_fortest = ArrayDataset(x_train, y_train_fortest)
train_dara_loader_fortest = mx.gluon.data.DataLoader(dataset_fortest, batch_size = 512)
dataset_test = ArrayDataset(x_test, y_test_demean)
test_data_loader = mx.gluon.data.DataLoader(dataset_test, batch_size = 512)

In [118]:
# construct and initialize network.
ctx =  mx.gpu() if mx.context.num_gpus() else mx.cpu()

net = nn.Sequential()
net.add(nn.Dense(45, activation="relu"))
net.add(gluon.nn.Dropout(.5))
net.add(nn.Dense(45, activation="relu"))
net.add(gluon.nn.Dropout(.5))
net.add(nn.Dense(1, activation="relu"))
# define loss and trainer.
criterion = gluon.loss.L2Loss()

epochs = 10
lr = [0.00001, 0.000005, 0.000001]

In [None]:
result_lr = {}

for index in lr:
    net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': index})
    train_history = []
    
    for epoch in range(epochs):
        cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
        training_samples = 0
        for x, y in train_data_loader:
            with autograd.record():
                y_train_pred = net(x)
                loss = criterion(y_train_pred, y)
            loss.backward()
            trainer.step(512)
            cumulative_train_loss += loss.sum()
            training_samples += x.shape[0]
        train_loss = cumulative_train_loss.asscalar()/training_samples
        train_rmse = np.sqrt(train_loss)

        # validation loop
        cumulative_valid_loss = mx.nd.zeros(1, ctx)
        valid_samples = 0
        for x, y in tune_data_loader:
            y_tune_pred = net(x)
            loss = criterion(y_tune_pred, y)
            cumulative_valid_loss += loss.sum()
            valid_samples += x.shape[0]
        valid_loss = cumulative_valid_loss.asscalar()/valid_samples
        valid_rmse = np.sqrt(valid_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
        train_history.append(valid_rmse)
        if len(train_history) > 50:
            if train_history[-51] < min(train_history[-50:]):
                print("Early Stopped", len(train_history))
                break
        
    result_lr[index] = valid_rmse
            
## use the best lr for testing
best_lr, _ = sorted(result_lr.items(), key = lambda item: item[1])[0]
print("best lr:", best_lr)

##########

net.initialize(init.Normal(sigma=0.28993590), force_reinit = True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': best_lr})
train_history = []

for epoch in range(epochs):
    cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
    training_samples = 0
    for x, y in train_dara_loader_fortest:
        with autograd.record():
            y_train_pred = net(x)
            loss = criterion(y_train_pred, y)
        loss.backward()
        trainer.step(512)
        cumulative_train_loss += loss.sum()
        training_samples += x.shape[0]
    train_loss = cumulative_train_loss.asscalar()/training_samples
    train_rmse = np.sqrt(train_loss)

    # test loop
    cumulative_test_loss = mx.nd.zeros(1, ctx)
    test_samples = 0
    for x, y in test_data_loader:
        y_test_pred = net(x)
        loss = criterion(y_test_pred, y)
        cumulative_test_loss += loss.sum()
        test_samples += x.shape[0]
    test_loss = cumulative_test_loss.asscalar()/test_samples
    test_rmse = np.sqrt(test_loss)
#         if epoch%20 == 0:
#             print('epoch:', epoch, 'loss:', valid_rmse)
    train_history.append(test_rmse)
    if len(train_history) > 50:
        if train_history[-51] < min(train_history[-50:]):
            print("Early Stopped", len(train_history))
            break
print('test rmse:', test_rmse)

## Q2

| Method  | x_continue  | x_binary  | y  | RMSE  |
|:---:|:---:|:---:|:---:|:---:|
| ymean  | not used  | not used  | keep  | 928.8095109332162  |
| OLS  | keep  | keep  | keep  | 692.289127136201  |
| OLS  | keep  | keep  | de-mean  | 692.289127136183  |
| OLS  | std.  | std.  | de-mean  | 692.2891271371996  |
| Ridge  | keep  | keep  | keep  | 660.5612900111672  |
| Ridge  | std.  | keep  | de-mean  | 645.516812393874  |
| Ridge  | keep  | keep  | de-mean  | 660.5612900111672  |
| Ridge  | std.  | std.  | de-mean  | 588.793465752767  |
| Lasso  | std.  | keep  | de-mean  | 643.3855039398903  |