In [1]:
import torch.optim as optim
from models.sae import StackedAutoencoder
from lib.loader import ProcessedCsvDataset, get_loader
from lib.utils import print_stage as print_step

In [2]:
# Instantiate the processed-CSV dataset and size the autoencoder's input from it.
dst = ProcessedCsvDataset()

# Three layer widths -> a 3-layer stacked autoencoder.
hidden_sizes = [100, 50, 20]
sae = StackedAutoencoder(dst.num_features, hidden_sizes)

In [5]:
def make_loader(x):
    """Wrap `x` in a loader via `get_loader`, using a batch size of 1024."""
    return get_loader(x, batch_size=1024)


def make_optimizer(x):
    """Build an SGD optimizer (lr=1e-3, momentum=0.9) over `x`."""
    return optim.SGD(x, lr=1e-3, momentum=0.9)


# Fit the stacked autoencoder on the training features for 50 epochs, handing
# `print_step` to `fit` as its callback, then write the model to disk.
# Optional, currently disabled extra argument to `fit`:
#   scheduler_fn=lambda x: optim.lr_scheduler.ReduceLROnPlateau(x, verbose=True)
sae.fit(
    dst.train_feature,
    loader_fn=make_loader,
    optimizer_fn=make_optimizer,
    callback=print_step,
    epochs=50,
)
sae.save('data/sae.pth')

[00 - 00 - 00000] 1.00567
[00 - 00 - 01000] 1.04164
[00 - 01 - 00000] 1.28509
[00 - 01 - 01000] 1.78452
[00 - 02 - 00000] 0.91463
[00 - 02 - 01000] 1.17651
[00 - 03 - 00000] 0.80254
[00 - 03 - 01000] 2.63060
[00 - 04 - 00000] 1.00755
[00 - 04 - 01000] 0.73282
[00 - 05 - 00000] 0.75183
[00 - 05 - 01000] 0.98244
[00 - 06 - 00000] 0.71366
[00 - 06 - 01000] 3.25456
[00 - 07 - 00000] 0.65664
[00 - 07 - 01000] 0.80710
[00 - 08 - 00000] 0.70620
[00 - 08 - 01000] 0.65719
[00 - 09 - 00000] 0.64480
[00 - 09 - 01000] 0.63370
[00 - 10 - 00000] 0.65615
[00 - 10 - 01000] 0.64593
[00 - 11 - 00000] 0.87351
[00 - 11 - 01000] 3.46710
[00 - 12 - 00000] 0.63509
[00 - 12 - 01000] 0.60909
[00 - 13 - 00000] 0.63977
[00 - 13 - 01000] 0.63896
[00 - 14 - 00000] 0.64615
[00 - 14 - 01000] 0.61035
[00 - 15 - 00000] 0.63463
[00 - 15 - 01000] 0.61133
[00 - 16 - 00000] 0.62396
[00 - 16 - 01000] 0.74533
[00 - 17 - 00000] 0.58279
[00 - 17 - 01000] 0.59665
[00 - 18 - 00000] 0.59278
[00 - 18 - 01000] 0.69479
[00 - 19 - 0

Using SAE-encoded features only

In [6]:
# Reload the autoencoder from the checkpoint path used by `sae.save`.
sae.load('data/sae.pth')

# Pass both feature splits through the encoder; labels are taken from the
# dataset unchanged.
train_feature, train_label = sae.encode(dst.train_feature), dst.train_label
test_feature, test_label = sae.encode(dst.test_feature), dst.test_label

In [7]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
import xgboost as xgb

In [8]:
# Random forest (10 trees, depth <= 7, fixed seed) fitted on the current
# train_feature / train_label and scored on the test split.
rf_raw = RandomForestRegressor(n_estimators=10, max_depth=7, n_jobs=-1, random_state=7)
rf_pred = rf_raw.fit(train_feature, train_label).predict(test_feature)

# Mean squared error on the test split.
# NOTE(review): assumes test_label has the same shape as rf_pred; a column-vector
# label would broadcast against the 1-D predictions — confirm.
rf_residual = test_label - rf_pred
print((rf_residual ** 2).mean())

0.8199905996626019


In [9]:
# AdaBoost over depth-4 regression trees (100 rounds, 'square' loss, fixed seed),
# fitted on the current train_feature / train_label and scored on the test split.
ada_base_tree = DecisionTreeRegressor(max_depth=4)
ada_raw = AdaBoostRegressor(ada_base_tree, n_estimators=100, loss='square', random_state=7)
ada_pred = ada_raw.fit(train_feature, train_label).predict(test_feature)

# Mean squared error on the test split.
# NOTE(review): assumes test_label has the same shape as ada_pred — confirm.
ada_residual = test_label - ada_pred
print((ada_residual ** 2).mean())

1.649093366398667


Using concatenated features (raw inputs + SAE-encoded features)

In [None]:
import numpy as np

# Build the combined feature matrices: the raw input columns followed by the
# SAE encoding of those same inputs, joined along axis 1.
#
# The encodings are recomputed from `dst` instead of being read back from
# `train_feature` / `test_feature`. This cell rebinds those two names, so
# reading them here would make a second run of the cell append the already
# concatenated matrix to the raw features again.
# NOTE(review): assumes `sae.encode` returns the same output for the same input
# on repeated calls — confirm.
train_encoded = sae.encode(dst.train_feature)
test_encoded = sae.encode(dst.test_feature)

train_feature = np.concatenate((dst.train_feature, train_encoded), axis=1)
test_feature = np.concatenate((dst.test_feature, test_encoded), axis=1)

In [None]:
# Random forest (10 trees, depth <= 7, fixed seed) fitted on whatever
# train_feature / test_feature currently hold — the concatenated matrices when
# the notebook is run top to bottom.
rf_raw = RandomForestRegressor(n_estimators=10, max_depth=7, n_jobs=-1, random_state=7)
rf_pred = rf_raw.fit(train_feature, train_label).predict(test_feature)

# Mean squared error on the test split.
# NOTE(review): assumes test_label has the same shape as rf_pred — confirm.
rf_squared_error = (test_label - rf_pred) ** 2
print(rf_squared_error.mean())

In [None]:
# AdaBoost over depth-4 regression trees (100 rounds, 'square' loss, fixed seed),
# fitted on whatever train_feature / test_feature currently hold — the
# concatenated matrices when the notebook is run top to bottom.
ada_base_tree = DecisionTreeRegressor(max_depth=4)
ada_raw = AdaBoostRegressor(ada_base_tree, n_estimators=100, loss='square', random_state=7)
ada_pred = ada_raw.fit(train_feature, train_label).predict(test_feature)

# Mean squared error on the test split.
# NOTE(review): assumes test_label has the same shape as ada_pred — confirm.
ada_squared_error = (test_label - ada_pred) ** 2
print(ada_squared_error.mean())