Permalink
Switch branches/tags
last_OK jenkins-tomk-hadoop-1 jenkins-tomas_jenkins-7 jenkins-tomas_jenkins-6 jenkins-tomas_jenkins-5 jenkins-tomas_jenkins-4 jenkins-tomas_jenkins-3 jenkins-tomas_jenkins-2 jenkins-tomas_jenkins-1 jenkins-sample-docs-3 jenkins-sample-docs-2 jenkins-sample-docs-1 jenkins-rel-wright-10 jenkins-rel-wright-9 jenkins-rel-wright-8 jenkins-rel-wright-7 jenkins-rel-wright-6 jenkins-rel-wright-5 jenkins-rel-wright-4 jenkins-rel-wright-3 jenkins-rel-wright-2 jenkins-rel-wright-1 jenkins-rel-wolpert-11 jenkins-rel-wolpert-10 jenkins-rel-wolpert-9 jenkins-rel-wolpert-8 jenkins-rel-wolpert-7 jenkins-rel-wolpert-6 jenkins-rel-wolpert-5 jenkins-rel-wolpert-4 jenkins-rel-wolpert-3 jenkins-rel-wolpert-2 jenkins-rel-wolpert-1 jenkins-rel-wheeler-12 jenkins-rel-wheeler-11 jenkins-rel-wheeler-10 jenkins-rel-wheeler-9 jenkins-rel-wheeler-8 jenkins-rel-wheeler-7 jenkins-rel-wheeler-6 jenkins-rel-wheeler-5 jenkins-rel-wheeler-4 jenkins-rel-wheeler-3 jenkins-rel-wheeler-2 jenkins-rel-wheeler-1 jenkins-rel-weierstrass-7 jenkins-rel-weierstrass-6 jenkins-rel-weierstrass-5 jenkins-rel-weierstrass-4 jenkins-rel-weierstrass-3 jenkins-rel-weierstrass-2 jenkins-rel-weierstrass-1 jenkins-rel-vapnik-1 jenkins-rel-vajda-4 jenkins-rel-vajda-3 jenkins-rel-vajda-2 jenkins-rel-vajda-1 jenkins-rel-ueno-12 jenkins-rel-ueno-11 jenkins-rel-ueno-10 jenkins-rel-ueno-9 jenkins-rel-ueno-8 jenkins-rel-ueno-7 jenkins-rel-ueno-6 jenkins-rel-ueno-5 jenkins-rel-ueno-4 jenkins-rel-ueno-3 jenkins-rel-ueno-2 jenkins-rel-ueno-1 jenkins-rel-tverberg-6 jenkins-rel-tverberg-5 jenkins-rel-tverberg-4 jenkins-rel-tverberg-3 jenkins-rel-tverberg-2 jenkins-rel-tverberg-1 jenkins-rel-tutte-2 jenkins-rel-tutte-1 jenkins-rel-turnbull-2 jenkins-rel-turnbull-1 jenkins-rel-turing-10 jenkins-rel-turing-9 jenkins-rel-turing-8 jenkins-rel-turing-7 jenkins-rel-turing-6 jenkins-rel-turing-5 jenkins-rel-turing-4 jenkins-rel-turing-3 jenkins-rel-turing-2 jenkins-rel-turing-1 jenkins-rel-turin-4 jenkins-rel-turin-3 jenkins-rel-turin-2 
jenkins-rel-turin-1 jenkins-rel-turchin-11 jenkins-rel-turchin-10 jenkins-rel-turchin-9 jenkins-rel-turchin-8 jenkins-rel-turchin-7 jenkins-rel-turchin-6 jenkins-rel-turchin-5
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
90 lines (69 sloc) 4.12 KB
# Run from the directory that holds this script so that the relative path to
# the test setup script below resolves.
local({
  cli_args <- R.utils::commandArgs(asValues = TRUE)
  # `$f` matches names partially, exactly like the original `$"f"` lookup;
  # do not replace it with an exact `[["f"]]`.
  script_dir <- dirname(cli_args$f)
  setwd(normalizePath(script_dir))
})
# Load the H2O R test setup script (path is relative to the directory set above).
source("../../../scripts/h2o-r-test-setup.R")
# Exercises an H2O Deep Learning autoencoder on the MNIST data in two ways:
#   1. As a feature extractor: the hidden-layer activations of the autoencoder
#      are used as inputs to a DRF classifier, and the error reported in the
#      confusion matrix on the test set is asserted.
#   2. As a weight initializer: a supervised Deep Learning model is started
#      from the pretrained autoencoder and its validation logloss is printed
#      next to that of an identically configured model trained from scratch
#      (printed only, nothing is asserted for this part).
#
# Takes no arguments. Relies on helpers that are not defined in this function
# (Log.info, locate, expect_equal and the h2o.* client functions).
check.deeplearning_autoencoder <- function() {
  Log.info("Deep Learning Autoencoder MNIST")

  train_hex <- h2o.uploadFile(locate("bigdata/laptop/mnist/train.csv.gz"))
  test_hex <- h2o.uploadFile(locate("bigdata/laptop/mnist/test.csv.gz"))

  predictors <- 1:784
  resp <- 785
  nfeatures <- 20 # number of features (smallest hidden layer)
  # Single source of truth for the autoencoder's id: it is assigned when the
  # autoencoder is trained and referenced again as `pretrained_autoencoder`.
  ae_model_id <- "ae_model"

  # The response column is treated as categorical in both frames.
  train_hex[, resp] <- as.factor(train_hex[, resp])
  test_hex[, resp] <- as.factor(test_hex[, resp])

  # Split the training data into two parts using a seeded uniform column:
  # first part for unsupervised training, second part for supervised training.
  sid <- h2o.runif(train_hex, seed = 0)
  # First part of the data, used without labels for the DL autoencoder.
  train_unsupervised <- train_hex[sid >= 0.5, ]
  # Second part of the data, used with labels for supervised learning (DRF).
  train_supervised <- train_hex[sid < 0.5, ]

  # Train the autoencoder on train_unsupervised (response column dropped).
  ae_model <- h2o.deeplearning(
    x = predictors,
    model_id = ae_model_id,
    training_frame = train_unsupervised[-resp],
    activation = "Tanh",
    ignore_const_cols = FALSE,
    autoencoder = TRUE,
    hidden = c(nfeatures),
    epochs = 1,
    reproducible = TRUE, # slow - turn off for real problems
    seed = 1234
  )

  # Convert train_supervised with the autoencoder model to the
  # lower-dimensional space of the first hidden layer.
  train_supervised_features <- h2o.deepfeatures(ae_model, train_supervised[-resp], layer = 1)
  expect_equal(ncol(train_supervised_features), nfeatures)

  # Column indices into the extracted-feature frame: features first, then the
  # response that is appended right after them below.
  myX <- 1:nfeatures
  myY <- nfeatures + 1

  # Now train DRF on the extracted feature space; first add the response back.
  train_supervised_features <- h2o.cbind(train_supervised_features, train_supervised[resp])
  drf_model <- h2o.randomForest(
    training_frame = train_supervised_features,
    x = myX,
    y = myY,
    ntrees = 10,
    seed = 1234,
    min_rows = 10
  )

  # Now test the DRF model on the test set (first process it into the reduced
  # feature space, then append the response column).
  test_features <- h2o.deepfeatures(ae_model, test_hex[, -resp], layer = 1)
  test_features <- h2o.cbind(test_features, test_hex[, resp])
  cm <- h2o.confusionMatrix(drf_model, test_features)
  print(cm)

  # compare to pyunit_autoencoderDeepLearning_large.py
  # NOTE(review): entry 11 of cm$Error is presumably the totals row
  # (10 digit classes + totals) - confirm.
  expect_equal(cm$Error[11], 0.088, tolerance = 0.01, scale = 1) # absolute difference: scale = 1

  ## Another usecase: use the pretrained unsupervised autoencoder model to
  ## initialize a supervised Deep Learning model. The hidden layout is the same
  ## as the autoencoder's.
  pretrained_model <- h2o.deeplearning(
    x = predictors,
    y = resp,
    training_frame = train_supervised,
    validation_frame = test_hex,
    ignore_const_cols = FALSE,
    hidden = c(nfeatures),
    epochs = 1,
    reproducible = TRUE,
    seed = 1234,
    pretrained_autoencoder = ae_model_id
  )
  print(h2o.logloss(pretrained_model, valid = TRUE))

  # Same configuration without the pretrained autoencoder, for comparison.
  model_from_scratch <- h2o.deeplearning(
    x = predictors,
    y = resp,
    training_frame = train_supervised,
    validation_frame = test_hex,
    ignore_const_cols = FALSE,
    hidden = c(nfeatures),
    epochs = 1,
    reproducible = TRUE,
    seed = 1234
  )
  print(h2o.logloss(model_from_scratch, valid = TRUE))
}
# Pass the check function to doTest together with a descriptive test name.
# NOTE(review): doTest is not defined in this file; presumably it is provided
# by the sourced h2o-r-test-setup.R - confirm.
doTest("Deep Learning AutoEncoder MNIST", check.deeplearning_autoencoder)