Permalink
Switch branches/tags
last_OK jenkins-tomk-hadoop-1 jenkins-tomas_jenkins-7 jenkins-tomas_jenkins-6 jenkins-tomas_jenkins-5 jenkins-tomas_jenkins-4 jenkins-tomas_jenkins-3 jenkins-tomas_jenkins-2 jenkins-tomas_jenkins-1 jenkins-sample-docs-3 jenkins-sample-docs-2 jenkins-sample-docs-1 jenkins-rel-wright-10 jenkins-rel-wright-9 jenkins-rel-wright-8 jenkins-rel-wright-7 jenkins-rel-wright-6 jenkins-rel-wright-5 jenkins-rel-wright-4 jenkins-rel-wright-3 jenkins-rel-wright-2 jenkins-rel-wright-1 jenkins-rel-wolpert-11 jenkins-rel-wolpert-10 jenkins-rel-wolpert-9 jenkins-rel-wolpert-8 jenkins-rel-wolpert-7 jenkins-rel-wolpert-6 jenkins-rel-wolpert-5 jenkins-rel-wolpert-4 jenkins-rel-wolpert-3 jenkins-rel-wolpert-2 jenkins-rel-wolpert-1 jenkins-rel-wheeler-12 jenkins-rel-wheeler-11 jenkins-rel-wheeler-10 jenkins-rel-wheeler-9 jenkins-rel-wheeler-8 jenkins-rel-wheeler-7 jenkins-rel-wheeler-6 jenkins-rel-wheeler-5 jenkins-rel-wheeler-4 jenkins-rel-wheeler-3 jenkins-rel-wheeler-2 jenkins-rel-wheeler-1 jenkins-rel-weierstrass-7 jenkins-rel-weierstrass-6 jenkins-rel-weierstrass-5 jenkins-rel-weierstrass-4 jenkins-rel-weierstrass-3 jenkins-rel-weierstrass-2 jenkins-rel-weierstrass-1 jenkins-rel-vapnik-1 jenkins-rel-vajda-4 jenkins-rel-vajda-3 jenkins-rel-vajda-2 jenkins-rel-vajda-1 jenkins-rel-ueno-12 jenkins-rel-ueno-11 jenkins-rel-ueno-10 jenkins-rel-ueno-9 jenkins-rel-ueno-8 jenkins-rel-ueno-7 jenkins-rel-ueno-6 jenkins-rel-ueno-5 jenkins-rel-ueno-4 jenkins-rel-ueno-3 jenkins-rel-ueno-2 jenkins-rel-ueno-1 jenkins-rel-tverberg-6 jenkins-rel-tverberg-5 jenkins-rel-tverberg-4 jenkins-rel-tverberg-3 jenkins-rel-tverberg-2 jenkins-rel-tverberg-1 jenkins-rel-tutte-2 jenkins-rel-tutte-1 jenkins-rel-turnbull-2 jenkins-rel-turnbull-1 jenkins-rel-turing-10 jenkins-rel-turing-9 jenkins-rel-turing-8 jenkins-rel-turing-7 jenkins-rel-turing-6 jenkins-rel-turing-5 jenkins-rel-turing-4 jenkins-rel-turing-3 jenkins-rel-turing-2 jenkins-rel-turing-1 jenkins-rel-turin-4 jenkins-rel-turin-3 jenkins-rel-turin-2 
jenkins-rel-turin-1 jenkins-rel-turchin-11 jenkins-rel-turchin-10 jenkins-rel-turchin-9 jenkins-rel-turchin-8 jenkins-rel-turchin-7 jenkins-rel-turchin-6 jenkins-rel-turchin-5
Nothing to show
Find file Copy path
99 lines (77 sloc) 3.68 KB
# Change the working directory to this script's own directory (its path is
# passed by the runner via the "f" command-line argument) so that relative
# paths below resolve correctly.
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
# Load the shared h2o R test harness; presumably this provides the doTest(),
# locate(), and Log.info() helpers used below — verify against the harness.
source("../../../scripts/h2o-r-test-setup.R")
check.deeplearning_anomaly <- function() {
  # Train an unsupervised autoencoder on MNIST, then use its per-row
  # reconstruction error (h2o.anomaly) to rank test digits from "normal"
  # to "anomalous" and visualize the extremes.
  Log.info("Deep Learning Anomaly Detection MNIST")  # fixed stray ')' in message
  TRAIN <- "bigdata/laptop/mnist/train.csv.gz"
  TEST <- "bigdata/laptop/mnist/test.csv.gz"

  # set to FALSE for stand-alone demo
  if (TRUE) {
    train_hex <- h2o.uploadFile(locate(TRAIN), destination_frame = "train")
    test_hex <- h2o.uploadFile(locate(TEST))
    print(train_hex)
  } else {
    library(h2o)
    homedir <- paste0(path.expand("~"), "/h2o-3/") # modify if needed
    train_hex <- h2o.importFile(path = paste0(homedir, TRAIN), header = FALSE, sep = ',', destination_frame = 'train.hex')
    test_hex <- h2o.importFile(path = paste0(homedir, TEST), header = FALSE, sep = ',', destination_frame = 'test.hex')
  }

  predictors <- 1:784  # 28x28 pixel columns
  resp <- 785          # response column index (digit label)

  # unsupervised -> drop the response column (digit: 0-9)
  train_hex <- h2o.assign(train_hex[, -resp], 'train_hex')
  test_hex <- h2o.assign(test_hex[, -resp], 'test_hex')

  # helper functions for display of handwritten digits
  # adapted from http://www.r-bloggers.com/the-essence-of-a-handwritten-digit/

  # Draw each row of `mydata` as a 28x28 grayscale image, titled with its
  # reconstruction error, laid out on an N-by-N grid.
  plotDigit <- function(mydata, rec_error) {
    len <- nrow(mydata)
    N <- ceiling(sqrt(len))
    par(mfrow = c(N, N), pty = 's', mar = c(1, 1, 1, 1), xaxt = 'n', yaxt = 'n')
    # Loop-invariant color ramp: build it once instead of per iteration.
    cus_col <- colorRampPalette(colors = c('white', 'black'))
    for (i in seq_len(nrow(mydata))) {
      z <- array(mydata[i, ], dim = c(28, 28))
      z <- z[, 28:1]  # flip columns so the digit renders upright
      image(1:28, 1:28, z, main = paste0("rec_error: ", round(rec_error[i], 4)), col = cus_col(256))
    }
  }

  # Select the rows of `data` whose reconstruction-error rank (ascending)
  # falls in `rows`, and plot them with plotDigit.
  plotDigits <- function(data, rec_error, rows) {
    row_idx <- sort(order(rec_error[, 1], decreasing = FALSE)[rows])
    my_rec_error <- rec_error[row_idx, ]
    my_data <- as.matrix(as.data.frame(data[row_idx, ]))
    plotDigit(my_data, my_rec_error)
  }

  ## ANOMALY DETECTION DEMO
  # 1) LEARN WHAT'S NORMAL WITH UNSUPERVISED AUTOENCODER
  ae_model <- h2o.deeplearning(x = predictors,
                               training_frame = train_hex,
                               activation = "Tanh",
                               autoencoder = TRUE,
                               hidden = c(50),
                               l1 = 1e-5,
                               ignore_const_cols = FALSE,
                               epochs = 1)

  # 2) DETECT OUTLIERS
  # h2o.anomaly computes the per-row reconstruction error for the test data set
  # (passing it through the autoencoder model and computing mean square error (MSE) for each row)
  test_rec_error <- as.data.frame(h2o.anomaly(ae_model, test_hex))

  # 3) VISUALIZE OUTLIERS
  # Let's look at the test set points with low/median/high reconstruction errors.
  # We will now visualize the original test set points and their reconstructions obtained
  # by propagating them through the narrow neural net.

  # Convert the test data into its autoencoded representation (pass through narrow neural net)
  test_recon <- predict(ae_model, test_hex)

  # The good
  # Let's plot the 25 digits with lowest reconstruction error.
  # First we plot the reconstruction, then the original scanned images.
  plotDigits(test_recon, test_rec_error, c(1:25))
  plotDigits(test_hex, test_rec_error, c(1:25))

  # The bad
  # Now the same for the 25 digits with median reconstruction error.
  plotDigits(test_recon, test_rec_error, c(4988:5012))
  plotDigits(test_hex, test_rec_error, c(4988:5012))

  # The ugly
  # And here are the biggest outliers - The 25 digits with highest reconstruction error!
  plotDigits(test_recon, test_rec_error, c(9976:10000))
  plotDigits(test_hex, test_rec_error, c(9976:10000))
}
doTest("Deep Learning Anomaly Detection MNIST", check.deeplearning_anomaly)