Skip to content

Commit

Permalink
weighted quantile runit nopass
Browse files Browse the repository at this point in the history
  • Loading branch information
Nidhi Mehta authored and Nidhi Mehta committed Nov 24, 2015
1 parent 578eb95 commit a06b9af
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 0 deletions.
37 changes: 37 additions & 0 deletions h2o-r/tests/testdir_misc/runit_NOPASS_quantile.R
@@ -0,0 +1,37 @@
#This tests quantile and weighted quantile on synthetic data by comparing with R

# Compare h2o.quantile() with base R quantile() on randomly generated samples.
#
# conn: not used in the body; presumably supplied by the doTest() harness.
#
# Three unweighted cases are asserted: a gamma sample, a log-normal sample and
# an exponential sample in which a tenth of the values are replaced by NA.
# The weighted case only builds its inputs; its comparison is still commented
# out because it relies on Hmisc::wtd.quantile.
# NOTE(review): no seed is set, so every run draws different data.
test.quantile <- function(conn) {
  N <- 1000

  # Each probability vector is defined once so the R and H2O calls cannot
  # drift apart.
  pct_probs <- c(0.1, 0.5, 1, 2, 5, 10, 50, 88.83, 99, 90) / 100
  grid_probs <- seq(0, 1, .05)

  # Gamma sample, checked at hand-picked percentiles.
  x <- rgamma(N, shape = 0.067, scale = 0.008)
  aa <- as.h2o(x)
  r_q <- quantile(x, probs = pct_probs, na.rm = TRUE)
  h_q <- h2o.quantile(aa, probs = pct_probs, na.rm = TRUE)
  expect_equal(r_q, h_q)

  # Log-normal sample, checked on a 5% grid.
  x <- rlnorm(N, meanlog = 12, sdlog = 132)
  aa <- as.h2o(x)
  r_q <- quantile(x, probs = grid_probs, na.rm = TRUE)
  h_q <- h2o.quantile(aa, probs = grid_probs, na.rm = TRUE)
  expect_equal(r_q, h_q)

  # Exponential sample with N/10 entries set to NA to exercise na.rm.
  x <- rexp(N, rate = 12.3)
  ss <- sample(1:N, size = N / 10, replace = FALSE)
  x[ss] <- NA
  aa <- as.h2o(x)
  r_q <- quantile(x, probs = grid_probs, na.rm = TRUE)
  h_q <- h2o.quantile(aa, probs = grid_probs, na.rm = TRUE)
  expect_equal(r_q, h_q)

  # Weighted quantiles: inputs are prepared, the assertions are disabled.
  #library(Hmisc)
  x <- runif(N)
  aa <- as.h2o(x)
  wts <- sample(1:6, N, TRUE)
  h_wts <- as.h2o(wts)
  #r_q = wtd.quantile(x, wts, probs = seq(0,1,.05))
  #h_q = h2o.quantile(aa,probs = seq(0,1,.05),weight_column = h_wts)
  #expect_equal(r_q,h_q )

}
doTest("Test quantile", test.quantile)
54 changes: 54 additions & 0 deletions h2o-r/tests/testdir_misc/runit_NOPASS_weighted_quantile.R
@@ -0,0 +1,54 @@
# This tests weighted quantile
# by comparing results with R's wtd.quantile function and sanity-checking by ignoring rows with zero weight
# dataset - http://mlr.cs.umass.edu/ml/datasets/Bank+Marketing

# Weighted-quantile test on the bank marketing data set.
#
# conn: not used in the body; presumably supplied by the doTest() harness.
#
# The active code imports the data, appends a 1/2-valued column, trains a
# small GBM using that column as the fold column, and scores the frame. The
# weighted-quantile comparisons (H2O vs. Hmisc::wtd.quantile vs. plain
# quantile on the non-zero-weight rows) are still commented out.
test.wtd.quantile <- function(conn) {

  a <- h2o.importFile(locate("smalldata/gbm_test/bank-full.csv.zip"),
                      destination_frame = "bank_UCI")
  dim(a)
  myX <- 1:16
  myY <- 17

  # NOTE(review): hard-coded; presumably the row count of the imported frame.
  rowss <- 45211
  # Sample rows for 2-fold xval: sampled rows get value 2, the rest stay 1.
  ss <- sample(1:rowss, size = 22000)
  ww <- rep(1, rowss)
  ww[ss] <- 2

  # Parse fold column to H2O
  wei <- as.h2o(ww, destination_frame = "weight")
  colnames(wei)

  # Cbind fold column to the original dataset
  a <- h2o.assign(h2o.cbind(a, wei), key = "bank")
  dim(a)

  # Build gbm by specifying the fold column.
  # NOTE(review): "x" is assumed to be the name as.h2o() gives the single
  # column built from `ww` -- confirm against colnames(wei).
  gg <- h2o.gbm(x = myX, y = myY, training_frame = a, ntrees = 5,
                fold_column = "x", keep_cross_validation_predictions = TRUE,
                model_id = "cv_gbm")

  # Define weights column: the sampled rows now get weight 0, the rest keep 1.
  # `wi` is only consumed by the commented-out comparison below.
  ww[ss] <- 0
  wi <- as.h2o(ww, destination_frame = "weight_col")

  # Predict; the third prediction column is pulled into R for the
  # commented-out comparisons.
  pr <- h2o.predict(gg, a)
  pred <- as.data.frame(pr[, 3])

  # weighted h2o quantile
  #hq = as.numeric(h2o.quantile(pr[,3],probs = seq(0,.95,.05),weight_column = wi))

  # weighted R quantile
  #library(Hmisc)
  #wq = as.numeric(wtd.quantile(pred[,1],ww,probs = seq(0,.95,.05)))

  #expect_equal(wq,hq,tolerance = 1e-5)

  #Sanity check with just nonzero weighted rows
  #pp=pred[which(ww==1),]
  #qq = as.numeric(quantile(pp,probs = seq(0,.95,.05)))

  #expect_equal(wq,qq,tolerance = 3e-4)
  #expect_equal(hq,qq,tolerance = 3e-4)
}
# The original call passed the misspelled, undefined name `test.wtd.quntile`.
doTest("Test weighted quantile", test.wtd.quantile)

0 comments on commit a06b9af

Please sign in to comment.