From 9d892697bd483773f2b75a70045cf66fba4cb91c Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Thu, 2 Apr 2015 20:53:52 -0700 Subject: [PATCH] MAHOUT-1638: implement String equivalent of Vec.makeCons() Necessary for creating an empty Vec to hold String labels in the future. Replace calls to Vec.makeZero() with H2OHelper.makeEmptyStrVec() where the created Vec is used to store label strings. Also, fix an off-by-one bug in MapBlockHelper Signed-off-by: Anand Avati --- .../apache/mahout/h2obindings/H2OHdfs.java | 2 +- .../apache/mahout/h2obindings/H2OHelper.java | 37 ++++++++++++++++++- .../h2obindings/ops/MapBlockHelper.scala | 2 +- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java index f21ebe0ef1..56b3745e42 100644 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java +++ b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java @@ -167,7 +167,7 @@ public static H2ODrm drmFromSeqfile(String filename, int parMin) { } if (reader.getKeyClass() == Text.class) { - labels = frame.anyVec().makeZero(); + labels = H2OHelper.makeEmptyStrVec(frame.anyVec()); labelwriter = labels.open(); } diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java index 2ede8cfea3..859e5b422b 100644 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java +++ b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java @@ -36,6 +36,11 @@ import org.apache.mahout.h2obindings.drm.H2ODrm; +// for makeEmptyStrVec +import water.Key; +import water.DKV; +import water.fvec.CStrChunk; + /** * Collection of helper methods for H2O backend. 
*/ @@ -323,7 +328,7 @@ public static H2ODrm drmFromMatrix(Matrix m, int minHint, int exactHint) { Map map = m.getRowLabelBindings(); if (map != null) { // label vector must be similarly partitioned like the Frame - labels = frame.anyVec().makeZero(); + labels = makeEmptyStrVec(frame.anyVec()); Vec.Writer writer = labels.open(); Map rmap = reverseMap(map); for (int r = 0; r < m.rowSize(); r++) { @@ -389,6 +394,36 @@ public static Frame emptyFrame(long nrow, int ncol, int minHint, int exactHint, return new Frame(vecs); } + + /** + * The following two methods: vecChunkLen and makeEmptyStrVec + * are h2o-0.1.25 specific. + */ + public static Vec makeEmptyStrVec(final Vec template) { + final int nChunks = template.nChunks(); + Key key = template.group().addVec(); + final Vec emptystr = new Vec(key, template._espc, null, Vec.T_NUM); + + new MRTask() { + @Override protected void setupLocal() { + for (int i = 0; i < nChunks; i++) { + Key k = emptystr.chunkKey(i); + int chklen = vecChunkLen(template, i); + int stridx[] = new int[chklen]; + byte b[] = new byte[1]; b[0] = 0; + for (int j = 0; j < chklen; j++) stridx[j] = -1; + if (k.home()) DKV.put(k, new CStrChunk(1, b, chklen, stridx), _fs); + } + if (emptystr._key.home()) DKV.put(emptystr._key, emptystr, _fs); + } + }.doAllNodes(); + return emptystr; + } + + public static int vecChunkLen(Vec template, int chunk) { + return (int) (template._espc[chunk + 1] - template._espc[chunk]); + } + /** * Create an empty (zero-filled) H2O DRM. 
* diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala index 038482607f..f69a8441f0 100644 --- a/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala +++ b/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala @@ -37,7 +37,7 @@ object MapBlockHelper { case `s` => { val arr = new Array[String](in.rowSize) val vstr = new ValueString - for (i <- 0 to in.rowSize) { + for (i <- 0 to (in.rowSize - 1)) { arr(i) = labels.atStr(vstr, i + startlong).toString } arr