Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/0xdata/h2o
Browse files: browse the repository at this point in the history
  • Loading branch information
arnocandel committed Oct 22, 2014
2 parents 3a51e24 + 052262b commit 137e4b7
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 11 deletions.
File renamed without changes.
2 changes: 1 addition & 1 deletion py/testdir_0xdata_only/test_parse_cust.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def test_parse_cust(self):
goodKeyList = [key for key in importKeyList if ('.csv' in key or '.tsv' in key)]
trial = 0
# just do 1?
for i, importKey in enumerate(random.sample(goodKeyList,3)]):
for i, importKey in enumerate(random.sample(goodKeyList,3)):
print "importKey:", importKey
trial +=1

Expand Down
1 change: 1 addition & 0 deletions py/testdir_kevin/n0
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
./n0.doit test_GLM2_syn_eqns_data.py $*
./n0.doit test_libsvm.py $*
./n0.doit test_parse_rand_utf8_3.py $*
./n0.doit test_parse_rand_utf8_just_double_quote.py $*
./n0.doit test_parse_rand_utf8_unmatched_double.py $*
./n0.doit test_parse_specific_case1.py $*
./n0.doit test_parse_specific_case2.py $*
Expand Down
4 changes: 0 additions & 4 deletions py/testdir_multi_jvm/n0
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
# ./n0.doit test_cloud_5_loop.py $*
# ./n0.doit test_cloud.py $*
# ./n0.doit test_cloud_udp_drop.py $*
./n0.doit test_create_frame_rand1.py $*
./n0.doit test_csv_download_libsvm.py $*
./n0.doit test_ddply_plot2.py $*
./n0.doit test_dead_node_status.py $*
Expand Down
Expand Up @@ -49,7 +48,6 @@
./n0.doit test_GLM2_prob_cols_4.py $*
./n0.doit test_GLM2.py $*
./n0.doit test_impute_with_na.py $*
./n0.doit test_impute_with_na_rand.py $*
./n0.doit test_inspect.py $*
./n0.doit test_iostatus.py $*
./n0.doit test_jstack.py $*
Expand Down
Expand Up @@ -88,7 +86,6 @@
./n0.doit test_parse_file_loop_fvec.py $*
./n0.doit test_parse_fs_schmoo_fvec.py $*
./n0.doit test_parse_full_rand.py $*
./n0.doit test_parse_libsvm_fvec.py $*
./n0.doit test_parse_many_cols_fvec.py $*
./n0.doit test_parse_manyfiles_fvec.py $*
./n0.doit test_parse_mnist_rebalance.py $*
Expand All @@ -99,7 +96,6 @@
./n0.doit test_parse_rand_schmoo_fvec.py $*
./n0.doit test_parse_rand_utf8_2.py $*
./n0.doit test_parse_rand_utf8_angle_start.py $*
./n0.doit test_parse_rand_utf8_just_double_quote.py $*
./n0.doit test_parse_rand_utf8_multibyte.py $*
./n0.doit test_parse_rand_utf8.py $*
./n0.doit test_parse_time2_fvec.py $*
Expand Down
3 changes: 1 addition & 2 deletions py/testdir_single_jvm/n0
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
./n0.doit test_export_import.py $*
./n0.doit test_failswith512chunk.py $*
./n0.doit test_fp_many_cols_fvec.py $*
./n0.doit test_fp_parse_fail.py $*
./n0.doit test_frame_split_iris.py $*
./n0.doit test_frame_split.py $*
./n0.doit test_from_import_fvec.py $*
Expand Down
Expand Up @@ -109,7 +108,6 @@
./n0.doit test_GLM2_poisson_1.py $*
./n0.doit test_GLM2_poisson_fail.py $*
./n0.doit test_GLM2_poisson_goalies.py $*
./n0.doit test_GLM2_poisson_normalize.py $*
./n0.doit test_GLM2_poisson_rand2.py $*
./n0.doit test_GLM2_poisson_timeout_fail.py $*
./n0.doit test_GLM2_princeton.py $*
Expand Down
Expand Up @@ -163,6 +161,7 @@
./n0.doit test_parse_small_many_fvec.py $*
./n0.doit test_parse_specific_case2a.py $*
./n0.doit test_parse_specific_case4.py $*
./n0.doit test_parse_specific_case5.py $*
./n0.doit test_parse_sq_30rows_fvec.py $*
./n0.doit test_parse_summary_manyfiles_1_fvec.py $*
./n0.doit test_parse_syn_gz_cat_fvec.py $*
Expand Down
13 changes: 9 additions & 4 deletions src/main/java/water/api/Impute.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ protected void init() throws IllegalArgumentException {
throw new IllegalArgumentException("method must be one of (mean, median, mode)"); // regression, randomForest)");
if ( !(column.isEnum()) && column.naCnt() <= 0)
throw new IllegalArgumentException("No NAs in the column, nothing to do.");
if (column.isEnum() && !Arrays.asList(column._domain).contains("NA"))
if (column.isEnum() && !Arrays.asList(column._domain).contains("NA") && column.naCnt() <= 0 )
throw new IllegalArgumentException("No NAs in the column, nothing to do.");
// if (method == Method.regression && (column.isEnum() || column.isUUID() || column.isTime()))
// throw new IllegalArgumentException("Trying to perform regression on non-numeric column! Please select a different column.");
Expand Down
Expand Up @@ -89,12 +89,15 @@ protected void init() throws IllegalArgumentException {
long maxCounts = -1;
int mode = -1;
for (int i = 0; i < counts[0].length; ++i) {
if (counts[0][i] > maxCounts && !dom[i].equals("NA")) {
if (counts[0][i] > maxCounts && !dom[i].equals("NA")) { // check for "NA" in domain -- corner case from R
maxCounts = counts[0][i];
mode = i;
}
}
_replace_val = mode != -1 ? (double) mode : (double) Arrays.asList(dom).indexOf("NA");
_replace_val = mode != -1
? (double) mode
: (double) Arrays.asList(dom).indexOf("NA"); // could produce -1 if "NA" not in the domain -- that is we don't have the R corner case
if (_replace_val == -1) _replace_val = Double.NaN; // OK to replace, since we're in the elif "mode" block
}
final double rv = _replace_val;
new MRTask2() {
Expand All @@ -103,7 +106,9 @@ public void map(Chunk[] cs) {
Chunk c = cs[col_id];
int rows = c.len();
for (int r = 0; r < rows; ++r) {
if (c.isNA0(r) || (c._vec.isEnum() && c._vec.domain()[(int) c.at0(r)].equals("NA"))) c.set0(r, rv);
if (c.isNA0(r) || (c._vec.isEnum() && c._vec.domain()[(int) c.at0(r)].equals("NA"))) {
if (!Double.isNaN(rv)) c.set0(r, rv); // leave as NA if replace value is NA
}
}
}
}.doAll(source);
Expand Down

0 comments on commit 137e4b7

Please sign in to comment.