Skip to content

Commit

Permalink
Cleans up ASTStrList to handle frames with more than one vector during column conversion.
Browse files Browse the repository at this point in the history
Checks types before converting. Adds several new column type conversions.
  • Loading branch information
bghill committed Oct 13, 2015
1 parent 44994ee commit 836c32e
Show file tree
Hide file tree
Showing 3 changed files with 271 additions and 60 deletions.
19 changes: 19 additions & 0 deletions h2o-core/src/main/java/water/parser/BufferedString.java
Expand Up @@ -131,5 +131,24 @@ else if (o instanceof String) {
public final byte [] getBuffer() {return _buf;}
public final int getOffset() {return _off;}
public final int length() {return _len;}

// Result codes returned by getNumericType().
public static final byte NA = 0;   // returned for an empty string or content that is not numeric
public static final byte INT = 1;  // returned when no decimal point is seen
public static final byte REAL= 2;  // returned when exactly one decimal point is seen
/**
 * Classifies the characters of this string as a plain decimal number.
 *
 * <p>Accepted form: an optional leading {@code '+'} or {@code '-'}, followed by
 * ASCII digits with at most one {@code '.'} anywhere among them. At least one
 * digit is required. Exponents, whitespace, and any other characters are not
 * recognized and yield {@link #NA}.
 *
 * @return {@link #INT} for sign-plus-digits, {@link #REAL} when exactly one
 *         decimal point is present, {@link #NA} otherwise (including the empty
 *         string, a bare sign, a bare {@code "."}, or multiple decimal points)
 */
public final byte getNumericType() {
  if (_len == 0) return NA;

  int i = 0;
  // Skip a single leading sign, if present.
  if (_buf[_off] == '+' || _buf[_off] == '-') i++;

  int digitCnt = 0;
  int decimalCnt = 0;
  for (; i < _len; i++) {
    final byte b = _buf[_off + i];
    if (b == '.') {
      // A decimal point must not fall through to the digit range check,
      // otherwise every real-valued string would be rejected.
      if (++decimalCnt > 1) return NA; // more than one decimal, NaN
    } else if (b >= '0' && b <= '9') {
      digitCnt++;
    } else {
      return NA;
    }
  }

  // A bare sign or a bare "." carries no digits and is not a number.
  if (digitCnt == 0) return NA;
  return decimalCnt == 1 ? REAL : INT;
}
}

51 changes: 38 additions & 13 deletions h2o-core/src/main/java/water/rapids/ASTStrList.java
Expand Up @@ -82,11 +82,18 @@ class ASTAsCharacter extends ASTPrim {
public String str() {
  // Textual name of this primitive.
  return "as.character";
}
/**
 * Converts every column of the argument frame to a string column.
 *
 * <p>The frame produced by evaluating {@code asts[1]} may have any number of
 * columns; each one is passed through {@code VecUtils.toStringVec}. The result
 * is a new frame with the same column names.
 *
 * @param env  execution environment used to evaluate the argument
 * @param stk  stack helper that tracks the evaluated argument
 * @param asts operator and arguments; {@code asts[1]} must evaluate to a frame
 * @return a {@code ValFrame} wrapping the converted columns
 */
@Override Val apply( Env env, Env.StackHelp stk, AST asts[] ) {
  Frame ary = stk.track(asts[1].exec(env)).getFrame();
  Vec[] nvecs = new Vec[ary.numCols()];
  for (int c = 0; c < nvecs.length; ++c) {
    Vec vv = ary.vec(c);
    try {
      nvecs[c] = VecUtils.toStringVec(vv);
    } catch (Exception e) {
      // Conversion of column c failed: hand the columns converted so far to
      // deleteVecs (presumably to release them -- confirm in VecUtils) and
      // rethrow unchanged.
      VecUtils.deleteVecs(nvecs, c);
      throw e;
    }
  }
  return new ValFrame(new Frame(ary._names, nvecs));
}
}

Expand All @@ -98,11 +105,26 @@ class ASTAsFactor extends ASTPrim {
@Override
public String str() {
  // Textual name of this primitive.
  return "as.factor";
}
/**
 * Converts every column of the argument frame to a categorical column.
 *
 * <p>All columns are type-checked up front so that nothing is converted when
 * any column has an unsupported type. Each column is then passed through
 * {@code VecUtils.toCategoricalVec}. The result is a new frame with the same
 * column names.
 *
 * @param env  execution environment used to evaluate the argument
 * @param stk  stack helper that tracks the evaluated argument
 * @param asts operator and arguments; {@code asts[1]} must evaluate to a frame
 * @return a {@code ValFrame} wrapping the converted columns
 * @throws IllegalArgumentException if any column is not string, categorical,
 *         or numeric
 */
@Override Val apply( Env env, Env.StackHelp stk, AST asts[] ) {
  Frame ary = stk.track(asts[1].exec(env)).getFrame();
  Vec[] nvecs = new Vec[ary.numCols()];

  // Type check - prescreen for correct types
  for (Vec v : ary.vecs()) {
    if (!(v.isCategorical() || v.isString() || v.isNumeric())) {
      // Report the type of the offending column, not of an arbitrary column.
      throw new IllegalArgumentException("asfactor() requires a string, categorical, or numeric column. "
          +"Received "+v.get_type_str()
          +". Please convert column to a string or categorical first.");
    }
  }

  for (int c = 0; c < nvecs.length; ++c) {
    Vec vv = ary.vec(c);
    try {
      nvecs[c] = VecUtils.toCategoricalVec(vv);
    } catch (Exception e) {
      // Conversion of column c failed: hand the columns converted so far to
      // deleteVecs (presumably to release them -- confirm in VecUtils) and
      // rethrow unchanged.
      VecUtils.deleteVecs(nvecs, c);
      throw e;
    }
  }
  return new ValFrame(new Frame(ary._names, nvecs));
}
}

Expand All @@ -119,9 +141,12 @@ class ASTAsNumeric extends ASTPrim {
Vec vv;
for(int c=0;c<nvecs.length;++c) {
vv = fr.vec(c);
if( vv.isCategorical() ) nvecs[c] = VecUtils.toIntVec(vv);
else if( vv.isString() ) nvecs[c] = VecUtils.toNumericVec(vv);
else nvecs[c] = vv.makeCopy();
try {
nvecs[c] = VecUtils.toNumericVec(vv);
} catch (Exception e) {
VecUtils.deleteVecs(nvecs, c);
throw e;
}
}
return new ValFrame(new Frame(fr._names, nvecs));
}
Expand Down

0 comments on commit 836c32e

Please sign in to comment.