Skip to content

Commit

Permalink
[SW-2477] Replace IcedHashMapWrapper with New guessType Method On ParsePreviewWriter (#2380)
Browse files Browse the repository at this point in the history

(cherry picked from commit d756029)
  • Loading branch information
mn-mikke committed Nov 9, 2020
1 parent d29e691 commit fb5563f
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 68 deletions.
Expand Up @@ -96,10 +96,9 @@ class ImportFrameHandler extends Handler {
private def convertCategoricalColumnsToOtherTypesIfNeeded(frame: Frame, categoricalColumnIndices: Array[Int]) = {
categoricalColumnIndices.foreach { idx =>
val vector = frame.vec(idx)
val previewWriter =
new CategoricalPreviewParseWriter(vector.domain(), vector.length().toInt, vector.naCnt().toInt)
val types = previewWriter.guessTypes()
types(0) match {
val correctType =
CategoricalPreviewParseWriter.guessType(vector.domain(), vector.length().toInt, vector.naCnt().toInt)
correctType match {
case Vec.T_CAT => // No action needed
case Vec.T_STR =>
Log.info(s"The categorical column '${frame.names()(idx)}' has been converted to string.")
Expand Down
Expand Up @@ -17,29 +17,30 @@

package water.parser;

import java.lang.reflect.Field;
import water.util.IcedHashMap;
public class CategoricalPreviewParseWriter {

public class CategoricalPreviewParseWriter extends PreviewParseWriter {
public static byte guessType(String[] domain, int nLines, int nEmpty) {
final int nStrings = nLines - nEmpty;
final int nNums = 0;
final int nDates = 0;
final int nUUID = 0;
final int nZeros = 0;

public CategoricalPreviewParseWriter(String[] domain, int totalCount, int naCount) {
super(1);
this._nlines = totalCount;
this._nempty[0] = naCount;
this._nstrings[0] = totalCount - naCount;
IcedHashMap<String, String>[] domains = new IcedHashMap[1];
domains[0] = new IcedHashMapWrapper(domain);
setPrivateDomains(domains);
}
PreviewParseWriter.IDomain domainWrapper =
new PreviewParseWriter.IDomain() {
public int size() {
return domain.length;
}

public boolean contains(String value) {
for (String domainValue : domain) {
if (value.equals(domainValue)) return true;
}
return false;
}
};

private void setPrivateDomains(IcedHashMap<String, String>[] domains) {
try {
Field domainsField = PreviewParseWriter.class.getDeclaredField("_domains");
domainsField.setAccessible(true);
domainsField.set(this, domains);
domainsField.setAccessible(false);
} catch (Exception e) {
throw new RuntimeException(e);
}
return PreviewParseWriter.guessType(
nLines, nNums, nStrings, nDates, nUUID, nZeros, nEmpty, domainWrapper);
}
}
41 changes: 0 additions & 41 deletions extensions/src/main/scala/water/parser/IcedHashMapWrapper.java

This file was deleted.

Expand Up @@ -46,8 +46,7 @@ class CategoricalPreviewParseWriterTestSuite extends FunSuite with Matchers {

val domain = testCase.filter(_ != null).distinct.toArray
val naCount = testCase.filter(_ == null).length
val categoricalWriter = new CategoricalPreviewParseWriter(domain, testCase.length, naCount)
val result = categoricalWriter.guessTypes()(0)
val result = CategoricalPreviewParseWriter.guessType(domain, testCase.length, naCount)

result shouldEqual expected
}
Expand Down

0 comments on commit fb5563f

Please sign in to comment.