Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SW-2477] Replace IcedHashMapWrapper with New guessType Method On PreviewParseWriter #2380

Merged
merged 1 commit into from Nov 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -95,10 +95,9 @@ class ImportFrameHandler extends Handler {
private def convertCategoricalColumnsToOtherTypesIfNeeded(frame: Frame, categoricalColumnIndices: Array[Int]) = {
categoricalColumnIndices.foreach { idx =>
val vector = frame.vec(idx)
val previewWriter =
new CategoricalPreviewParseWriter(vector.domain(), vector.length().toInt, vector.naCnt().toInt)
val types = previewWriter.guessTypes()
types(0) match {
val correctType =
CategoricalPreviewParseWriter.guessType(vector.domain(), vector.length().toInt, vector.naCnt().toInt)
correctType match {
case Vec.T_CAT => // No action needed
case Vec.T_STR =>
Log.info(s"The categorical column '${frame.names()(idx)}' has been converted to string.")
Expand Down
Expand Up @@ -17,29 +17,30 @@

package water.parser;

import java.lang.reflect.Field;
import water.util.IcedHashMap;
public class CategoricalPreviewParseWriter {

public class CategoricalPreviewParseWriter extends PreviewParseWriter {
public static byte guessType(String[] domain, int nLines, int nEmpty) {
final int nStrings = nLines - nEmpty;
final int nNums = 0;
final int nDates = 0;
final int nUUID = 0;
final int nZeros = 0;

public CategoricalPreviewParseWriter(String[] domain, int totalCount, int naCount) {
super(1);
this._nlines = totalCount;
this._nempty[0] = naCount;
this._nstrings[0] = totalCount - naCount;
IcedHashMap<String, String>[] domains = new IcedHashMap[1];
domains[0] = new IcedHashMapWrapper(domain);
setPrivateDomains(domains);
}
// Adapts the raw `domain` array to the PreviewParseWriter.IDomain interface so that it can be
// handed to PreviewParseWriter.guessType below; lookups go straight to the array.
PreviewParseWriter.IDomain domainWrapper =
new PreviewParseWriter.IDomain() {
// Returns the number of entries in the `domain` array.
public int size() {
return domain.length;
}

// Linear scan over `domain`; returns true as soon as an entry equal to `value` is found.
// NOTE(review): `value.equals(...)` throws NullPointerException when `value` is null —
// confirm that callers never pass null.
public boolean contains(String value) {
for (String domainValue : domain) {
if (value.equals(domainValue)) return true;
honzasterba marked this conversation as resolved.
Show resolved Hide resolved
}
return false;
}
};

private void setPrivateDomains(IcedHashMap<String, String>[] domains) {
try {
Field domainsField = PreviewParseWriter.class.getDeclaredField("_domains");
domainsField.setAccessible(true);
domainsField.set(this, domains);
domainsField.setAccessible(false);
} catch (Exception e) {
throw new RuntimeException(e);
}
return PreviewParseWriter.guessType(
nLines, nNums, nStrings, nDates, nUUID, nZeros, nEmpty, domainWrapper);
}
}
41 changes: 0 additions & 41 deletions extensions/src/main/scala/water/parser/IcedHashMapWrapper.java

This file was deleted.

Expand Up @@ -46,8 +46,7 @@ class CategoricalPreviewParseWriterTestSuite extends FunSuite with Matchers {

val domain = testCase.filter(_ != null).distinct.toArray
val naCount = testCase.filter(_ == null).length
val categoricalWriter = new CategoricalPreviewParseWriter(domain, testCase.length, naCount)
val result = categoricalWriter.guessTypes()(0)
val result = CategoricalPreviewParseWriter.guessType(domain, testCase.length, naCount)

result shouldEqual expected
}
Expand Down