Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 105 additions & 71 deletions src/main/java/org/apache/sysds/lops/Data.java

Large diffs are not rendered by default.

100 changes: 78 additions & 22 deletions src/main/java/org/apache/sysds/parser/DataExpression.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ public class DataExpression extends DataIdentifier
//public static final String DELIM_RECODE = "recode";
public static final String DELIM_NA_STRINGS = "naStrings";
public static final String DELIM_NA_STRING_SEP = "\u00b7";

// Parameter names relevant to reading/writing delimited index/libsvm files
public static final String LIBSVM_INDEX_DELIM = "indSep";

public static final String DELIM_SPARSE = "sparse"; // applicable only for write

Expand All @@ -137,6 +138,8 @@ public class DataExpression extends DataIdentifier
VALUETYPEPARAM, SCHEMAPARAM, DESCRIPTIONPARAM, AUTHORPARAM, CREATEDPARAM,
// Parameters related to delimited/csv files.
DELIM_FILL_VALUE, DELIM_DELIMITER, DELIM_FILL, DELIM_HAS_HEADER_ROW, DELIM_NA_STRINGS,
// Parameters related to delimited/libsvm files.
LIBSVM_INDEX_DELIM,
// Parameters related to privacy
PRIVACY, FINE_GRAINED_PRIVACY));

Expand All @@ -145,7 +148,9 @@ public class DataExpression extends DataIdentifier
Arrays.asList(IO_FILENAME, READROWPARAM, READCOLPARAM, FORMAT_TYPE, DATATYPEPARAM,
VALUETYPEPARAM, SCHEMAPARAM, ROWBLOCKCOUNTPARAM, COLUMNBLOCKCOUNTPARAM, READNNZPARAM,
// Parameters related to delimited/csv files.
DELIM_FILL_VALUE, DELIM_DELIMITER, DELIM_FILL, DELIM_HAS_HEADER_ROW, DELIM_NA_STRINGS));
DELIM_FILL_VALUE, DELIM_DELIMITER, DELIM_FILL, DELIM_HAS_HEADER_ROW, DELIM_NA_STRINGS,
// Parameters related to delimited/libsvm files.
LIBSVM_INDEX_DELIM));

/* Default Values for delimited (CSV/LIBSVM) files */
public static final String DEFAULT_DELIM_DELIMITER = ",";
Expand All @@ -155,6 +160,7 @@ public class DataExpression extends DataIdentifier
public static final boolean DEFAULT_DELIM_SPARSE = false;
public static final String DEFAULT_NA_STRINGS = "";
public static final String DEFAULT_SCHEMAPARAM = "NULL";
public static final String DEFAULT_LIBSVM_INDEX_DELIM = ":";

private DataOp _opcode;
private HashMap<String, Expression> _varParams;
Expand Down Expand Up @@ -921,6 +927,7 @@ public void validateExpression(HashMap<String, DataIdentifier> ids, HashMap<Stri
|| getVarParam(COLUMNBLOCKCOUNTPARAM) != null
|| getVarParam(FORMAT_TYPE) != null
|| getVarParam(DELIM_DELIMITER) != null
|| getVarParam(LIBSVM_INDEX_DELIM) != null
|| getVarParam(DELIM_HAS_HEADER_ROW) != null
|| getVarParam(DELIM_FILL) != null
|| getVarParam(DELIM_FILL_VALUE) != null
Expand Down Expand Up @@ -1151,33 +1158,55 @@ else if( getVarParam(READNNZPARAM) != null ) {
}
}

boolean islibsvm = false;
islibsvm = (formatTypeString != null && formatTypeString.equalsIgnoreCase(FileFormat.LIBSVM.toString()));
if (islibsvm){
boolean isLIBSVM = false;
isLIBSVM = (formatTypeString != null && formatTypeString.equalsIgnoreCase(FileFormat.LIBSVM.toString()));
if (isLIBSVM) {
// Handle libsvm file format
shouldReadMTD = true;

// only allow IO_FILENAME, FORMAT_TYPE, READROWPARAM, READCOLPARAM, READNNZPARAM,
// DATATYPEPARAM, VALUETYPEPARAM, DELIM_DELIMITER, and LIBSVM_INDEX_DELIM as valid parameters
if( !inferredFormatType ){
for (String key : _varParams.keySet()){
if (! (key.equals(IO_FILENAME) || key.equals(FORMAT_TYPE)
if( !inferredFormatType ) {
for (String key : _varParams.keySet()) {
if (!(key.equals(IO_FILENAME) || key.equals(FORMAT_TYPE)
|| key.equals(READROWPARAM) || key.equals(READCOLPARAM)
|| key.equals(READNNZPARAM) || key.equals(DATATYPEPARAM)
|| key.equals(VALUETYPEPARAM) ))
|| key.equals(VALUETYPEPARAM) || key.equals(DELIM_DELIMITER)
|| key.equals(LIBSVM_INDEX_DELIM)))
{
String msg = "Only parameters allowed are: " + IO_FILENAME + ","
+ READROWPARAM + ","
+ READCOLPARAM;
String msg = "Only parameters allowed are: " + IO_FILENAME + ","
+ READROWPARAM + "," + READCOLPARAM
+ DELIM_DELIMITER + "," + LIBSVM_INDEX_DELIM;

raiseValidateError("Invalid parameter " + key + " in read statement: " +
toString() + ". " + msg, conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
}
}
// DEFAULT for "sep" : ","
if (getVarParam(DELIM_DELIMITER) == null) {
addVarParam(DELIM_DELIMITER, new StringIdentifier(DEFAULT_DELIM_DELIMITER, this));
}
else {
if ((getVarParam(DELIM_DELIMITER) instanceof ConstIdentifier)
&& (!(getVarParam( DELIM_DELIMITER) instanceof StringIdentifier))) {
raiseValidateError( "For delimited file " + getVarParam(DELIM_DELIMITER) + " must be a string value ", conditional);
}
}
// DEFAULT for "indSep": ":"
if(getVarParam(LIBSVM_INDEX_DELIM) == null) {
addVarParam(LIBSVM_INDEX_DELIM, new StringIdentifier(DEFAULT_LIBSVM_INDEX_DELIM, this));
}
else {
if((getVarParam(LIBSVM_INDEX_DELIM) instanceof ConstIdentifier)
&& (!(getVarParam(LIBSVM_INDEX_DELIM) instanceof StringIdentifier))) {
raiseValidateError(
"For delimited file " + getVarParam(LIBSVM_INDEX_DELIM) + " must be a string value ", conditional);
}
}
}

dataTypeString = (getVarParam(DATATYPEPARAM) == null) ? null : getVarParam(DATATYPEPARAM).toString();
dataTypeString = (getVarParam(DATATYPEPARAM) == null) ? null : getVarParam(DATATYPEPARAM).toString();

if ( dataTypeString == null || dataTypeString.equalsIgnoreCase(Statement.MATRIX_DATA_TYPE)
|| dataTypeString.equalsIgnoreCase(Statement.FRAME_DATA_TYPE)) {
Expand All @@ -1203,8 +1232,8 @@ else if( getVarParam(READNNZPARAM) != null ) {
// initialize size of target data identifier to UNKNOWN
getOutput().setDimensions(-1, -1);

if ( !isCSV && ConfigurationManager.getCompilerConfig()
.getBool(ConfigType.REJECT_READ_WRITE_UNKNOWNS) //skip check for csv format / jmlc api
if (!isCSV && !isLIBSVM && ConfigurationManager.getCompilerConfig()
.getBool(ConfigType.REJECT_READ_WRITE_UNKNOWNS) //skip check for csv/libsvm format / jmlc api
&& (getVarParam(READROWPARAM) == null || getVarParam(READCOLPARAM) == null) ) {
raiseValidateError("Missing or incomplete dimension information in read statement: "
+ mtdFileName, conditional, LanguageErrorCodes.INVALID_PARAMETERS);
Expand All @@ -1229,6 +1258,15 @@ && getVarParam(READCOLPARAM) instanceof ConstIdentifier)
}
}

if(isLIBSVM) {
Long dim2 = (getVarParam(READCOLPARAM) == null) ? null : Long.valueOf(getVarParam(READCOLPARAM).toString());
if(dim2 < 0 && ConfigurationManager.getCompilerConfig()
.getBool(ConfigType.REJECT_READ_WRITE_UNKNOWNS)) {
raiseValidateError("Invalid dimension information in read statement", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
getOutput().setDimensions(-1, dim2 + 1);
}

// initialize block dimensions to UNKNOWN
getOutput().setBlocksize(-1);

Expand Down Expand Up @@ -1292,7 +1330,7 @@ else if ( dataTypeString.equalsIgnoreCase(Statement.SCALAR_DATA_TYPE)) {

case WRITE:

// for delimited format, if no delimiter specified THEN set default ","
// for CSV format, if no delimiter specified THEN set default ","
if (getVarParam(FORMAT_TYPE) == null || getVarParam(FORMAT_TYPE).toString().equalsIgnoreCase(FileFormat.CSV.toString())){
if (getVarParam(DELIM_DELIMITER) == null) {
addVarParam(DELIM_DELIMITER, new StringIdentifier(DEFAULT_DELIM_DELIMITER, this));
Expand All @@ -1304,13 +1342,20 @@ else if ( dataTypeString.equalsIgnoreCase(Statement.SCALAR_DATA_TYPE)) {
addVarParam(DELIM_SPARSE, new BooleanIdentifier(DEFAULT_DELIM_SPARSE, this));
}
}

if (getVarParam(FORMAT_TYPE) == null || getVarParam(FORMAT_TYPE).toString().equalsIgnoreCase(FileFormat.LIBSVM.toString())){
if (getVarParam(DELIM_SPARSE) == null) {
addVarParam(DELIM_SPARSE, new BooleanIdentifier(DEFAULT_DELIM_SPARSE, this));
}
}

// for LIBSVM format, add the default separators if not specified
if (getVarParam(FORMAT_TYPE) == null || getVarParam(FORMAT_TYPE).toString().equalsIgnoreCase(FileFormat.LIBSVM.toString())) {
if(getVarParam(DELIM_DELIMITER) == null) {
addVarParam(DELIM_DELIMITER, new StringIdentifier(DEFAULT_DELIM_DELIMITER, this));
}
if(getVarParam(LIBSVM_INDEX_DELIM) == null) {
addVarParam(LIBSVM_INDEX_DELIM, new StringIdentifier(DEFAULT_LIBSVM_INDEX_DELIM, this));
}
if(getVarParam(DELIM_SPARSE) == null) {
addVarParam(DELIM_SPARSE, new BooleanIdentifier(DEFAULT_DELIM_SPARSE, this));
}
}

/* NOTE MB: disabled filename concatenation because we now support dynamic rewrite
if (getVarParam(IO_FILENAME) instanceof BinaryExpression){
BinaryExpression expr = (BinaryExpression)getVarParam(IO_FILENAME);
Expand Down Expand Up @@ -2307,6 +2352,17 @@ public boolean isCSVReadWithUnknownSize() {
return false;
}

/**
 * Checks whether this expression is a LIBSVM read for which the row or
 * column dimension is not available.
 *
 * @return true if this is a read whose format parameter equals the LIBSVM
 *         format (ignoring case) and the rows or cols parameter is either
 *         absent or parses to a negative number; false otherwise
 */
public boolean isLIBSVMReadWithUnknownSize() {
	Expression fmt = getVarParam(FORMAT_TYPE);

	// anything that is not an explicitly declared libsvm read is not our concern
	if (_opcode != DataOp.READ || fmt == null
		|| !fmt.toString().equalsIgnoreCase(FileFormat.LIBSVM.toString())) {
		return false;
	}

	// a missing or negative row count already makes the size unknown
	Expression nrow = getVarParam(READROWPARAM);
	if (nrow == null || Long.parseLong(nrow.toString()) < 0) {
		return true;
	}

	// otherwise the answer depends solely on the column count
	Expression ncol = getVarParam(READCOLPARAM);
	return ncol == null || Long.parseLong(ncol.toString()) < 0;
}

public boolean isRead()
{
return (_opcode == DataOp.READ);
Expand Down
Loading