Skip to content

Commit

Permalink
Added field validation for data types, indices, width. Includes creat…
Browse files Browse the repository at this point in the history
…ing two setters in field config to set default value for data types and calculate/set width based on indices.
  • Loading branch information
Megan Foss committed Nov 23, 2021
1 parent 428a512 commit 428a2dd
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ public FixedwidthBatchReader(FixedwidthFormatConfig config, int maxRecords) {
this.maxRecords = maxRecords;
}


@Override
public boolean open(FileSchemaNegotiator negotiator) {
split = negotiator.split();
Expand Down Expand Up @@ -197,4 +196,4 @@ private boolean parseLine(String line, RowSetLoader writer) throws IOException {
return true;
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package org.apache.drill.exec.store.fixedwidth;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
Expand All @@ -29,12 +30,12 @@

@JsonTypeName("fixedwidthReaderFieldDescription")
@JsonInclude(JsonInclude.Include.NON_DEFAULT)
public class FixedwidthFieldConfig {
public class FixedwidthFieldConfig implements Comparable<FixedwidthFieldConfig> {

private final String name;
private final int index;
private final int width;
private final TypeProtos.MinorType type;
private int width;
private TypeProtos.MinorType type;
private final String dateTimeFormat;

public FixedwidthFieldConfig(@JsonProperty("name") String name,
Expand All @@ -44,41 +45,17 @@ public FixedwidthFieldConfig(@JsonProperty("name") String name,
this(name, index, width, type, null);
}

/**
 * Creates a field description from its JSON properties.
 *
 * <p>The arguments are stored exactly as supplied; this constructor performs
 * no validation or defaulting of its own.
 *
 * @param name value of the {@code name} property
 * @param index value of the {@code index} property
 * @param width value of the {@code width} property
 * @param type value of the {@code type} property
 * @param dateTimeFormat value of the {@code dateTimeFormat} property; may be
 *     {@code null} (the four-argument constructor passes {@code null} here)
 */
@JsonCreator
public FixedwidthFieldConfig(@JsonProperty("name") String name,
                             @JsonProperty("index") int index,
                             @JsonProperty("width") int width,
                             @JsonProperty("type") TypeProtos.MinorType type,
                             @JsonProperty("dateTimeFormat") String dateTimeFormat) {
  this.name = name;
  this.index = index;
  this.width = width;
  this.type = type;
  this.dateTimeFormat = dateTimeFormat;
}

/**
 * @return the name this field was constructed with
 */
public String getName() {
  return this.name;
}
Expand All @@ -87,8 +64,16 @@ public FixedwidthFieldConfig(@JsonProperty("name") String name,

/**
 * @return the current width of this field
 */
public int getWidth() {
  return this.width;
}

/**
 * Replaces the width of this field.
 *
 * @param value the new width; stored as given, without any range check
 */
public void setWidth(int value) {
  width = value;
}

/**
 * @return the current data type of this field
 */
public TypeProtos.MinorType getType() {
  return this.type;
}

/**
 * Resets this field's data type to {@code VARCHAR}.
 *
 * <p>Despite the setter-like name, this method takes no argument and always
 * assigns {@link TypeProtos.MinorType#VARCHAR}.
 */
public void setType() {
  type = TypeProtos.MinorType.VARCHAR;
}

/**
 * @return the date/time format string this field was constructed with;
 *     may be {@code null}
 */
public String getDateTimeFormat() {
  return this.dateTimeFormat;
}

@Override
Expand Down Expand Up @@ -122,4 +107,9 @@ public String toString() {
.field("dateTimeFormat", dateTimeFormat)
.toString();
}

/**
 * Orders field configurations by ascending index.
 *
 * @param o the field configuration to compare against
 * @return a negative value, zero, or a positive value when this field's index
 *     is less than, equal to, or greater than the index of {@code o}
 */
@Override
public int compareTo(FixedwidthFieldConfig o) {
  // Integer.compare avoids the deprecated boxing constructor and the
  // allocation it implies, while yielding the same ordering.
  return Integer.compare(this.getIndex(), o.getIndex());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,13 @@
import org.apache.drill.common.PlanStringBuilder;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
Expand All @@ -43,11 +45,15 @@ public class FixedwidthFormatConfig implements FormatPluginConfig {
private static final Logger logger = LoggerFactory.getLogger(FixedwidthFormatConfig.class);
private final List<String> extensions;
private final List<FixedwidthFieldConfig> fields;
// Data types that validDataTypes.contains(...) checks in this class are tested against.
private final List<TypeProtos.MinorType> validDataTypes = Arrays.asList(
  TypeProtos.MinorType.INT,
  TypeProtos.MinorType.VARCHAR,
  TypeProtos.MinorType.DATE,
  TypeProtos.MinorType.TIME,
  TypeProtos.MinorType.TIMESTAMP,
  TypeProtos.MinorType.FLOAT4,
  TypeProtos.MinorType.FLOAT8,
  TypeProtos.MinorType.BIGINT,
  TypeProtos.MinorType.VARDECIMAL);

@JsonCreator
public FixedwidthFormatConfig(@JsonProperty("extensions") List<String> extensions,
@JsonProperty("fields") List<FixedwidthFieldConfig> fields) {
this.extensions = extensions == null ? Collections.singletonList("fwf") : ImmutableList.copyOf(extensions);
Collections.sort(fields);
this.fields = fields;

validateFieldInput();
Expand Down Expand Up @@ -133,13 +139,56 @@ public List<Integer> getFieldWidths() {
return result;
}

/**
 * Assigns a width to every configured field whose index equals {@code i}.
 *
 * @param i the index identifying the field(s) to update
 * @param value the width to assign
 */
@JsonIgnore
public void setFieldWidths(int i, int value) {
  for (FixedwidthFieldConfig candidate : fields) {
    if (candidate.getIndex() != i) {
      continue;
    }
    candidate.setWidth(value);
  }
}

/**
 * Collects the data type of every configured field, in list order.
 *
 * @return a new list holding each field's type; empty when
 *     {@code hasFields()} reports that there are no fields
 */
@JsonIgnore
public List<TypeProtos.MinorType> getFieldTypes() {
  List<TypeProtos.MinorType> types = new ArrayList<>();
  if (hasFields()) {
    for (FixedwidthFieldConfig candidate : fields) {
      TypeProtos.MinorType fieldType = candidate.getType();
      types.add(fieldType);
    }
  }
  return types;
}

/**
 * Applies the no-argument {@code setType()} to every configured field whose
 * index equals {@code i}.
 *
 * @param i the index identifying the field(s) to update
 */
@JsonIgnore
public void setFieldTypes(int i) {
  for (FixedwidthFieldConfig candidate : fields) {
    if (candidate.getIndex() != i) {
      continue;
    }
    candidate.setType();
  }
}

@JsonIgnore
public void validateFieldInput(){
Set<String> uniqueNames = new HashSet<>();
for (String name : this.getFieldNames()){
/*if (name.length() == 0){
List<Integer> fieldIndices = this.getFieldIndices();
List<Integer> fieldWidths = this.getFieldWidths();
List<String> fieldNames = this.getFieldNames();
List<TypeProtos.MinorType> fieldTypes = this.getFieldTypes();
int width = 0;
int prevIndexAndWidth = -1;

}*/
// Ensure no two fields have the same name
for (String name : this.getFieldNames()){
if (name.length() == 0){
throw UserException
.validationError()
.message("Blank field name detected.")
.addContext("Plugin", FixedwidthFormatPlugin.DEFAULT_NAME)
.build(logger);
}
if (uniqueNames.contains(name)){
throw UserException
.validationError()
Expand All @@ -149,10 +198,6 @@ public void validateFieldInput(){
}
uniqueNames.add(name);
}
List<Integer> fieldIndices = this.getFieldIndices();
List<Integer> fieldWidths = this.getFieldWidths();
List<String> fieldNames = this.getFieldNames();
int prevIndexAndWidth = -1;

//assuming that fieldIndices is the same size as fieldWidths, width is required
for (int i = 0; i<fieldIndices.size(); i++) {
Expand All @@ -163,24 +208,35 @@ public void validateFieldInput(){
.addContext("Plugin", FixedwidthFormatPlugin.DEFAULT_NAME)
.build(logger);
}
/*
else if (fieldWidths.get(i) == null || fieldWidths.get(i) < 1) {
else if (fieldIndices.get(i) <= prevIndexAndWidth) {
throw UserException
.validationError()
.message("Overlapping fields: " + fieldNames.get(i-1) + " and " + fieldNames.get(i))
.addContext("Plugin", FixedwidthFormatPlugin.DEFAULT_NAME)
.build(logger);
}

if (fieldWidths.get(i) == null || fieldWidths.get(i) < 1) {
// Come back to this - can we calculate this instead of throwing an error?
if (i == fieldIndices.size()-1) {
Integer width =
throw UserException
.validationError()
.message("Width for field '" + fieldNames.get(i) + "' is empty.")
.addContext("Plugin", FixedwidthFormatPlugin.DEFAULT_NAME)
.build(logger);
}
Integer width = fieldIndices.get(i+1) - fieldIndices.get(i);
fieldWidths.set(i, width);
width = fieldIndices.get(i+1) - fieldIndices.get(i) - 1;
setFieldWidths(fieldIndices.get(i), width);
}
prevIndexAndWidth = fieldIndices.get(i) + fieldWidths.get(i);

// Validate Field Type
if (fieldTypes.get(i) == null || fieldTypes.get(i).toString().length() == 0) {
setFieldTypes(fieldIndices.get(i));
}
else if (!validDataTypes.contains(fieldTypes.get(i))){
setFieldTypes(fieldIndices.get(i)); //Should we throw an error or default to VARCHAR for data types that are not yet available in this plugin
}
*/
else if (fieldIndices.get(i) <= prevIndexAndWidth) {
throw UserException
.validationError()
.message("Overlapping fields: " + fieldNames.get(i-1) + " and " + fieldNames.get(i))
.addContext("Plugin", FixedwidthFormatPlugin.DEFAULT_NAME)
.build(logger);
}
prevIndexAndWidth = fieldIndices.get(i) + fieldWidths.get(i);
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ public static void setup() throws Exception {
FixedwidthFormatConfig formatConfig = new FixedwidthFormatConfig(Lists.newArrayList("fwf"),
Lists.newArrayList(
new FixedwidthFieldConfig("Number", 1, 5, TypeProtos.MinorType.VARDECIMAL),
new FixedwidthFieldConfig("Letter", 7,4, TypeProtos.MinorType.VARCHAR, ""),
new FixedwidthFieldConfig("Address",12, 3,TypeProtos.MinorType.INT, ""),
new FixedwidthFieldConfig("Letter", 7,4, TypeProtos.MinorType.VARCHAR, ""),
new FixedwidthFieldConfig("Date",16, 10,TypeProtos.MinorType.DATE, "MM-dd-yyyy"),
new FixedwidthFieldConfig( "Time", 27, 8,TypeProtos.MinorType.TIME,"HH:mm:ss" ),
new FixedwidthFieldConfig("DateTime", 36, 23,TypeProtos.MinorType.TIMESTAMP, "MM-dd-yyyy'T'HH:mm:ss.SSX" )
Expand Down

0 comments on commit 428a2dd

Please sign in to comment.