Skip to content
Permalink
Browse files
DRILL-6104: Add Log/Regex Format Plugin
closes #1114
  • Loading branch information
cgivre authored and Ben-Zvi committed Jul 19, 2018
1 parent 5a0c75f commit b1aca337f4e07ad34ad5662872b237bd7257e468
Show file tree
Hide file tree
Showing 14 changed files with 1,555 additions and 6 deletions.
@@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.log;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.base.Objects;
import org.apache.drill.common.logical.FormatPluginConfig;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

@JsonTypeName("logRegex")
public class LogFormatConfig implements FormatPluginConfig {

private String regex;
private String extension;
private int maxErrors = 10;
private List<LogFormatField> schema;

public String getRegex() {
return regex;
}

public String getExtension() {
return extension;
}

public int getMaxErrors() {
return maxErrors;
}

public List<LogFormatField> getSchema() {
return schema;
}

//Setters
public void setExtension(String ext) {
this.extension = ext;
}

public void setMaxErrors(int errors) {
this.maxErrors = errors;
}

public void setRegex(String regex) {
this.regex = regex;
}

public void setSchema() {
this.schema = new ArrayList<LogFormatField>();
}

@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null || getClass() != obj.getClass()) {
return false;
}
LogFormatConfig other = (LogFormatConfig) obj;
return Objects.equal(regex, other.regex) &&
Objects.equal(maxErrors, other.maxErrors) &&
Objects.equal(schema, other.schema) &&
Objects.equal(extension, other.extension);
}

@Override
public int hashCode() {
return Arrays.hashCode(new Object[]{regex, maxErrors, schema, extension});
}

@JsonIgnore
public List<String> getFieldNames() {
List<String> result = new ArrayList<String>();
if (this.schema == null) {
return result;
}

for (LogFormatField field : this.schema) {
result.add(field.getFieldName());
}
return result;
}

@JsonIgnore
public String getDataType(int fieldIndex) {
LogFormatField f = this.schema.get(fieldIndex);
return f.getFieldType().toUpperCase();
}

@JsonIgnore
public LogFormatField getField(int fieldIndex) {
return this.schema.get(fieldIndex);
}

@JsonIgnore
public String getDateFormat(int patternIndex) {
LogFormatField f = this.schema.get(patternIndex);
return f.getFormat();
}
}
@@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.drill.exec.store.log;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonTypeName;

@JsonTypeName("regexReaderFieldDescription")
@JsonInclude(JsonInclude.Include.NON_DEFAULT)
public class LogFormatField {

/*
* The three configuration options for a field are:
* 1. The field name
* 2. The data type (fieldType). Field type defaults to VARCHAR if it is not specified
* 3. The format string which is used for date/time fields. This field is ignored if used with a non
* date/time field.
* */

private String fieldName = "";
private String fieldType = "VARCHAR";
private String format;

//These will be used in the future for field validation and masking
//public String validator;
//public double minValue;
//public double maxValue;


public LogFormatField() {
}

//These constructors are used for unit testing
public LogFormatField(String fieldName) {
this(fieldName, null, null);
}

public LogFormatField(String fieldName, String fieldType) {
this(fieldName, fieldType, null);
}

public LogFormatField(String fieldName, String fieldType, String format) {
this.fieldName = fieldName;
this.fieldType = fieldType;
this.format = format;
}

public String getFieldName() {
return fieldName;
}

public String getFieldType() {
return fieldType;
}

public String getFormat() {
return format;
}


/*
public String getValidator() { return validator; }
public double getMinValue() { return minValue; }
public double getMaxValue() {
return maxValue;
}
*/
}
@@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.drill.exec.store.log;

import com.google.common.collect.Lists;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.proto.UserBitShared;
import org.apache.drill.exec.server.DrillbitContext;
import org.apache.drill.exec.store.RecordReader;
import org.apache.drill.exec.store.RecordWriter;
import org.apache.drill.exec.store.dfs.DrillFileSystem;
import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
import org.apache.drill.exec.store.dfs.easy.EasyWriter;
import org.apache.drill.exec.store.dfs.easy.FileWork;
import org.apache.hadoop.conf.Configuration;

import java.util.List;

public class LogFormatPlugin extends EasyFormatPlugin<LogFormatConfig> {

public static final String DEFAULT_NAME = "logRegex";
private final LogFormatConfig formatConfig;

public LogFormatPlugin(String name, DrillbitContext context,
Configuration fsConf, StoragePluginConfig storageConfig,
LogFormatConfig formatConfig) {
super(name, context, fsConf, storageConfig, formatConfig,
true, // readable
false, // writable
true, // blockSplittable
true, // compressible
Lists.newArrayList(formatConfig.getExtension()),
DEFAULT_NAME);
this.formatConfig = formatConfig;
}

@Override
public RecordReader getRecordReader(FragmentContext context,
DrillFileSystem dfs, FileWork fileWork, List<SchemaPath> columns,
String userName) throws ExecutionSetupException {
return new LogRecordReader(context, dfs, fileWork,
columns, userName, formatConfig);
}

@Override
public boolean supportsPushDown() {
return true;
}

@Override
public RecordWriter getRecordWriter(FragmentContext context,
EasyWriter writer) throws UnsupportedOperationException {
throw new UnsupportedOperationException("unimplemented");
}

@Override
public int getReaderOperatorType() {
return UserBitShared.CoreOperatorType.REGEX_SUB_SCAN_VALUE;
}

@Override
public int getWriterOperatorType() {
throw new UnsupportedOperationException("unimplemented");
}
}

0 comments on commit b1aca33

Please sign in to comment.