Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.nifi.processors.kite;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.IndexedRecord;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
 * Responsible for converting records of one Avro type to another. Supports
 * syntax like "record.field" to unpack fields nested inside sub-records and
 * will try to do simple type conversion (anything to string, and text or
 * numbers to long/int/double/float).
 */
public class AvroRecordConverter {
    private final Schema inputSchema;
    private final Schema outputSchema;
    // Stored from output field name to input field name so we can look up by
    // output field while building the result record.
    private final Map<String, String> fieldMapping;
    // Locale used to parse non-numeric content (e.g. strings) into numbers.
    private final Locale locale;

    /**
     * Creates a converter that parses textual numbers using the JVM default
     * locale.
     *
     * @param inputSchema
     *            Schema of input record objects
     * @param outputSchema
     *            Schema of output record objects
     * @param fieldMapping
     *            Map from field name in input record to field name in output
     *            record. Input names may use "record.field" syntax to reach
     *            into nested records.
     */
    public AvroRecordConverter(Schema inputSchema, Schema outputSchema,
            Map<String, String> fieldMapping) {
        this(inputSchema, outputSchema, fieldMapping, Locale.getDefault());
    }

    /**
     * Creates a converter with an explicit locale for parsing textual
     * numbers.
     *
     * @param inputSchema
     *            Schema of input record objects
     * @param outputSchema
     *            Schema of output record objects
     * @param fieldMapping
     *            Map from field name in input record to field name in output
     *            record. Input names may use "record.field" syntax to reach
     *            into nested records.
     * @param locale
     *            Locale used when non-numeric content (such as a string) has
     *            to be parsed into a numeric output type. If null, the JVM
     *            default locale is used.
     */
    public AvroRecordConverter(Schema inputSchema, Schema outputSchema,
            Map<String, String> fieldMapping, Locale locale) {
        this.inputSchema = inputSchema;
        this.outputSchema = outputSchema;
        this.locale = (locale == null) ? Locale.getDefault() : locale;
        // The caller supplies input -> output; reverse it so lookups can be
        // done by output field name. If two input fields map to the same
        // output field, the one iterated last wins.
        this.fieldMapping = Maps
                .newHashMapWithExpectedSize(fieldMapping.size());
        for (Map.Entry<String, String> entry : fieldMapping.entrySet()) {
            this.fieldMapping.put(entry.getValue(), entry.getKey());
        }
    }

    /**
     * @return Any fields in the output schema that are not mapped or are mapped
     *         by a non-existent input field. A mapping that tries to step
     *         through something that is not a record is also reported as
     *         unmapped.
     */
    public Collection<String> getUnmappedFields() {
        List<String> result = Lists.newArrayList();
        for (Field f : outputSchema.getFields()) {
            // Default to matching by name unless an explicit mapping exists.
            String fieldName = f.name();
            if (fieldMapping.containsKey(fieldName)) {
                fieldName = fieldMapping.get(fieldName);
            }

            Schema currentSchema = inputSchema;
            while (currentSchema != null && fieldName.contains(".")) {
                // Walk down the schema one path segment at a time.
                int dotIndex = fieldName.indexOf('.');
                String entityName = fieldName.substring(0, dotIndex);
                // In case we had an optional record, choose just the record.
                currentSchema = getNonNullSchema(currentSchema);
                Field step = findField(currentSchema, entityName);
                if (step == null) {
                    // Tried to step into a field that doesn't exist (or into
                    // a non-record). fieldName still contains a dot, so the
                    // lookup below fails and the field is reported.
                    break;
                }
                currentSchema = step.schema();
                fieldName = fieldName.substring(dotIndex + 1);
            }
            if (currentSchema == null
                    || findField(getNonNullSchema(currentSchema), fieldName) == null) {
                result.add(f.name());
            }
        }
        return result;
    }

    /**
     * Converts one record to another given a input and output schema plus
     * explicit mappings for certain target fields.
     *
     * @param input
     *            Input record to convert conforming to the inputSchema this
     *            converter was created with. Field positions are taken from
     *            inputSchema, not from the record's own schema.
     * @return Record converted to the outputSchema this converter was created
     *         with.
     * @throws AvroConversionException
     *             When schemas do not match (an output field has no
     *             corresponding input field, or a dotted path steps through
     *             a non-record) or illegal conversions are attempted, such
     *             as when numeric data fails to parse.
     */
    public Record convert(Record input) throws AvroConversionException {
        Record result = new Record(outputSchema);
        for (Field outputField : outputSchema.getFields()) {
            // Default to matching by name
            String inputFieldName = outputField.name();
            if (fieldMapping.containsKey(outputField.name())) {
                inputFieldName = fieldMapping.get(outputField.name());
            }
            // Keep the full path for error messages; inputFieldName is
            // shortened as the path is consumed.
            final String fullInputName = inputFieldName;

            IndexedRecord currentRecord = input;
            Schema currentSchema = getNonNullSchema(inputSchema);
            while (inputFieldName.contains(".")) {
                // Walk down the record one path segment at a time.
                int dotIndex = inputFieldName.indexOf('.');
                String entityName = inputFieldName.substring(0, dotIndex);
                Field entityField = findField(currentSchema, entityName);
                if (entityField == null) {
                    throw new AvroConversionException("Input schema has no field "
                            + entityName + " needed to resolve " + fullInputName
                            + " for output field " + outputField.name());
                }
                // Get the record object
                Object innerRecord = currentRecord.get(entityField.pos());
                if (innerRecord == null) {
                    // Probably hit a null record here. Just break out of the
                    // loop so that null object will be passed to convertData
                    // below.
                    currentRecord = null;
                    break;
                }
                if (!(innerRecord instanceof IndexedRecord)) {
                    throw new AvroConversionException(inputFieldName
                            + " stepped through a non-record");
                }
                currentRecord = (IndexedRecord) innerRecord;

                // Get the schema. In case we had an optional record, choose
                // just the record.
                currentSchema = getNonNullSchema(entityField.schema());
                inputFieldName = inputFieldName.substring(dotIndex + 1);
            }

            // Current should now be in the right place to read the field.
            Field f = findField(currentSchema, inputFieldName);
            if (currentRecord == null) {
                // We may have stepped into a null union type and gotten a null
                // result. The input schema is irrelevant for null content, so
                // it is fine for it to be unknown here.
                Schema s = (f == null) ? null : f.schema();
                result.put(outputField.name(),
                        convertData(null, s, outputField.schema()));
            } else {
                if (f == null) {
                    throw new AvroConversionException("Input schema has no field "
                            + fullInputName + " for output field "
                            + outputField.name());
                }
                result.put(
                        outputField.name(),
                        convertData(currentRecord.get(f.pos()), f.schema(),
                                outputField.schema()));
            }
        }
        return result;
    }

    /**
     * @return The input schema this converter was created with.
     */
    public Schema getInputSchema() {
        return inputSchema;
    }

    /**
     * @return The output schema this converter was created with.
     */
    public Schema getOutputSchema() {
        return outputSchema;
    }

    /**
     * Looks up a field by name, tolerating schemas that cannot have fields.
     *
     * @param schema
     *            Schema to search; may be null or a non-record type.
     * @param name
     *            Field name to look up.
     * @return The field, or null if schema is null, is not a record, or has
     *         no field with that name.
     */
    private static Field findField(Schema schema, String name) {
        // Schema.getField is only meaningful on record schemas, so check the
        // type first instead of letting the call fail.
        if (schema == null || schema.getType() != Schema.Type.RECORD) {
            return null;
        }
        return schema.getField(name);
    }

    /**
     * Converts the data from one schema to another. If the types are the same,
     * no change will be made, but simple conversions will be attempted for
     * other types.
     *
     * @param content
     *            The data to convert, generally taken from a field in an input
     *            Record.
     * @param inputSchema
     *            The schema of the content object. Only consulted when content
     *            is non-null.
     * @param outputSchema
     *            The schema to convert to.
     * @return The content object, converted to the output schema.
     * @throws AvroConversionException
     *             When conversion is impossible, either because the output type
     *             is not supported, because the output does not accept null,
     *             or because numeric data failed to parse.
     */
    private Object convertData(Object content, Schema inputSchema,
            Schema outputSchema) throws AvroConversionException {
        if (content == null) {
            // No conversion can happen here.
            if (supportsNull(outputSchema)) {
                return null;
            }
            throw new AvroConversionException("Output schema " + outputSchema
                    + " does not support null");
        }

        Schema nonNillInput = getNonNullSchema(inputSchema);
        Schema nonNillOutput = getNonNullSchema(outputSchema);
        if (nonNillInput.getType().equals(nonNillOutput.getType())) {
            // Same type on both sides: pass the value through untouched.
            return content;
        }
        if (nonNillOutput.getType() == Schema.Type.STRING) {
            return content.toString();
        }

        // For the remaining numeric targets we convert through the string
        // form, using Scanner to validate. Only the first token is read, so
        // a String that starts with a number but then contains other
        // whitespace-separated content still converts.
        Scanner scanner = new Scanner(content.toString());
        try {
            // Number.toString() never uses locale-specific separators, so
            // numeric content must not be parsed with the configured locale;
            // only textual content is.
            scanner.useLocale(content instanceof Number ? Locale.ROOT : locale);
            switch (nonNillOutput.getType()) {
            case LONG:
                if (scanner.hasNextLong()) {
                    return scanner.nextLong();
                }
                throw new AvroConversionException("Cannot convert "
                        + content + " to long");
            case INT:
                if (scanner.hasNextInt()) {
                    return scanner.nextInt();
                }
                throw new AvroConversionException("Cannot convert "
                        + content + " to int");
            case DOUBLE:
                if (scanner.hasNextDouble()) {
                    return scanner.nextDouble();
                }
                throw new AvroConversionException("Cannot convert "
                        + content + " to double");
            case FLOAT:
                if (scanner.hasNextFloat()) {
                    return scanner.nextFloat();
                }
                throw new AvroConversionException("Cannot convert "
                        + content + " to float");
            default:
                throw new AvroConversionException("Cannot convert to type "
                        + nonNillOutput.getType());
            }
        } finally {
            scanner.close();
        }
    }

    /**
     * If s is a union schema of some type with null, returns that type.
     * Otherwise just return schema itself.
     *
     * Does not handle unions of schemas with anything except null and one type.
     *
     * @param s
     *            Schema to remove nillable from.
     * @return The Schema of the non-null part of the union, if the input was
     *         a union type. Otherwise returns the input schema. A union that
     *         contains no non-null type is returned as is.
     * @throws IllegalArgumentException
     *             If the union contains more than one non-null type.
     */
    protected static Schema getNonNullSchema(Schema s) {
        if (s.getType() != Schema.Type.UNION) {
            return s;
        }
        // s is a union type. Assert that this must be a union that only
        // includes one non-null type.
        boolean foundOne = false;
        Schema result = s;
        for (Schema type : s.getTypes()) {
            if (!type.getType().equals(Schema.Type.NULL)) {
                Preconditions.checkArgument(!foundOne,
                        "Cannot handle union of two non-null types");
                foundOne = true;
                result = type;
            }
        }
        return result;
    }

    /**
     * @param s
     *            Schema to inspect.
     * @return true if s is the null type or a union with a null branch.
     */
    protected static boolean supportsNull(Schema s) {
        if (s.getType() == Schema.Type.NULL) {
            return true;
        } else if (s.getType() == Schema.Type.UNION) {
            for (Schema type : s.getTypes()) {
                if (type.getType() == Schema.Type.NULL) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Exception thrown when Avro conversion fails.
     */
    // NOTE(review): this is a non-static inner class, so every instance keeps
    // a reference to the converter that created it. Making it static would be
    // cleaner but would break any code using qualified `converter.new ...`
    // construction, so it is left as is here.
    public class AvroConversionException extends Exception {
        public AvroConversionException(String string, IOException e) {
            super(string, e);
        }

        public AvroConversionException(String string) {
            super(string);
        }
    }
}
Loading