From 8b2d70c67c1560f119f303b0beb6b0198f99b76e Mon Sep 17 00:00:00 2001 From: Owen O'Malley Date: Tue, 23 Jul 2019 04:40:47 -0700 Subject: [PATCH] ORC-529: Allow configuration and table properties to control encryption. Fixes #415 Signed-off-by: Owen O'Malley --- .../java/org/apache/orc/InMemoryKeystore.java | 5 +- .../core/src/java/org/apache/orc/OrcConf.java | 5 +- .../core/src/java/org/apache/orc/OrcFile.java | 98 +--- .../java/org/apache/orc/TypeDescription.java | 385 +++------------- .../java/org/apache/orc/impl/CryptoUtils.java | 52 +++ .../java/org/apache/orc/impl/ParserUtils.java | 425 ++++++++++++++++++ .../java/org/apache/orc/impl/WriterImpl.java | 92 ++-- .../orc/impl/reader/ReaderEncryption.java | 11 +- .../impl/reader/ReaderEncryptionVariant.java | 6 +- .../org.apache.orc.impl.KeyProvider$Factory | 15 + .../org/apache/orc/TestTypeDescription.java | 81 ++++ .../org/apache/orc/TestVectorOrcFile.java | 11 +- .../org/apache/orc/impl/TestCryptoUtils.java | 45 +- .../apache/orc/impl/TestPhysicalFsWriter.java | 2 +- .../apache/orc/mapred/OrcOutputFormat.java | 4 +- java/pom.xml | 1 + .../java/org/apache/orc/impl/HadoopShims.java | 65 +-- .../apache/orc/impl/HadoopShimsCurrent.java | 4 +- .../apache/orc/impl/HadoopShimsPre2_3.java | 2 +- .../apache/orc/impl/HadoopShimsPre2_6.java | 2 +- .../apache/orc/impl/HadoopShimsPre2_7.java | 4 +- .../java/org/apache/orc/impl/KeyProvider.java | 84 ++++ .../orc/impl/TestHadoopShimsPre2_7.java | 140 ------ .../java/org/apache/orc/tools/KeyTool.java | 9 +- .../org/apache/orc/impl/FakeKeyProvider.java | 142 ++++++ .../orc/impl/TestHadoopKeyProvider.java | 62 +++ ...pache.hadoop.crypto.key.KeyProviderFactory | 2 +- 27 files changed, 1080 insertions(+), 674 deletions(-) create mode 100644 java/core/src/java/org/apache/orc/impl/ParserUtils.java create mode 100644 java/core/src/resources/META-INF/services/org.apache.orc.impl.KeyProvider$Factory create mode 100644 java/shims/src/java/org/apache/orc/impl/KeyProvider.java create mode 100644 java/tools/src/test/org/apache/orc/impl/FakeKeyProvider.java create mode 100644 java/tools/src/test/org/apache/orc/impl/TestHadoopKeyProvider.java rename java/{shims => tools}/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory (92%) diff --git a/java/core/src/java/org/apache/orc/InMemoryKeystore.java b/java/core/src/java/org/apache/orc/InMemoryKeystore.java index 735962ffb9..78e6a85f34 100644 --- a/java/core/src/java/org/apache/orc/InMemoryKeystore.java +++ b/java/core/src/java/org/apache/orc/InMemoryKeystore.java @@ -18,6 +18,7 @@ package org.apache.orc; import org.apache.orc.impl.HadoopShims; +import org.apache.orc.impl.KeyProvider; import org.apache.orc.impl.LocalKey; import javax.crypto.BadPaddingException; @@ -40,7 +41,7 @@ import java.util.TreeMap; /** - * This is an in-memory implementation of {@link HadoopShims.KeyProvider}. + * This is an in-memory implementation of {@link KeyProvider}. * * The primary use of this class is for when the user doesn't have a * Hadoop KMS running and wishes to use encryption. It is also useful for @@ -52,7 +53,7 @@ * * This class is not thread safe. */ -public class InMemoryKeystore implements HadoopShims.KeyProvider { +public class InMemoryKeystore implements KeyProvider { /** * Support AES 256 ? 
*/ diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java index a6fbad1c80..7cca1dbf6b 100644 --- a/java/core/src/java/org/apache/orc/OrcConf.java +++ b/java/core/src/java/org/apache/orc/OrcConf.java @@ -162,7 +162,10 @@ public enum OrcConf { "Comma-separated list of columns for which dictionary encoding is to be skipped."), // some JVM doesn't allow array creation of size Integer.MAX_VALUE, so chunk size is slightly less than max int ORC_MAX_DISK_RANGE_CHUNK_LIMIT("orc.max.disk.range.chunk.limit", "hive.exec.orc.max.disk.range.chunk.limit", - Integer.MAX_VALUE - 1024, "When reading stripes >2GB, specify max limit for the chunk size.") + Integer.MAX_VALUE - 1024, "When reading stripes >2GB, specify max limit for the chunk size."), + ENCRYPTION("orc.encrypt", "orc.encrypt", null, "The list of keys and columns to encrypt with"), + DATA_MASK("orc.mask", "orc.mask", null, "The masks to apply to the encrypted columns"), + KEY_PROVIDER("orc.key.provider", "orc.key.provider", "hadoop", "The kind of KeyProvider to use for encryption.") ; private final String attribute; diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java index 4eb2c83938..23c4d0f32a 100644 --- a/java/core/src/java/org/apache/orc/OrcFile.java +++ b/java/core/src/java/org/apache/orc/OrcFile.java @@ -32,6 +32,7 @@ import org.apache.hadoop.fs.Path; import org.apache.orc.impl.HadoopShims; import org.apache.orc.impl.HadoopShimsFactory; +import org.apache.orc.impl.KeyProvider; import org.apache.orc.impl.MemoryManagerImpl; import org.apache.orc.impl.OrcTail; import org.apache.orc.impl.ReaderImpl; @@ -275,7 +276,7 @@ public static class ReaderOptions { private FileSystem filesystem; private long maxLength = Long.MAX_VALUE; private OrcTail orcTail; - private HadoopShims.KeyProvider keyProvider; + private KeyProvider keyProvider; // TODO: We can generalize FileMetada interface. Make OrcTail implement FileMetadata interface // and remove this class altogether. Both footer caching and llap caching just needs OrcTail. // For now keeping this around to avoid complex surgery @@ -306,7 +307,7 @@ public ReaderOptions orcTail(OrcTail tail) { * @param provider * @return */ - public ReaderOptions setKeyProvider(HadoopShims.KeyProvider provider) { + public ReaderOptions setKeyProvider(KeyProvider provider) { this.keyProvider = provider; return this; } @@ -327,7 +328,7 @@ public OrcTail getOrcTail() { return orcTail; } - public HadoopShims.KeyProvider getKeyProvider() { + public KeyProvider getKeyProvider() { return keyProvider; } @@ -396,40 +397,6 @@ public static BloomFilterVersion fromString(String s) { } } - /** - * An internal class that describes how to encrypt a column. - */ - public static class EncryptionOption { - private final String columnNames; - private final String keyName; - private final String mask; - private final String[] maskParameters; - - EncryptionOption(String columnNames, String keyName, String mask, - String... maskParams) { - this.columnNames = columnNames; - this.keyName = keyName; - this.mask = mask; - this.maskParameters = maskParams; - } - - public String getColumnNames() { - return columnNames; - } - - public String getKeyName() { - return keyName; - } - - public String getMask() { - return mask; - } - - public String[] getMaskParameters() { - return maskParameters; - } - } - /** * Options for creating ORC file writers. 
*/ @@ -460,8 +427,9 @@ public static class WriterOptions implements Cloneable { private boolean writeVariableLengthBlocks; private HadoopShims shims; private String directEncodingColumns; - private List encryption = new ArrayList<>(); - private HadoopShims.KeyProvider provider; + private String encryption; + private String masks; + private KeyProvider provider; protected WriterOptions(Properties tableProperties, Configuration conf) { configuration = conf; @@ -757,50 +725,24 @@ public WriterOptions directEncodingColumns(String value) { return this; } - /* - * Encrypt a set of columns with a key. - * For readers without access to the key, they will read nulls. - * @param columnNames the columns to encrypt - * @param keyName the key name to encrypt the data with - * @return this - */ - public WriterOptions encryptColumn(String columnNames, - String keyName) { - return encryptColumn(columnNames, keyName, - DataMask.Standard.NULLIFY.getName()); - } - /** * Encrypt a set of columns with a key. - * The data is also masked and stored unencrypted in the file. Readers - * without access to the key will instead get the masked data. - * @param columnNames the column names to encrypt - * @param keyName the key name to encrypt the data with - * @param mask the kind of masking - * @param maskParameters the parameters to the mask + * For readers without access to the key, they will read nulls. + * @param value a key-list of which columns to encrypt * @return this */ - public WriterOptions encryptColumn(String columnNames, - String keyName, - String mask, - String... maskParameters) { - encryption.add(new EncryptionOption(columnNames, keyName, mask, - maskParameters)); + public WriterOptions encrypt(String value) { + encryption = value; return this; } /** - * Set a different mask on a subtree that is already being encrypted. - * @param columnNames the column names to change the mask on - * @param mask the name of the mask - * @param maskParameters the parameters for the mask + * Set the masks for the unencrypted data. + * @param value a list of the masks and column names * @return this */ - public WriterOptions maskColumn(String columnNames, - String mask, - String... 
maskParameters) { - encryption.add(new EncryptionOption(columnNames, null, - mask, maskParameters)); + public WriterOptions masks(String value) { + masks = value; return this; } @@ -809,12 +751,12 @@ public WriterOptions maskColumn(String columnNames, * @param provider * @return */ - public WriterOptions setKeyProvider(HadoopShims.KeyProvider provider) { + public WriterOptions setKeyProvider(KeyProvider provider) { this.provider = provider; return this; } - public HadoopShims.KeyProvider getKeyProvider() { + public KeyProvider getKeyProvider() { return provider; } @@ -922,9 +864,13 @@ public String getDirectEncodingColumns() { return directEncodingColumns; } - public List getEncryption() { + public String getEncryption() { return encryption; } + + public String getMasks() { + return masks; + } } /** diff --git a/java/core/src/java/org/apache/orc/TypeDescription.java b/java/core/src/java/org/apache/orc/TypeDescription.java index 4f32cc5197..f36b1341db 100644 --- a/java/core/src/java/org/apache/orc/TypeDescription.java +++ b/java/core/src/java/org/apache/orc/TypeDescription.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.orc.impl.ParserUtils; import org.apache.orc.impl.SchemaEvolution; import org.jetbrains.annotations.NotNull; @@ -56,6 +57,10 @@ public class TypeDescription private static final int DEFAULT_LENGTH = 256; static final Pattern UNQUOTED_NAMES = Pattern.compile("^[a-zA-Z0-9_]+$"); + // type attributes + public static final String ENCRYPT_ATTRIBUTE = "encrypt"; + public static final String MASK_ATTRIBUTE = "mask"; + @Override public int compareTo(TypeDescription other) { if (this == other) { @@ -193,231 +198,6 @@ public static TypeDescription createDecimal() { return new TypeDescription(Category.DECIMAL); } - static class StringPosition { - final String value; - int position; - final int length; - - StringPosition(String value) { - this.value = value; - position = 0; - length = value.length(); - } - - @Override - public String toString() { - StringBuilder buffer = new StringBuilder(); - buffer.append('\''); - buffer.append(value.substring(0, position)); - buffer.append('^'); - buffer.append(value.substring(position)); - buffer.append('\''); - return buffer.toString(); - } - } - - static Category parseCategory(StringPosition source) { - StringBuilder word = new StringBuilder(); - boolean hadSpace = true; - while (source.position < source.length) { - char ch = source.value.charAt(source.position); - if (Character.isLetter(ch)) { - word.append(Character.toLowerCase(ch)); - hadSpace = false; - } else if (ch == ' ') { - if (!hadSpace) { - hadSpace = true; - word.append(ch); - } - } else { - break; - } - source.position += 1; - } - String catString = word.toString(); - // if there were trailing spaces, remove them. 
- if (hadSpace) { - catString = catString.trim(); - } - if (!catString.isEmpty()) { - for (Category cat : Category.values()) { - if (cat.getName().equals(catString)) { - return cat; - } - } - } - throw new IllegalArgumentException("Can't parse category at " + source); - } - - static int parseInt(StringPosition source) { - int start = source.position; - int result = 0; - while (source.position < source.length) { - char ch = source.value.charAt(source.position); - if (!Character.isDigit(ch)) { - break; - } - result = result * 10 + (ch - '0'); - source.position += 1; - } - if (source.position == start) { - throw new IllegalArgumentException("Missing integer at " + source); - } - return result; - } - - static String parseName(StringPosition source) { - if (source.position == source.length) { - throw new IllegalArgumentException("Missing name at " + source); - } - final int start = source.position; - if (source.value.charAt(source.position) == '`') { - source.position += 1; - StringBuilder buffer = new StringBuilder(); - boolean closed = false; - while (source.position < source.length) { - char ch = source.value.charAt(source.position); - source.position += 1; - if (ch == '`') { - if (source.position < source.length && - source.value.charAt(source.position) == '`') { - source.position += 1; - buffer.append('`'); - } else { - closed = true; - break; - } - } else { - buffer.append(ch); - } - } - if (!closed) { - source.position = start; - throw new IllegalArgumentException("Unmatched quote at " + source); - } else if (buffer.length() == 0) { - throw new IllegalArgumentException("Empty quoted field name at " + source); - } - return buffer.toString(); - } else { - while (source.position < source.length) { - char ch = source.value.charAt(source.position); - if (!Character.isLetterOrDigit(ch) && ch != '_') { - break; - } - source.position += 1; - } - if (source.position == start) { - throw new IllegalArgumentException("Missing name at " + source); - } - return source.value.substring(start, source.position); - } - } - - static void requireChar(StringPosition source, char required) { - if (source.position >= source.length || - source.value.charAt(source.position) != required) { - throw new IllegalArgumentException("Missing required char '" + - required + "' at " + source); - } - source.position += 1; - } - - static boolean consumeChar(StringPosition source, char ch) { - boolean result = source.position < source.length && - source.value.charAt(source.position) == ch; - if (result) { - source.position += 1; - } - return result; - } - - static void parseUnion(TypeDescription type, StringPosition source) { - requireChar(source, '<'); - do { - type.addUnionChild(parseType(source)); - } while (consumeChar(source, ',')); - requireChar(source, '>'); - } - - static void parseStruct(TypeDescription type, StringPosition source) { - requireChar(source, '<'); - boolean needComma = false; - while (!consumeChar(source, '>')) { - if (needComma) { - requireChar(source, ','); - } else { - needComma = true; - } - String fieldName = parseName(source); - requireChar(source, ':'); - type.addField(fieldName, parseType(source)); - } - } - - static TypeDescription parseType(StringPosition source) { - TypeDescription result = new TypeDescription(parseCategory(source)); - switch (result.getCategory()) { - case BINARY: - case BOOLEAN: - case BYTE: - case DATE: - case DOUBLE: - case FLOAT: - case INT: - case LONG: - case SHORT: - case STRING: - case TIMESTAMP: - case TIMESTAMP_INSTANT: - break; - case CHAR: - case VARCHAR: - 
requireChar(source, '('); - result.withMaxLength(parseInt(source)); - requireChar(source, ')'); - break; - case DECIMAL: { - requireChar(source, '('); - int precision = parseInt(source); - requireChar(source, ','); - result.withScale(parseInt(source)); - result.withPrecision(precision); - requireChar(source, ')'); - break; - } - case LIST: { - requireChar(source, '<'); - TypeDescription child = parseType(source); - result.children.add(child); - child.parent = result; - requireChar(source, '>'); - break; - } - case MAP: { - requireChar(source, '<'); - TypeDescription keyType = parseType(source); - result.children.add(keyType); - keyType.parent = result; - requireChar(source, ','); - TypeDescription valueType = parseType(source); - result.children.add(valueType); - valueType.parent = result; - requireChar(source, '>'); - break; - } - case UNION: - parseUnion(result, source); - break; - case STRUCT: - parseStruct(result, source); - break; - default: - throw new IllegalArgumentException("Unknown type " + - result.getCategory() + " at " + source); - } - return result; - } - /** * Parse TypeDescription from the Hive type names. This is the inverse * of TypeDescription.toString() @@ -429,9 +209,9 @@ public static TypeDescription fromString(String typeName) { if (typeName == null) { return null; } - StringPosition source = new StringPosition(typeName); - TypeDescription result = parseType(source); - if (source.position != source.length) { + ParserUtils.StringPosition source = new ParserUtils.StringPosition(typeName); + TypeDescription result = ParserUtils.parseType(source); + if (source.hasCharactersLeft()) { throw new IllegalArgumentException("Extra characters at " + source); } return result; @@ -473,7 +253,7 @@ public TypeDescription withScale(int scale) { /** * Set an attribute on this type. * @param key the attribute name - * @param value the attribute value + * @param value the attribute value or null to clear the value * @return this for method chaining */ public TypeDescription setAttribute(@NotNull String key, @@ -549,8 +329,7 @@ public TypeDescription addUnionChild(TypeDescription child) { throw new IllegalArgumentException("Can only add types to union type" + " and not " + category); } - children.add(child); - child.parent = this; + addChild(child); return this; } @@ -566,8 +345,7 @@ public TypeDescription addField(String field, TypeDescription fieldType) { " and not " + category); } fieldNames.add(field); - children.add(fieldType); - fieldType.parent = this; + addChild(fieldType); return this; } @@ -876,6 +654,30 @@ private int assignIds(int startId) { return startId; } + /** + * Add a child to a type. + * @param child the child to add + */ + public void addChild(TypeDescription child) { + switch (category) { + case LIST: + if (children.size() >= 1) { + throw new IllegalArgumentException("Can't add more children to list"); + } + case MAP: + if (children.size() >= 2) { + throw new IllegalArgumentException("Can't add more children to map"); + } + case UNION: + case STRUCT: + children.add(child); + child.parent = this; + break; + default: + throw new IllegalArgumentException("Can't add children to " + category); + } + } + public TypeDescription(Category category) { this.category = category; if (category.isPrimitive) { @@ -1049,79 +851,6 @@ public TypeDescription findSubtype(int goal) { } } - /** - * Split a compound name into parts separated by '.'. 
- * @param source the string to parse into simple names - * @return a list of simple names from the source - */ - private static List splitName(StringPosition source) { - List result = new ArrayList<>(); - do { - result.add(parseName(source)); - } while (consumeChar(source, '.')); - return result; - } - - private static final Pattern INTEGER_PATTERN = Pattern.compile("^[0-9]+$"); - - private TypeDescription findSubtype(StringPosition source) { - List names = splitName(source); - if (names.size() == 1 && INTEGER_PATTERN.matcher(names.get(0)).matches()) { - return findSubtype(Integer.parseInt(names.get(0))); - } - TypeDescription current = SchemaEvolution.checkAcidSchema(this) - ? SchemaEvolution.getBaseRow(this) : this; - while (names.size() > 0) { - String first = names.remove(0); - switch (current.category) { - case STRUCT: { - int posn = current.fieldNames.indexOf(first); - if (posn == -1) { - throw new IllegalArgumentException("Field " + first + - " not found in " + current.toString()); - } - current = current.children.get(posn); - break; - } - case LIST: - if (first.equals("_elem")) { - current = current.getChildren().get(0); - } else { - throw new IllegalArgumentException("Field " + first + - "not found in " + current.toString()); - } - break; - case MAP: - if (first.equals("_key")) { - current = current.getChildren().get(0); - } else if (first.equals("_value")) { - current = current.getChildren().get(1); - } else { - throw new IllegalArgumentException("Field " + first + - "not found in " + current.toString()); - } - break; - case UNION: { - try { - int posn = Integer.parseInt(first); - if (posn < 0 || posn >= current.getChildren().size()) { - throw new NumberFormatException("off end of union"); - } - current = current.getChildren().get(posn); - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Field " + first + - "not found in " + current.toString(), e); - } - break; - } - default: - throw new IllegalArgumentException("Field " + first + - "not found in " + current.toString()); - } - } - return current; - } - /** * Find a subtype of this schema by name. * If the name is a simple integer, it will be used as a column number. 
@@ -1137,9 +866,9 @@ private TypeDescription findSubtype(StringPosition source) { * @return the subtype */ public TypeDescription findSubtype(String columnName) { - StringPosition source = new StringPosition(columnName); - TypeDescription result = findSubtype(source); - if (source.position != source.length) { + ParserUtils.StringPosition source = new ParserUtils.StringPosition(columnName); + TypeDescription result = ParserUtils.findSubtype(this, source); + if (source.hasCharactersLeft()) { throw new IllegalArgumentException("Remaining text in parsing field name " + source); } @@ -1154,20 +883,32 @@ public TypeDescription findSubtype(String columnName) { * @return the list of subtypes that correspond to the column names */ public List findSubtypes(String columnNameList) { - StringPosition source = new StringPosition(columnNameList); - List result = new ArrayList<>(); - boolean needComma = false; - while (source.position != source.length) { - if (needComma) { - if (!consumeChar(source, ',')) { - throw new IllegalArgumentException("Comma expected in list of column" - + " names at " + source); - } - } else { - needComma = true; - } - result.add(findSubtype(source)); + ParserUtils.StringPosition source = new ParserUtils.StringPosition(columnNameList); + List result = ParserUtils.findSubtypeList(this, source); + if (source.hasCharactersLeft()) { + throw new IllegalArgumentException("Remaining text in parsing field name " + + source); } return result; } + + /** + * Annotate a schema with the encryption keys and masks. + * @param encryption the encryption keys and the fields + * @param masks the encryption masks and the fields + */ + public void annotateEncryption(String encryption, String masks) { + ParserUtils.StringPosition source = new ParserUtils.StringPosition(encryption); + ParserUtils.parseKeys(source, this); + if (source.hasCharactersLeft()) { + throw new IllegalArgumentException("Remaining text in parsing encryption keys " + + source); + } + source = new ParserUtils.StringPosition(masks); + ParserUtils.parseMasks(source, this); + if (source.hasCharactersLeft()) { + throw new IllegalArgumentException("Remaining text in parsing encryption masks " + + source); + } + } } diff --git a/java/core/src/java/org/apache/orc/impl/CryptoUtils.java b/java/core/src/java/org/apache/orc/impl/CryptoUtils.java index 7d88796ae0..eb4eaced95 100644 --- a/java/core/src/java/org/apache/orc/impl/CryptoUtils.java +++ b/java/core/src/java/org/apache/orc/impl/CryptoUtils.java @@ -18,8 +18,16 @@ package org.apache.orc.impl; +import org.apache.hadoop.conf.Configuration; +import org.apache.orc.InMemoryKeystore; +import org.apache.orc.OrcConf; import org.apache.orc.OrcProto; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; +import java.util.ServiceLoader; import java.util.function.Consumer; /** @@ -110,4 +118,48 @@ public static void clearCounter(byte[] iv) { iv[i] = 0; } } + + /** A cache for the key providers */ + private static final Map keyProviderCache = new HashMap<>(); + + /** + * Create a KeyProvider. + * It will cache the result, so that only one provider of each kind will be + * created. + * + * @param random the random generator to use + * @return the new KeyProvider + */ + public static KeyProvider getKeyProvider(Configuration conf, + Random random) throws IOException { + String kind = OrcConf.KEY_PROVIDER.getString(conf); + String cacheKey = kind + "." 
+ random.getClass().getName(); + KeyProvider result = keyProviderCache.get(cacheKey); + if (result == null) { + ServiceLoader loader = ServiceLoader.load(KeyProvider.Factory.class); + for (KeyProvider.Factory factory : loader) { + result = factory.create(kind, conf, random); + if (result != null) { + keyProviderCache.put(cacheKey, result); + break; + } + } + } + return result; + } + + public static class HadoopKeyProviderFactory implements KeyProvider.Factory { + + @Override + public KeyProvider create(String kind, + Configuration conf, + Random random) throws IOException { + if ("hadoop".equals(kind)) { + return HadoopShimsFactory.get().getHadoopKeyProvider(conf, random); + } else if ("memory".equals(kind)) { + return new InMemoryKeystore(random); + } + return null; + } + } } diff --git a/java/core/src/java/org/apache/orc/impl/ParserUtils.java b/java/core/src/java/org/apache/orc/impl/ParserUtils.java new file mode 100644 index 0000000000..a6d227b165 --- /dev/null +++ b/java/core/src/java/org/apache/orc/impl/ParserUtils.java @@ -0,0 +1,425 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc.impl; + +import org.apache.orc.TypeDescription; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +public class ParserUtils { + + static TypeDescription.Category parseCategory(ParserUtils.StringPosition source) { + StringBuilder word = new StringBuilder(); + boolean hadSpace = true; + while (source.position < source.length) { + char ch = source.value.charAt(source.position); + if (Character.isLetter(ch)) { + word.append(Character.toLowerCase(ch)); + hadSpace = false; + } else if (ch == ' ') { + if (!hadSpace) { + hadSpace = true; + word.append(ch); + } + } else { + break; + } + source.position += 1; + } + String catString = word.toString(); + // if there were trailing spaces, remove them. 
+ if (hadSpace) { + catString = catString.trim(); + } + if (!catString.isEmpty()) { + for (TypeDescription.Category cat : TypeDescription.Category.values()) { + if (cat.getName().equals(catString)) { + return cat; + } + } + } + throw new IllegalArgumentException("Can't parse category at " + source); + } + + static int parseInt(ParserUtils.StringPosition source) { + int start = source.position; + int result = 0; + while (source.position < source.length) { + char ch = source.value.charAt(source.position); + if (!Character.isDigit(ch)) { + break; + } + result = result * 10 + (ch - '0'); + source.position += 1; + } + if (source.position == start) { + throw new IllegalArgumentException("Missing integer at " + source); + } + return result; + } + + static String parseName(ParserUtils.StringPosition source) { + if (source.position == source.length) { + throw new IllegalArgumentException("Missing name at " + source); + } + final int start = source.position; + if (source.value.charAt(source.position) == '`') { + source.position += 1; + StringBuilder buffer = new StringBuilder(); + boolean closed = false; + while (source.position < source.length) { + char ch = source.value.charAt(source.position); + source.position += 1; + if (ch == '`') { + if (source.position < source.length && + source.value.charAt(source.position) == '`') { + source.position += 1; + buffer.append('`'); + } else { + closed = true; + break; + } + } else { + buffer.append(ch); + } + } + if (!closed) { + source.position = start; + throw new IllegalArgumentException("Unmatched quote at " + source); + } else if (buffer.length() == 0) { + throw new IllegalArgumentException("Empty quoted field name at " + source); + } + return buffer.toString(); + } else { + while (source.position < source.length) { + char ch = source.value.charAt(source.position); + if (!Character.isLetterOrDigit(ch) && ch != '_') { + break; + } + source.position += 1; + } + if (source.position == start) { + throw new IllegalArgumentException("Missing name at " + source); + } + return source.value.substring(start, source.position); + } + } + + static void requireChar(ParserUtils.StringPosition source, char required) { + if (source.position >= source.length || + source.value.charAt(source.position) != required) { + throw new IllegalArgumentException("Missing required char '" + + required + "' at " + source); + } + source.position += 1; + } + + private static boolean consumeChar(ParserUtils.StringPosition source, + char ch) { + boolean result = source.position < source.length && + source.value.charAt(source.position) == ch; + if (result) { + source.position += 1; + } + return result; + } + + private static void parseUnion(TypeDescription type, + ParserUtils.StringPosition source) { + requireChar(source, '<'); + do { + type.addUnionChild(parseType(source)); + } while (consumeChar(source, ',')); + requireChar(source, '>'); + } + + private static void parseStruct(TypeDescription type, + ParserUtils.StringPosition source) { + requireChar(source, '<'); + boolean needComma = false; + while (!consumeChar(source, '>')) { + if (needComma) { + requireChar(source, ','); + } else { + needComma = true; + } + String fieldName = parseName(source); + requireChar(source, ':'); + type.addField(fieldName, parseType(source)); + } + } + + public static TypeDescription parseType(ParserUtils.StringPosition source) { + TypeDescription result = new TypeDescription(parseCategory(source)); + switch (result.getCategory()) { + case BINARY: + case BOOLEAN: + case BYTE: + case DATE: + case DOUBLE: + 
case FLOAT: + case INT: + case LONG: + case SHORT: + case STRING: + case TIMESTAMP: + case TIMESTAMP_INSTANT: + break; + case CHAR: + case VARCHAR: + requireChar(source, '('); + result.withMaxLength(parseInt(source)); + requireChar(source, ')'); + break; + case DECIMAL: { + requireChar(source, '('); + int precision = parseInt(source); + requireChar(source, ','); + result.withScale(parseInt(source)); + result.withPrecision(precision); + requireChar(source, ')'); + break; + } + case LIST: { + requireChar(source, '<'); + TypeDescription child = parseType(source); + result.addChild(child); + requireChar(source, '>'); + break; + } + case MAP: { + requireChar(source, '<'); + TypeDescription keyType = parseType(source); + result.addChild(keyType); + requireChar(source, ','); + TypeDescription valueType = parseType(source); + result.addChild(valueType); + requireChar(source, '>'); + break; + } + case UNION: + parseUnion(result, source); + break; + case STRUCT: + parseStruct(result, source); + break; + default: + throw new IllegalArgumentException("Unknown type " + + result.getCategory() + " at " + source); + } + return result; + } + + /** + * Split a compound name into parts separated by '.'. + * @param source the string to parse into simple names + * @return a list of simple names from the source + */ + private static List splitName(ParserUtils.StringPosition source) { + List result = new ArrayList<>(); + do { + result.add(parseName(source)); + } while (consumeChar(source, '.')); + return result; + } + + + private static final Pattern INTEGER_PATTERN = Pattern.compile("^[0-9]+$"); + + public static TypeDescription findSubtype(TypeDescription schema, + ParserUtils.StringPosition source) { + List names = ParserUtils.splitName(source); + if (names.size() == 1 && INTEGER_PATTERN.matcher(names.get(0)).matches()) { + return schema.findSubtype(Integer.parseInt(names.get(0))); + } + TypeDescription current = SchemaEvolution.checkAcidSchema(schema) + ? 
SchemaEvolution.getBaseRow(schema) : schema; + while (names.size() > 0) { + String first = names.remove(0); + switch (current.getCategory()) { + case STRUCT: { + int posn = current.getFieldNames().indexOf(first); + if (posn == -1) { + throw new IllegalArgumentException("Field " + first + + " not found in " + current.toString()); + } + current = current.getChildren().get(posn); + break; + } + case LIST: + if (first.equals("_elem")) { + current = current.getChildren().get(0); + } else { + throw new IllegalArgumentException("Field " + first + + "not found in " + current.toString()); + } + break; + case MAP: + if (first.equals("_key")) { + current = current.getChildren().get(0); + } else if (first.equals("_value")) { + current = current.getChildren().get(1); + } else { + throw new IllegalArgumentException("Field " + first + + "not found in " + current.toString()); + } + break; + case UNION: { + try { + int posn = Integer.parseInt(first); + if (posn < 0 || posn >= current.getChildren().size()) { + throw new NumberFormatException("off end of union"); + } + current = current.getChildren().get(posn); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Field " + first + + "not found in " + current.toString(), e); + } + break; + } + default: + throw new IllegalArgumentException("Field " + first + + "not found in " + current.toString()); + } + } + return current; + } + + public static List findSubtypeList(TypeDescription schema, + StringPosition source) { + List result = new ArrayList<>(); + if (source.hasCharactersLeft()) { + do { + result.add(findSubtype(schema, source)); + } while (consumeChar(source, ',')); + } + return result; + } + + public static class StringPosition { + final String value; + int position; + final int length; + + public StringPosition(String value) { + this.value = value == null ? "" : value; + position = 0; + length = this.value.length(); + } + + @Override + public String toString() { + return '\'' + value.substring(0, position) + '^' + + value.substring(position) + '\''; + } + + public String fromPosition(int start) { + return value.substring(start, this.position); + } + + public boolean hasCharactersLeft() { + return position != length; + } + } + + /** + * Annotate the given schema with the encryption information. + * + * Format of the string is a key-list. + *
+   * <ul>
+   *   <li>key-list = key (';' key-list)?</li>
+   *   <li>key = key-name ':' field-list</li>
+   *   <li>field-list = field-name ( ',' field-list )?</li>
+   *   <li>field-name = number | field-part ('.' field-name)?</li>
+   *   <li>field-part = quoted string | simple name</li>
+   * </ul>
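+   * <p>
+   * For example, via TypeDescription.annotateEncryption (illustrative only;
+   * the key and column names below follow the ones used in
+   * TestTypeDescription in this patch):
+   * <pre>{@code
+   *   // encrypt "name" and "address.street" with the key "pii",
+   *   // and "credit_cards" with the key "credit"
+   *   schema.annotateEncryption("pii:name,address.street;credit:credit_cards", null);
+   * }</pre>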
+   *
+   * @param source the string to parse
+   * @param schema the top level schema
+   * @throws IllegalArgumentException if there are conflicting keys for a field
+   */
+  public static void parseKeys(StringPosition source, TypeDescription schema) {
+    if (source.hasCharactersLeft()) {
+      do {
+        String keyName = parseName(source);
+        requireChar(source, ':');
+        for (TypeDescription field : findSubtypeList(schema, source)) {
+          String prev = field.getAttributeValue(TypeDescription.ENCRYPT_ATTRIBUTE);
+          if (prev != null && !prev.equals(keyName)) {
+            throw new IllegalArgumentException("Conflicting encryption keys " +
+                keyName + " and " + prev);
+          }
+          field.setAttribute(TypeDescription.ENCRYPT_ATTRIBUTE, keyName);
+        }
+      } while (consumeChar(source, ';'));
+    }
+  }
+
+  /**
+   * Annotate the given schema with the masking information.
+   *
+   * Format of the string is a mask-list.
+   * <ul>
+   *   <li>mask-list = mask (';' mask-list)?</li>
+   *   <li>mask = mask-name (',' parameter)* ':' field-list</li>
+   *   <li>field-list = field-name ( ',' field-list )?</li>
+   *   <li>field-name = number | field-part ('.' field-name)?</li>
+   *   <li>field-part = quoted string | simple name</li>
+   * </ul>
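+   * <p>
+   * For example, via TypeDescription.annotateEncryption (illustrative only;
+   * the mask names and columns mirror the tests in this patch):
+   * <pre>{@code
+   *   // encrypt "name" with key "pii", redact "name.first" with the
+   *   // parameter "Yy", and nullify "name.last"
+   *   schema.annotateEncryption("pii:name", "redact,Yy:name.first;nullify:name.last");
+   * }</pre>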
+ * + * @param source the string to parse + * @param schema the top level schema + * @throw IllegalArgumentException if there are conflicting masks for a field + */ + public static void parseMasks(StringPosition source, TypeDescription schema) { + if (source.hasCharactersLeft()) { + do { + // parse the mask and parameters, but only get the underlying string + int start = source.position; + parseName(source); + while (consumeChar(source, ',')) { + parseName(source); + } + String maskString = source.fromPosition(start); + requireChar(source, ':'); + for (TypeDescription field : findSubtypeList(schema, source)) { + String prev = field.getAttributeValue(TypeDescription.MASK_ATTRIBUTE); + if (prev != null && !prev.equals(maskString)) { + throw new IllegalArgumentException("Conflicting encryption masks " + + maskString + " and " + prev); + } + field.setAttribute(TypeDescription.MASK_ATTRIBUTE, maskString); + } + } while (consumeChar(source, ';')); + } + } + + public static MaskDescriptionImpl buildMaskDescription(String value) { + StringPosition source = new StringPosition(value); + String maskName = parseName(source); + List params = new ArrayList<>(); + while (consumeChar(source, ',')) { + params.add(parseName(source)); + } + return new MaskDescriptionImpl(maskName, + params.toArray(new String[params.size()])); + } +} diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java index 7f9cb63254..41449017a7 100644 --- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java +++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java @@ -25,10 +25,8 @@ import java.util.List; import java.util.Map; import java.util.SortedMap; -import java.util.SortedSet; import java.util.TimeZone; import java.util.TreeMap; -import java.util.TreeSet; import io.airlift.compress.lz4.Lz4Compressor; import io.airlift.compress.lz4.Lz4Decompressor; @@ -124,14 +122,14 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback { new ArrayList<>(); // the list of maskDescriptions, keys, and variants - private SortedSet maskDescriptions = new TreeSet<>(); + private SortedMap maskDescriptions = new TreeMap<>(); private SortedMap keys = new TreeMap<>(); private final WriterEncryptionVariant[] encryption; // the mapping of columns to maskDescriptions private final MaskDescriptionImpl[] columnMaskDescriptions; // the mapping of columns to EncryptionVariants private final WriterEncryptionVariant[] columnEncryption; - private HadoopShims.KeyProvider keyProvider; + private KeyProvider keyProvider; // do we need to include the current encryption keys in the next stripe // information private boolean needKeyFlush; @@ -139,23 +137,24 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback { public WriterImpl(FileSystem fs, Path path, OrcFile.WriterOptions opts) throws IOException { - this.schema = opts.getSchema(); + // clone it so that we can annotate it with encryption + this.schema = opts.getSchema().clone(); int numColumns = schema.getMaximumId() + 1; if (!opts.isEnforceBufferSize()) { opts.bufferSize(getEstimatedBufferSize(opts.getStripeSize(), numColumns, opts.getBufferSize())); } - // Do we have column encryption? 
- List encryptionOptions = opts.getEncryption(); + // Annotate the schema with the column encryption + schema.annotateEncryption(opts.getEncryption(), opts.getMasks()); columnEncryption = new WriterEncryptionVariant[numColumns]; - if (encryptionOptions.isEmpty()) { + if (opts.getEncryption() == null) { columnMaskDescriptions = null; encryption = new WriterEncryptionVariant[0]; needKeyFlush = false; } else { columnMaskDescriptions = new MaskDescriptionImpl[numColumns]; - encryption = setupEncryption(opts.getKeyProvider(), encryptionOptions); + encryption = setupEncryption(opts.getKeyProvider(), schema); needKeyFlush = true; } @@ -582,7 +581,7 @@ private OrcProto.EncryptionKey.Builder writeEncryptionKey(WriterEncryptionKey ke private OrcProto.Encryption.Builder writeEncryptionFooter() { OrcProto.Encryption.Builder encrypt = OrcProto.Encryption.newBuilder(); - for(MaskDescriptionImpl mask: maskDescriptions) { + for(MaskDescriptionImpl mask: maskDescriptions.values()) { OrcProto.DataMask.Builder maskBuilder = OrcProto.DataMask.newBuilder(); maskBuilder.setName(mask.getName()); for(String param: mask.getParameters()) { @@ -799,7 +798,7 @@ private static boolean hasTimestamp(TypeDescription schema) { } WriterEncryptionKey getKey(String keyName, - HadoopShims.KeyProvider provider) throws IOException { + KeyProvider provider) throws IOException { WriterEncryptionKey result = keys.get(keyName); if (result == null) { result = new WriterEncryptionKey(provider.getCurrentKeyVersion(keyName)); @@ -808,12 +807,43 @@ WriterEncryptionKey getKey(String keyName, return result; } - MaskDescriptionImpl getMask(OrcFile.EncryptionOption opt) { - MaskDescriptionImpl result = new MaskDescriptionImpl(opt.getMask(), - opt.getMaskParameters()); + MaskDescriptionImpl getMask(String maskString) { + MaskDescriptionImpl result = maskDescriptions.get(maskString); // if it is already there, get the earlier object - if (!maskDescriptions.add(result)) { - result = maskDescriptions.tailSet(result).first(); + if (result == null) { + result = ParserUtils.buildMaskDescription(maskString); + maskDescriptions.put(maskString, result); + } + return result; + } + + int visitTypeTree(TypeDescription schema, + boolean encrypted, + KeyProvider provider) throws IOException { + int result = 0; + String keyName = schema.getAttributeValue(TypeDescription.ENCRYPT_ATTRIBUTE); + String maskName = schema.getAttributeValue(TypeDescription.MASK_ATTRIBUTE); + if (keyName != null) { + if (encrypted) { + throw new IllegalArgumentException("Nested encryption type: " + schema); + } + encrypted = true; + result += 1; + WriterEncryptionKey key = getKey(keyName, provider); + HadoopShims.KeyMetadata metadata = key.getMetadata(); + WriterEncryptionVariant variant = new WriterEncryptionVariant(key, + schema, keyProvider.createLocalKey(metadata)); + key.addRoot(variant); + } + if (encrypted && (keyName != null || maskName != null)) { + MaskDescriptionImpl mask = getMask(maskName == null ? "nullify" : maskName); + mask.addColumn(schema); + } + List children = schema.getChildren(); + if (children != null) { + for(TypeDescription child: children) { + result += visitTypeTree(child, encrypted, provider); + } } return result; } @@ -822,37 +852,21 @@ MaskDescriptionImpl getMask(OrcFile.EncryptionOption opt) { * Iterate through the encryption options given by the user and set up * our data structures. 
* @param provider the KeyProvider to use to generate keys - * @param options the options from the user + * @param schema the a */ - WriterEncryptionVariant[] setupEncryption(HadoopShims.KeyProvider provider, - List options + WriterEncryptionVariant[] setupEncryption(KeyProvider provider, + TypeDescription schema ) throws IOException { keyProvider = provider != null ? provider : - SHIMS.getKeyProvider(conf, new SecureRandom()); + CryptoUtils.getKeyProvider(conf, new SecureRandom()); if (keyProvider == null) { throw new IllegalArgumentException("Encryption requires a KeyProvider."); } - // fill out the primary encryption keys - int variantCount = 0; - for(OrcFile.EncryptionOption option: options) { - MaskDescriptionImpl mask = getMask(option); - for(TypeDescription col: schema.findSubtypes(option.getColumnNames())) { - mask.addColumn(col); - } - if (option.getKeyName() != null) { - WriterEncryptionKey key = getKey(option.getKeyName(), keyProvider); - HadoopShims.KeyMetadata metadata = key.getMetadata(); - for(TypeDescription rootType: schema.findSubtypes(option.getColumnNames())) { - WriterEncryptionVariant variant = new WriterEncryptionVariant(key, - rootType, keyProvider.createLocalKey(metadata)); - key.addRoot(variant); - variantCount += 1; - } - } - } + int variantCount = visitTypeTree(schema, false, provider); + // Now that we have de-duped the keys and maskDescriptions, make the arrays int nextId = 0; - for (MaskDescriptionImpl mask: maskDescriptions) { + for (MaskDescriptionImpl mask: maskDescriptions.values()) { mask.setId(nextId++); for(TypeDescription column: mask.getColumns()) { this.columnMaskDescriptions[column.getId()] = mask; diff --git a/java/core/src/java/org/apache/orc/impl/reader/ReaderEncryption.java b/java/core/src/java/org/apache/orc/impl/reader/ReaderEncryption.java index c647c107ea..fdf7e1ceb6 100644 --- a/java/core/src/java/org/apache/orc/impl/reader/ReaderEncryption.java +++ b/java/core/src/java/org/apache/orc/impl/reader/ReaderEncryption.java @@ -22,17 +22,16 @@ import org.apache.orc.OrcProto; import org.apache.orc.StripeInformation; import org.apache.orc.TypeDescription; -import org.apache.orc.impl.HadoopShims; -import org.apache.orc.impl.HadoopShimsFactory; +import org.apache.orc.impl.CryptoUtils; +import org.apache.orc.impl.KeyProvider; import org.apache.orc.impl.MaskDescriptionImpl; import java.io.IOException; import java.security.SecureRandom; -import java.util.Arrays; import java.util.List; public class ReaderEncryption { - private final HadoopShims.KeyProvider keyProvider; + private final KeyProvider keyProvider; private final ReaderEncryptionKey[] keys; private final MaskDescriptionImpl[] masks; private final ReaderEncryptionVariant[] variants; @@ -51,7 +50,7 @@ public ReaderEncryption() { public ReaderEncryption(OrcProto.Footer footer, TypeDescription schema, List stripes, - HadoopShims.KeyProvider provider, + KeyProvider provider, Configuration conf) throws IOException { if (footer == null || !footer.hasEncryption()) { keyProvider = null; @@ -61,7 +60,7 @@ public ReaderEncryption(OrcProto.Footer footer, columnVariants = null; } else { keyProvider = provider != null ? 
provider : - HadoopShimsFactory.get().getKeyProvider(conf, new SecureRandom()); + CryptoUtils.getKeyProvider(conf, new SecureRandom()); OrcProto.Encryption encrypt = footer.getEncryption(); masks = new MaskDescriptionImpl[encrypt.getMaskCount()]; for(int m=0; m < masks.length; ++m) { diff --git a/java/core/src/java/org/apache/orc/impl/reader/ReaderEncryptionVariant.java b/java/core/src/java/org/apache/orc/impl/reader/ReaderEncryptionVariant.java index 255952d15c..c20bd8ef38 100644 --- a/java/core/src/java/org/apache/orc/impl/reader/ReaderEncryptionVariant.java +++ b/java/core/src/java/org/apache/orc/impl/reader/ReaderEncryptionVariant.java @@ -24,7 +24,7 @@ import org.apache.orc.OrcProto; import org.apache.orc.StripeInformation; import org.apache.orc.TypeDescription; -import org.apache.orc.impl.HadoopShims; +import org.apache.orc.impl.KeyProvider; import org.apache.orc.impl.LocalKey; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; @@ -42,7 +42,7 @@ public class ReaderEncryptionVariant implements EncryptionVariant { private static final Logger LOG = LoggerFactory.getLogger(ReaderEncryptionVariant.class); - private final HadoopShims.KeyProvider provider; + private final KeyProvider provider; private final ReaderEncryptionKey key; private final TypeDescription column; private final int variantId; @@ -63,7 +63,7 @@ public ReaderEncryptionVariant(ReaderEncryptionKey key, OrcProto.EncryptionVariant proto, TypeDescription schema, List stripes, - HadoopShims.KeyProvider provider) { + KeyProvider provider) { this.key = key; this.variantId = variantId; this.provider = provider; diff --git a/java/core/src/resources/META-INF/services/org.apache.orc.impl.KeyProvider$Factory b/java/core/src/resources/META-INF/services/org.apache.orc.impl.KeyProvider$Factory new file mode 100644 index 0000000000..da1659f442 --- /dev/null +++ b/java/core/src/resources/META-INF/services/org.apache.orc.impl.KeyProvider$Factory @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
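+# This file registers KeyProvider.Factory implementations so that
+# CryptoUtils.getKeyProvider can discover them through java.util.ServiceLoader.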
+org.apache.orc.impl.CryptoUtils$HadoopKeyProviderFactory \ No newline at end of file diff --git a/java/core/src/test/org/apache/orc/TestTypeDescription.java b/java/core/src/test/org/apache/orc/TestTypeDescription.java index 4a6a1993be..7519ff2137 100644 --- a/java/core/src/test/org/apache/orc/TestTypeDescription.java +++ b/java/core/src/test/org/apache/orc/TestTypeDescription.java @@ -19,6 +19,7 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import org.apache.hadoop.conf.Configuration; @@ -375,4 +376,84 @@ public void testAttributes() throws IOException { assertEquals("nullify", street.getAttributeValue("mask")); assertEquals(null, street.getAttributeValue("foobar")); } + + static int clearAttributes(TypeDescription schema) { + int result = 0; + for(String attribute: schema.getAttributeNames()) { + schema.removeAttribute(attribute); + result += 1; + } + List children = schema.getChildren(); + if (children != null) { + for (TypeDescription child : children) { + result += clearAttributes(child); + } + } + return result; + } + + @Test + public void testEncryption() { + String schemaString = "struct<" + + "name:struct," + + "address:struct," + + "credit_cards:array>>"; + TypeDescription schema = TypeDescription.fromString(schemaString); + TypeDescription copy = TypeDescription.fromString(schemaString); + assertEquals(copy, schema); + + // set some encryption + schema.annotateEncryption("pii:name,address.street;credit:credit_cards", null); + assertEquals("pii", + schema.findSubtype("name").getAttributeValue(TypeDescription.ENCRYPT_ATTRIBUTE)); + assertEquals("pii", + schema.findSubtype("address.street").getAttributeValue(TypeDescription.ENCRYPT_ATTRIBUTE)); + assertEquals("credit", + schema.findSubtype("credit_cards").getAttributeValue(TypeDescription.ENCRYPT_ATTRIBUTE)); + assertNotEquals(copy, schema); + assertEquals(3, clearAttributes(schema)); + assertEquals(copy, schema); + + schema.annotateEncryption("pii:name.first", "redact,Yy:name.first"); + // check that we ignore if already set + schema.annotateEncryption("pii:name.first", "redact,Yy:name.first,credit_cards"); + assertEquals("pii", + schema.findSubtype("name.first").getAttributeValue(TypeDescription.ENCRYPT_ATTRIBUTE)); + assertEquals("redact,Yy", + schema.findSubtype("name.first").getAttributeValue(TypeDescription.MASK_ATTRIBUTE)); + assertEquals("redact,Yy", + schema.findSubtype("credit_cards").getAttributeValue(TypeDescription.MASK_ATTRIBUTE)); + assertEquals(3, clearAttributes(schema)); + + schema.annotateEncryption("pii:name", "redact:name.first;nullify:name.last"); + assertEquals("pii", + schema.findSubtype("name").getAttributeValue(TypeDescription.ENCRYPT_ATTRIBUTE)); + assertEquals("redact", + schema.findSubtype("name.first").getAttributeValue(TypeDescription.MASK_ATTRIBUTE)); + assertEquals("nullify", + schema.findSubtype("name.last").getAttributeValue(TypeDescription.MASK_ATTRIBUTE)); + assertEquals(3, clearAttributes(schema)); + } + + @Test(expected = IllegalArgumentException.class) + public void testEncryptionConflict() { + TypeDescription schema = TypeDescription.fromString( + "struct<" + + "name:struct," + + "address:struct," + + "credit_cards:array>>"); + // set some encryption + schema.annotateEncryption("pii:address,personal:address",null); + } + + @Test(expected = IllegalArgumentException.class) + public void testMaskConflict() { + TypeDescription schema = 
TypeDescription.fromString( + "struct<" + + "name:struct," + + "address:struct," + + "credit_cards:array>>"); + // set some encryption + schema.annotateEncryption(null,"nullify:name;sha256:name"); + } } diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java index 69e1a405cc..93b0aa71ad 100644 --- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java +++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java @@ -3745,14 +3745,12 @@ public void testColumnEncryption() throws Exception { InMemoryKeystore keys = new InMemoryKeystore() .addKey("pii", EncryptionAlgorithm.AES_CTR_128, piiKey) .addKey("credit", EncryptionAlgorithm.AES_CTR_256, creditKey); - Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .setSchema(schema) .version(fileFormat) .setKeyProvider(keys) - .encryptColumn("i", "pii") - .encryptColumn("x", "credit")); + .encrypt("pii:i;credit:x")); VectorizedRowBatch batch = schema.createRowBatch(); batch.size = ROWS; LongColumnVector i = (LongColumnVector) batch.cols[0]; @@ -3889,12 +3887,7 @@ public void testMultiStripeColumnEncryption() throws Exception { .version(fileFormat) .stripeSize(10000) .setKeyProvider(allKeys) - .encryptColumn("dec", "key_0") - .encryptColumn("dt", "key_1") - .encryptColumn("time", "key_2") - .encryptColumn("dbl", "key_3") - .encryptColumn("bool", "key_4") - .encryptColumn("bin", "key_5")); + .encrypt("key_0:dec;key_1:dt;key_2:time;key_3:dbl;key_4:bool;key_5:bin")); // Set size to 1000 precisely so that stripes are exactly 5000 rows long. VectorizedRowBatch batch = schema.createRowBatch(1000); DecimalColumnVector dec = (DecimalColumnVector) batch.cols[0]; diff --git a/java/core/src/test/org/apache/orc/impl/TestCryptoUtils.java b/java/core/src/test/org/apache/orc/impl/TestCryptoUtils.java index a2caa2e9ed..2811bbfbf2 100644 --- a/java/core/src/test/org/apache/orc/impl/TestCryptoUtils.java +++ b/java/core/src/test/org/apache/orc/impl/TestCryptoUtils.java @@ -18,14 +18,20 @@ package org.apache.orc.impl; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.BytesWritable; import org.apache.orc.EncryptionAlgorithm; +import org.apache.orc.InMemoryKeystore; +import org.apache.orc.OrcConf; import org.apache.orc.OrcProto; import org.junit.Test; -import java.util.Arrays; +import java.io.IOException; +import java.security.Key; +import java.util.List; +import java.util.Random; import static junit.framework.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; public class TestCryptoUtils { @@ -45,4 +51,39 @@ public void testCreateStreamIv() throws Exception { assertEquals(0x34, iv[6]); assertEquals(0x56, iv[7]); } + + @Test + public void testMemoryKeyProvider() throws IOException { + Configuration conf = new Configuration(); + OrcConf.KEY_PROVIDER.setString(conf, "memory"); + // Hard code the random so that we know the bytes that will come out. 
+ InMemoryKeystore provider = + (InMemoryKeystore) CryptoUtils.getKeyProvider(conf, new Random(24)); + byte[] piiKey = new byte[]{0,1,2,3,4,5,6,7,8,9,0xa,0xb,0xc,0xd,0xe,0xf}; + provider.addKey("pii", EncryptionAlgorithm.AES_CTR_128, piiKey); + byte[] piiKey2 = new byte[]{0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, + 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f}; + provider.addKey("pii", 1, EncryptionAlgorithm.AES_CTR_128, piiKey2); + byte[] secretKey = new byte[]{0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, + 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f}; + provider.addKey("secret", EncryptionAlgorithm.AES_CTR_128, secretKey); + + List keyNames = provider.getKeyNames(); + assertEquals(2, keyNames.size()); + assertEquals(true, keyNames.contains("pii")); + assertEquals(true, keyNames.contains("secret")); + HadoopShims.KeyMetadata meta = provider.getCurrentKeyVersion("pii"); + assertEquals(1, meta.getVersion()); + LocalKey localKey = provider.createLocalKey(meta); + byte[] encrypted = localKey.getEncryptedKey(); + // make sure that we get exactly what we expect to test the encryption + assertEquals("c7 ab 4f bb 38 f4 de ad d0 b3 59 e2 21 2a 95 32", + new BytesWritable(encrypted).toString()); + // now check to make sure that we get the expected bytes back + assertEquals("c7 a1 d0 41 7b 24 72 44 1a 58 c7 72 4a d4 be b3", + new BytesWritable(localKey.getDecryptedKey().getEncoded()).toString()); + Key key = provider.decryptLocalKey(meta, encrypted); + assertEquals(new BytesWritable(localKey.getDecryptedKey().getEncoded()).toString(), + new BytesWritable(key.getEncoded()).toString()); + } } diff --git a/java/core/src/test/org/apache/orc/impl/TestPhysicalFsWriter.java b/java/core/src/test/org/apache/orc/impl/TestPhysicalFsWriter.java index 333bc980fb..6028307709 100644 --- a/java/core/src/test/org/apache/orc/impl/TestPhysicalFsWriter.java +++ b/java/core/src/test/org/apache/orc/impl/TestPhysicalFsWriter.java @@ -277,7 +277,7 @@ public boolean endVariableLengthBlock(OutputStream output) throws IOException { } @Override - public KeyProvider getKeyProvider(Configuration conf, Random random) { + public KeyProvider getHadoopKeyProvider(Configuration conf, Random random) { return null; } } diff --git a/java/mapreduce/src/java/org/apache/orc/mapred/OrcOutputFormat.java b/java/mapreduce/src/java/org/apache/orc/mapred/OrcOutputFormat.java index 341fbcd624..2322d1b084 100644 --- a/java/mapreduce/src/java/org/apache/orc/mapred/OrcOutputFormat.java +++ b/java/mapreduce/src/java/org/apache/orc/mapred/OrcOutputFormat.java @@ -61,7 +61,9 @@ public static OrcFile.WriterOptions buildOptions(Configuration conf) { .stripeSize(OrcConf.STRIPE_SIZE.getLong(conf)) .rowIndexStride((int) OrcConf.ROW_INDEX_STRIDE.getLong(conf)) .bufferSize((int) OrcConf.BUFFER_SIZE.getLong(conf)) - .paddingTolerance(OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(conf)); + .paddingTolerance(OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(conf)) + .encrypt(OrcConf.ENCRYPTION.getString(conf)) + .masks(OrcConf.DATA_MASK.getString(conf)); } @Override diff --git a/java/pom.xml b/java/pom.xml index 10e245869b..83f6590c16 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -217,6 +217,7 @@ **/*.md **/target/** .idea/** + **/*.iml diff --git a/java/shims/src/java/org/apache/orc/impl/HadoopShims.java b/java/shims/src/java/org/apache/orc/impl/HadoopShims.java index 69ab8f1900..ac58b63ddc 100644 --- a/java/shims/src/java/org/apache/orc/impl/HadoopShims.java +++ b/java/shims/src/java/org/apache/orc/impl/HadoopShims.java @@ -26,8 +26,6 @@ import java.io.IOException; import 
java.io.OutputStream; import java.nio.ByteBuffer; -import java.security.Key; -import java.util.List; import java.util.Random; public interface HadoopShims { @@ -144,55 +142,6 @@ public int getValue() { } } - /** - * A source of crypto keys. This is usually backed by a Ranger KMS. - */ - interface KeyProvider { - - /** - * Get the list of key names from the key provider. - * @return a list of key names - */ - List getKeyNames() throws IOException; - - /** - * Get the current metadata for a given key. This is used when encrypting - * new data. - * - * @param keyName the name of a key - * @return metadata for the current version of the key - * @throws IllegalArgumentException if the key is unknown - */ - KeyMetadata getCurrentKeyVersion(String keyName) throws IOException; - - /** - * Create a local key for the given key version. This local key will be - * randomly generated and encrypted with the given version of the master - * key. The encryption and decryption is done with the local key and the - * user process never has access to the master key, because it stays on the - * Ranger KMS. - * - * @param key the master key version - * @return the local key's material both encrypted and unencrypted - */ - LocalKey createLocalKey(KeyMetadata key) throws IOException; - - /** - * Decrypt a local key for reading a file. - * - * @param key the master key version - * @param encryptedKey the encrypted key - * @return the decrypted local key's material or null if the key is not - * available - */ - Key decryptLocalKey(KeyMetadata key, byte[] encryptedKey) throws IOException; - - /** - * Get the kind of this provider. - */ - KeyProviderKind getKind(); - } - /** * Information about a crypto key including the key name, version, and the * algorithm. @@ -233,23 +182,17 @@ public int getVersion() { @Override public String toString() { - StringBuilder buffer = new StringBuilder(); - buffer.append(keyName); - buffer.append('@'); - buffer.append(version); - buffer.append(' '); - buffer.append(algorithm); - return buffer.toString(); + return keyName + '@' + version + ' ' + algorithm; } } /** - * Create a KeyProvider to get encryption keys. + * Create a Hadoop KeyProvider to get encryption keys. 
* @param conf the configuration * @param random a secure random number generator * @return a key provider or null if none was provided */ - KeyProvider getKeyProvider(Configuration conf, - Random random) throws IOException; + KeyProvider getHadoopKeyProvider(Configuration conf, + Random random) throws IOException; } diff --git a/java/shims/src/java/org/apache/orc/impl/HadoopShimsCurrent.java b/java/shims/src/java/org/apache/orc/impl/HadoopShimsCurrent.java index ff11159ff6..c32ed2e950 100644 --- a/java/shims/src/java/org/apache/orc/impl/HadoopShimsCurrent.java +++ b/java/shims/src/java/org/apache/orc/impl/HadoopShimsCurrent.java @@ -59,8 +59,8 @@ public boolean endVariableLengthBlock(OutputStream output) throws IOException { } @Override - public KeyProvider getKeyProvider(Configuration conf, - Random random) throws IOException { + public KeyProvider getHadoopKeyProvider(Configuration conf, + Random random) throws IOException { return HadoopShimsPre2_7.createKeyProvider(conf, random); } } diff --git a/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_3.java b/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_3.java index c186e996b1..07e45b2212 100644 --- a/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_3.java +++ b/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_3.java @@ -55,7 +55,7 @@ public boolean endVariableLengthBlock(OutputStream output) { } @Override - public KeyProvider getKeyProvider(Configuration conf, Random random) { + public KeyProvider getHadoopKeyProvider(Configuration conf, Random random) { return new NullKeyProvider(); } diff --git a/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_6.java b/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_6.java index 618e4c85c6..75a11459e6 100644 --- a/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_6.java +++ b/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_6.java @@ -129,7 +129,7 @@ public boolean endVariableLengthBlock(OutputStream output) { } @Override - public KeyProvider getKeyProvider(Configuration conf, Random random) { + public KeyProvider getHadoopKeyProvider(Configuration conf, Random random) { return new HadoopShimsPre2_3.NullKeyProvider(); } } diff --git a/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_7.java b/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_7.java index b92be65dc7..552c6c59ba 100644 --- a/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_7.java +++ b/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_7.java @@ -238,8 +238,8 @@ static EncryptionAlgorithm findAlgorithm(KeyProviderCryptoExtension.Metadata met } @Override - public KeyProvider getKeyProvider(Configuration conf, - Random random) throws IOException { + public KeyProvider getHadoopKeyProvider(Configuration conf, + Random random) throws IOException { return createKeyProvider(conf, random); } } diff --git a/java/shims/src/java/org/apache/orc/impl/KeyProvider.java b/java/shims/src/java/org/apache/orc/impl/KeyProvider.java new file mode 100644 index 0000000000..23840206fa --- /dev/null +++ b/java/shims/src/java/org/apache/orc/impl/KeyProvider.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+import org.apache.hadoop.conf.Configuration;
+
+import java.io.IOException;
+import java.security.Key;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * A source of crypto keys. This is usually backed by a Ranger KMS.
+ */
+public interface KeyProvider {
+
+  /**
+   * Get the list of key names from the key provider.
+   * @return a list of key names
+   */
+  List<String> getKeyNames() throws IOException;
+
+  /**
+   * Get the current metadata for a given key. This is used when encrypting
+   * new data.
+   *
+   * @param keyName the name of a key
+   * @return metadata for the current version of the key
+   * @throws IllegalArgumentException if the key is unknown
+   */
+  HadoopShims.KeyMetadata getCurrentKeyVersion(String keyName) throws IOException;
+
+  /**
+   * Create a local key for the given key version. This local key will be
+   * randomly generated and encrypted with the given version of the master
+   * key. The encryption and decryption is done with the local key and the
+   * user process never has access to the master key, because it stays on the
+   * Ranger KMS.
+   *
+   * @param key the master key version
+   * @return the local key's material both encrypted and unencrypted
+   */
+  LocalKey createLocalKey(HadoopShims.KeyMetadata key) throws IOException;
+
+  /**
+   * Decrypt a local key for reading a file.
+   *
+   * @param key the master key version
+   * @param encryptedKey the encrypted key
+   * @return the decrypted local key's material or null if the key is not
+   *   available
+   */
+  Key decryptLocalKey(HadoopShims.KeyMetadata key, byte[] encryptedKey) throws IOException;
+
+  /**
+   * Get the kind of this provider.
+   */
+  HadoopShims.KeyProviderKind getKind();
+
+  /**
+   * A service loader factory interface.
+   */
+  interface Factory {
+    KeyProvider create(String kind,
+                       Configuration conf,
+                       Random random) throws IOException;
+  }
+}
diff --git a/java/shims/src/test/org/apache/orc/impl/TestHadoopShimsPre2_7.java b/java/shims/src/test/org/apache/orc/impl/TestHadoopShimsPre2_7.java
index 2db90a573b..a07fdb1eb4 100644
--- a/java/shims/src/test/org/apache/orc/impl/TestHadoopShimsPre2_7.java
+++ b/java/shims/src/test/org/apache/orc/impl/TestHadoopShimsPre2_7.java
@@ -67,144 +67,4 @@ public void testFindingAesEncryption() {
     assertEquals(EncryptionAlgorithm.AES_CTR_256,
         HadoopShimsPre2_7.findAlgorithm(meta));
   }
-
-  @Test
-  public void testHadoopKeyProvider() throws IOException {
-    HadoopShims shims = new HadoopShimsPre2_7();
-    Configuration conf = new Configuration();
-    conf.set("hadoop.security.key.provider.path", "test:///");
-    // Hard code the random so that we know the bytes that will come out.
- HadoopShims.KeyProvider provider = shims.getKeyProvider(conf, new Random(24)); - List keyNames = provider.getKeyNames(); - assertEquals(2, keyNames.size()); - assertEquals(true, keyNames.contains("pii")); - assertEquals(true, keyNames.contains("secret")); - HadoopShims.KeyMetadata piiKey = provider.getCurrentKeyVersion("pii"); - assertEquals(1, piiKey.getVersion()); - LocalKey localKey = provider.createLocalKey(piiKey); - byte[] encrypted = localKey.getEncryptedKey(); - // make sure that we get exactly what we expect to test the encryption - assertEquals("c7 ab 4f bb 38 f4 de ad d0 b3 59 e2 21 2a 95 32", - new BytesWritable(encrypted).toString()); - // now check to make sure that we get the expected bytes back - assertEquals("c7 a1 d0 41 7b 24 72 44 1a 58 c7 72 4a d4 be b3", - new BytesWritable(localKey.getDecryptedKey().getEncoded()).toString()); - Key key = provider.decryptLocalKey(piiKey, encrypted); - assertEquals(new BytesWritable(localKey.getDecryptedKey().getEncoded()).toString(), - new BytesWritable(key.getEncoded()).toString()); - } - - /** - * Create a Hadoop KeyProvider that lets us test the interaction - * with the Hadoop code. - * Must only be used in unit tests! - */ - public static class TestKeyProviderFactory extends KeyProviderFactory { - - @Override - public KeyProvider createProvider(URI uri, - Configuration conf) throws IOException { - if ("test".equals(uri.getScheme())) { - KeyProvider provider = new TestKeyProvider(conf); - // populate a couple keys into the provider - byte[] piiKey = new byte[]{0,1,2,3,4,5,6,7,8,9,0xa,0xb,0xc,0xd,0xe,0xf}; - KeyProvider.Options aes128 = new KeyProvider.Options(conf); - provider.createKey("pii", piiKey, aes128); - byte[] piiKey2 = new byte[]{0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, - 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f}; - provider.rollNewVersion("pii", piiKey2); - byte[] secretKey = new byte[]{0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, - 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f}; - provider.createKey("secret", secretKey, aes128); - return KeyProviderCryptoExtension.createKeyProviderCryptoExtension(provider); - } - return null; - } - } - - /** - * A Hadoop KeyProvider that lets us test the interaction - * with the Hadoop code. - * Must only be used in unit tests! 
- */ - static class TestKeyProvider extends KeyProvider { - // map from key name to metadata - private final Map keyMetdata = new HashMap<>(); - // map from key version name to material - private final Map keyVersions = new HashMap<>(); - - public TestKeyProvider(Configuration conf) { - super(conf); - } - - @Override - public KeyVersion getKeyVersion(String name) { - return keyVersions.get(name); - } - - @Override - public List getKeys() { - return new ArrayList<>(keyMetdata.keySet()); - } - - @Override - public List getKeyVersions(String name) { - List result = new ArrayList<>(); - Metadata meta = getMetadata(name); - for(int v=0; v < meta.getVersions(); ++v) { - String versionName = buildVersionName(name, v); - KeyVersion material = keyVersions.get(versionName); - if (material != null) { - result.add(material); - } - } - return result; - } - - @Override - public Metadata getMetadata(String name) { - return keyMetdata.get(name); - } - - @Override - public KeyVersion createKey(String name, byte[] bytes, Options options) { - String versionName = buildVersionName(name, 0); - keyMetdata.put(name, new TestMetadata(options.getCipher(), - options.getBitLength(), 1)); - KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, bytes); - keyVersions.put(versionName, result); - return result; - } - - @Override - public void deleteKey(String name) { - throw new UnsupportedOperationException("Can't delete keys"); - } - - @Override - public KeyVersion rollNewVersion(String name, byte[] bytes) { - TestMetadata key = keyMetdata.get(name); - String versionName = buildVersionName(name, key.addVersion()); - KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, - bytes); - keyVersions.put(versionName, result); - return result; - } - - @Override - public void flush() { - // Nothing - } - - static class TestMetadata extends KeyProvider.Metadata { - - protected TestMetadata(String cipher, int bitLength, int versions) { - super(cipher, bitLength, null, null, null, versions); - } - - public int addVersion() { - return super.addVersion(); - } - } - } } diff --git a/java/tools/src/java/org/apache/orc/tools/KeyTool.java b/java/tools/src/java/org/apache/orc/tools/KeyTool.java index 5339334647..ea592bca43 100644 --- a/java/tools/src/java/org/apache/orc/tools/KeyTool.java +++ b/java/tools/src/java/org/apache/orc/tools/KeyTool.java @@ -26,8 +26,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.BytesWritable; import org.apache.orc.EncryptionAlgorithm; +import org.apache.orc.impl.CryptoUtils; import org.apache.orc.impl.HadoopShims; -import org.apache.orc.impl.HadoopShimsFactory; +import org.apache.orc.impl.KeyProvider; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONWriter; @@ -42,7 +43,7 @@ public class KeyTool { static void printKey(JSONWriter writer, - HadoopShims.KeyProvider provider, + KeyProvider provider, String keyName) throws JSONException, IOException { HadoopShims.KeyMetadata meta = provider.getCurrentKeyVersion(keyName); writer.object(); @@ -79,8 +80,8 @@ public KeyTool(Configuration conf, } void run() throws IOException, JSONException { - HadoopShims.KeyProvider provider = - HadoopShimsFactory.get().getKeyProvider(conf, new SecureRandom()); + KeyProvider provider = + CryptoUtils.getKeyProvider(conf, new SecureRandom()); if (provider == null) { System.err.println("No key provider available."); System.exit(1); diff --git a/java/tools/src/test/org/apache/orc/impl/FakeKeyProvider.java 
b/java/tools/src/test/org/apache/orc/impl/FakeKeyProvider.java new file mode 100644 index 0000000000..1c3f6c5eee --- /dev/null +++ b/java/tools/src/test/org/apache/orc/impl/FakeKeyProvider.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc.impl; + + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.key.KeyProvider; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; +import org.apache.hadoop.crypto.key.KeyProviderFactory; +import org.apache.hadoop.crypto.key.kms.KMSClientProvider; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A Hadoop KeyProvider that lets us test the interaction + * with the Hadoop code. + * Must only be used in unit tests! + */ +public class FakeKeyProvider extends KeyProvider { + // map from key name to metadata + private final Map keyMetdata = new HashMap<>(); + // map from key version name to material + private final Map keyVersions = new HashMap<>(); + + public FakeKeyProvider(Configuration conf) { + super(conf); + } + + @Override + public KeyVersion getKeyVersion(String name) { + return keyVersions.get(name); + } + + @Override + public List getKeys() { + return new ArrayList<>(keyMetdata.keySet()); + } + + @Override + public List getKeyVersions(String name) { + List result = new ArrayList<>(); + Metadata meta = getMetadata(name); + for(int v=0; v < meta.getVersions(); ++v) { + String versionName = buildVersionName(name, v); + KeyVersion material = keyVersions.get(versionName); + if (material != null) { + result.add(material); + } + } + return result; + } + + @Override + public Metadata getMetadata(String name) { + return keyMetdata.get(name); + } + + @Override + public KeyVersion createKey(String name, byte[] bytes, Options options) { + String versionName = buildVersionName(name, 0); + keyMetdata.put(name, new TestMetadata(options.getCipher(), + options.getBitLength(), 1)); + KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, bytes); + keyVersions.put(versionName, result); + return result; + } + + @Override + public void deleteKey(String name) { + throw new UnsupportedOperationException("Can't delete keys"); + } + + @Override + public KeyVersion rollNewVersion(String name, byte[] bytes) { + TestMetadata key = keyMetdata.get(name); + String versionName = buildVersionName(name, key.addVersion()); + KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, + bytes); + keyVersions.put(versionName, result); + return result; + } + + @Override + public void flush() { + // Nothing + } + + static class TestMetadata extends KeyProvider.Metadata { + + TestMetadata(String cipher, 
int bitLength, int versions) { + super(cipher, bitLength, null, null, null, versions); + } + + public int addVersion() { + return super.addVersion(); + } + } + + public static class Factory extends KeyProviderFactory { + + @Override + public KeyProvider createProvider(URI uri, + Configuration conf) throws IOException { + if ("test".equals(uri.getScheme())) { + KeyProvider provider = new FakeKeyProvider(conf); + // populate a couple keys into the provider + byte[] piiKey = new byte[]{0,1,2,3,4,5,6,7,8,9,0xa,0xb,0xc,0xd,0xe,0xf}; + org.apache.hadoop.crypto.key.KeyProvider.Options aes128 = new KeyProvider.Options(conf); + provider.createKey("pii", piiKey, aes128); + byte[] piiKey2 = new byte[]{0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, + 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f}; + provider.rollNewVersion("pii", piiKey2); + byte[] secretKey = new byte[]{0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, + 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f}; + provider.createKey("secret", secretKey, aes128); + return KeyProviderCryptoExtension.createKeyProviderCryptoExtension(provider); + } + return null; + } + } +} diff --git a/java/tools/src/test/org/apache/orc/impl/TestHadoopKeyProvider.java b/java/tools/src/test/org/apache/orc/impl/TestHadoopKeyProvider.java new file mode 100644 index 0000000000..d3cbf60899 --- /dev/null +++ b/java/tools/src/test/org/apache/orc/impl/TestHadoopKeyProvider.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.orc.impl; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.BytesWritable; +import org.junit.Test; + +import java.io.IOException; +import java.security.Key; +import java.util.List; +import java.util.Random; + +import static junit.framework.Assert.assertEquals; + +public class TestHadoopKeyProvider { + + /** + * Tests the path through the hadoop key provider code base. + * This should be consistent with TestCryptoUtils.testMemoryKeyProvider. + * @throws IOException + */ + @Test + public void testHadoopKeyProvider() throws IOException { + Configuration conf = new Configuration(); + conf.set("hadoop.security.key.provider.path", "test:///"); + // Hard code the random so that we know the bytes that will come out. 
+    KeyProvider provider = CryptoUtils.getKeyProvider(conf, new Random(24));
+    List<String> keyNames = provider.getKeyNames();
+    assertEquals(2, keyNames.size());
+    assertEquals(true, keyNames.contains("pii"));
+    assertEquals(true, keyNames.contains("secret"));
+    HadoopShims.KeyMetadata piiKey = provider.getCurrentKeyVersion("pii");
+    assertEquals(1, piiKey.getVersion());
+    LocalKey localKey = provider.createLocalKey(piiKey);
+    byte[] encrypted = localKey.getEncryptedKey();
+    // make sure that we get exactly what we expect to test the encryption
+    assertEquals("c7 ab 4f bb 38 f4 de ad d0 b3 59 e2 21 2a 95 32",
+        new BytesWritable(encrypted).toString());
+    // now check to make sure that we get the expected bytes back
+    assertEquals("c7 a1 d0 41 7b 24 72 44 1a 58 c7 72 4a d4 be b3",
+        new BytesWritable(localKey.getDecryptedKey().getEncoded()).toString());
+    Key key = provider.decryptLocalKey(piiKey, encrypted);
+    assertEquals(new BytesWritable(localKey.getDecryptedKey().getEncoded()).toString(),
+        new BytesWritable(key.getEncoded()).toString());
+  }
+}
diff --git a/java/shims/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory b/java/tools/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory
similarity index 92%
rename from java/shims/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory
rename to java/tools/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory
index 14ee9a50b9..9648d705f8 100644
--- a/java/shims/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory
+++ b/java/tools/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-org.apache.orc.impl.TestHadoopShimsPre2_7$TestKeyProviderFactory
+org.apache.orc.impl.FakeKeyProvider$Factory
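
As a usage sketch of the new configuration-driven options (not part of this patch): the snippet below writes a file with encrypted columns using the key:column syntax exercised by the tests above. The schema, column names, key name, output path, and key bytes are made up for illustration, and the comma-separated column list per key is an assumption; a real deployment would point orc.key.provider at a Hadoop/Ranger KMS instead of using an InMemoryKeystore.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.orc.EncryptionAlgorithm;
    import org.apache.orc.InMemoryKeystore;
    import org.apache.orc.OrcConf;
    import org.apache.orc.OrcFile;
    import org.apache.orc.TypeDescription;
    import org.apache.orc.Writer;

    public class EncryptedWriteSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The same specs can come from table properties / configuration:
        //   orc.encrypt = "pii:ssn,email"   (key name : column list)
        //   orc.mask    = "nullify:ssn"     (mask name : column list)
        OrcConf.ENCRYPTION.setString(conf, "pii:ssn,email");
        OrcConf.DATA_MASK.setString(conf, "nullify:ssn");

        TypeDescription schema = TypeDescription.fromString(
            "struct<id:int,ssn:string,email:string>");

        // Stand-in for a real KMS; the key bytes are all zero for illustration only.
        InMemoryKeystore keys = new InMemoryKeystore()
            .addKey("pii", EncryptionAlgorithm.AES_CTR_128, new byte[16]);

        Writer writer = OrcFile.createWriter(new Path("/tmp/encrypted-example.orc"),
            OrcFile.writerOptions(conf)
                .setSchema(schema)
                .setKeyProvider(keys)
                // wire the configuration values into the writer options
                .encrypt(OrcConf.ENCRYPTION.getString(conf))
                .masks(OrcConf.DATA_MASK.getString(conf)));
        writer.close();
      }
    }

For MapReduce jobs this wiring now happens automatically in OrcOutputFormat.buildOptions, so setting only the two configuration values is enough there.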
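Similarly, the new KeyProvider.Factory interface can be used to plug in a custom provider kind. A minimal sketch, assuming that CryptoUtils.getKeyProvider consults each factory registered under META-INF/services/org.apache.orc.impl.KeyProvider$Factory and uses the first non-null result for the configured orc.key.provider kind; the package, class name, and the "custom" kind are hypothetical, and an InMemoryKeystore stands in for a provider that would talk to an external key service.

    package com.example.orc;   // hypothetical package

    import org.apache.hadoop.conf.Configuration;
    import org.apache.orc.InMemoryKeystore;
    import org.apache.orc.impl.KeyProvider;

    import java.io.IOException;
    import java.util.Random;

    public class CustomKeyProviderFactory implements KeyProvider.Factory {
      @Override
      public KeyProvider create(String kind,
                                Configuration conf,
                                Random random) throws IOException {
        if ("custom".equals(kind)) {
          // A real implementation would build a provider backed by an external
          // key service; InMemoryKeystore implements KeyProvider, so it serves
          // as a compilable placeholder here.
          return new InMemoryKeystore();
        }
        // Returning null is assumed to leave the kind to the other registered
        // factories (such as the "hadoop" and "memory" ones).
        return null;
      }
    }

To activate it, the factory class would be listed in a META-INF/services/org.apache.orc.impl.KeyProvider$Factory file on the classpath, mirroring the service entry this patch adds for the built-in factories.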