5 changes: 3 additions & 2 deletions java/core/src/java/org/apache/orc/InMemoryKeystore.java
@@ -18,6 +18,7 @@
 package org.apache.orc;
 
 import org.apache.orc.impl.HadoopShims;
+import org.apache.orc.impl.KeyProvider;
 import org.apache.orc.impl.LocalKey;
 
 import javax.crypto.BadPaddingException;
@@ -40,7 +41,7 @@
 import java.util.TreeMap;
 
 /**
- * This is an in-memory implementation of {@link HadoopShims.KeyProvider}.
+ * This is an in-memory implementation of {@link KeyProvider}.
  *
  * The primary use of this class is for when the user doesn't have a
  * Hadoop KMS running and wishes to use encryption. It is also useful for
@@ -52,7 +53,7 @@
  *
  * This class is not thread safe.
  */
-public class InMemoryKeystore implements HadoopShims.KeyProvider {
+public class InMemoryKeystore implements KeyProvider {
   /**
    * Support AES 256 ?
    */
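With KeyProvider promoted from a nested HadoopShims interface to the top-level org.apache.orc.impl.KeyProvider, an InMemoryKeystore can be handed straight to the reader and writer options shown later in this diff. A minimal sketch, assuming the chainable addKey(name, algorithm, masterKey) helper that InMemoryKeystore exposes for tests:

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.orc.EncryptionAlgorithm;
    import org.apache.orc.InMemoryKeystore;
    import org.apache.orc.OrcFile;

    public class KeystoreExample {
      public static void main(String[] args) throws IOException {
        // An in-memory KeyProvider: useful for tests or when no Hadoop
        // KMS is available. addKey registers a named master key.
        InMemoryKeystore keystore = new InMemoryKeystore()
            .addKey("pii", EncryptionAlgorithm.AES_CTR_128,
                "testkey".getBytes(StandardCharsets.UTF_8));

        // Hand the provider to the writer; ReaderOptions gets the same
        // setKeyProvider hook in this change.
        Configuration conf = new Configuration();
        OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
            .setKeyProvider(keystore);
      }
    }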
5 changes: 4 additions & 1 deletion java/core/src/java/org/apache/orc/OrcConf.java
@@ -162,7 +162,10 @@ public enum OrcConf {
       "Comma-separated list of columns for which dictionary encoding is to be skipped."),
   // some JVM doesn't allow array creation of size Integer.MAX_VALUE, so chunk size is slightly less than max int
   ORC_MAX_DISK_RANGE_CHUNK_LIMIT("orc.max.disk.range.chunk.limit", "hive.exec.orc.max.disk.range.chunk.limit",
-      Integer.MAX_VALUE - 1024, "When reading stripes >2GB, specify max limit for the chunk size.")
+      Integer.MAX_VALUE - 1024, "When reading stripes >2GB, specify max limit for the chunk size."),
+  ENCRYPTION("orc.encrypt", "orc.encrypt", null, "The list of keys and columns to encrypt with"),
+  DATA_MASK("orc.mask", "orc.mask", null, "The masks to apply to the encrypted columns"),
+  KEY_PROVIDER("orc.key.provider", "orc.key.provider", "hadoop", "The kind of KeyProvider to use for encryption.")
   ;
 
   private final String attribute;
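The same choices can be driven purely through configuration with the three new OrcConf entries. A hedged sketch; the exact list syntax accepted by orc.encrypt and orc.mask (key-to-column and mask-to-column mappings) is defined by parsing code elsewhere in this change, so the values below are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.orc.OrcConf;

    public class EncryptionConfExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Encrypt the ssn and email columns with the master key "pii"
        // (illustrative value syntax).
        OrcConf.ENCRYPTION.setString(conf, "pii:ssn,email");
        // Mask the unencrypted copy that keyless readers will see.
        OrcConf.DATA_MASK.setString(conf, "nullify:ssn,email");
        // "hadoop" (the default) resolves keys through the Hadoop KMS.
        OrcConf.KEY_PROVIDER.setString(conf, "hadoop");
      }
    }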
98 changes: 22 additions & 76 deletions java/core/src/java/org/apache/orc/OrcFile.java
@@ -32,6 +32,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.orc.impl.HadoopShims;
 import org.apache.orc.impl.HadoopShimsFactory;
+import org.apache.orc.impl.KeyProvider;
 import org.apache.orc.impl.MemoryManagerImpl;
 import org.apache.orc.impl.OrcTail;
 import org.apache.orc.impl.ReaderImpl;
@@ -275,7 +276,7 @@ public static class ReaderOptions {
     private FileSystem filesystem;
     private long maxLength = Long.MAX_VALUE;
     private OrcTail orcTail;
-    private HadoopShims.KeyProvider keyProvider;
+    private KeyProvider keyProvider;
     // TODO: We can generalize FileMetada interface. Make OrcTail implement FileMetadata interface
     // and remove this class altogether. Both footer caching and llap caching just needs OrcTail.
     // For now keeping this around to avoid complex surgery
@@ -306,7 +307,7 @@ public ReaderOptions orcTail(OrcTail tail) {
      * @param provider
      * @return
      */
-    public ReaderOptions setKeyProvider(HadoopShims.KeyProvider provider) {
+    public ReaderOptions setKeyProvider(KeyProvider provider) {
       this.keyProvider = provider;
       return this;
     }
@@ -327,7 +328,7 @@ public OrcTail getOrcTail() {
       return orcTail;
     }
 
-    public HadoopShims.KeyProvider getKeyProvider() {
+    public KeyProvider getKeyProvider() {
       return keyProvider;
     }
 
@@ -396,40 +397,6 @@ public static BloomFilterVersion fromString(String s) {
     }
   }
 
-  /**
-   * An internal class that describes how to encrypt a column.
-   */
-  public static class EncryptionOption {
-    private final String columnNames;
-    private final String keyName;
-    private final String mask;
-    private final String[] maskParameters;
-
-    EncryptionOption(String columnNames, String keyName, String mask,
-                     String... maskParams) {
-      this.columnNames = columnNames;
-      this.keyName = keyName;
-      this.mask = mask;
-      this.maskParameters = maskParams;
-    }
-
-    public String getColumnNames() {
-      return columnNames;
-    }
-
-    public String getKeyName() {
-      return keyName;
-    }
-
-    public String getMask() {
-      return mask;
-    }
-
-    public String[] getMaskParameters() {
-      return maskParameters;
-    }
-  }
-
   /**
    * Options for creating ORC file writers.
    */
@@ -460,8 +427,9 @@ public static class WriterOptions implements Cloneable {
     private boolean writeVariableLengthBlocks;
     private HadoopShims shims;
     private String directEncodingColumns;
-    private List<EncryptionOption> encryption = new ArrayList<>();
-    private HadoopShims.KeyProvider provider;
+    private String encryption;
+    private String masks;
+    private KeyProvider provider;
 
     protected WriterOptions(Properties tableProperties, Configuration conf) {
       configuration = conf;
@@ -757,50 +725,24 @@ public WriterOptions directEncodingColumns(String value) {
       return this;
     }
 
-    /*
-     * Encrypt a set of columns with a key.
-     * For readers without access to the key, they will read nulls.
-     * @param columnNames the columns to encrypt
-     * @param keyName the key name to encrypt the data with
-     * @return this
-     */
-    public WriterOptions encryptColumn(String columnNames,
-                                       String keyName) {
-      return encryptColumn(columnNames, keyName,
-          DataMask.Standard.NULLIFY.getName());
-    }
-
     /**
      * Encrypt a set of columns with a key.
-     * The data is also masked and stored unencrypted in the file. Readers
-     * without access to the key will instead get the masked data.
-     * @param columnNames the column names to encrypt
-     * @param keyName the key name to encrypt the data with
-     * @param mask the kind of masking
-     * @param maskParameters the parameters to the mask
+     * For readers without access to the key, they will read nulls.
+     * @param value a key-list of which columns to encrypt
      * @return this
      */
-    public WriterOptions encryptColumn(String columnNames,
-                                       String keyName,
-                                       String mask,
-                                       String... maskParameters) {
-      encryption.add(new EncryptionOption(columnNames, keyName, mask,
-          maskParameters));
+    public WriterOptions encrypt(String value) {
+      encryption = value;
       return this;
     }
 
     /**
-     * Set a different mask on a subtree that is already being encrypted.
-     * @param columnNames the column names to change the mask on
-     * @param mask the name of the mask
-     * @param maskParameters the parameters for the mask
+     * Set the masks for the unencrypted data.
+     * @param value a list of the masks and column names
      * @return this
      */
-    public WriterOptions maskColumn(String columnNames,
-                                    String mask,
-                                    String... maskParameters) {
-      encryption.add(new EncryptionOption(columnNames, null,
-          mask, maskParameters));
+    public WriterOptions masks(String value) {
+      masks = value;
      return this;
     }
 
@@ -809,12 +751,12 @@ public WriterOptions maskColumn(String columnNames,
      * @param provider
      * @return
      */
-    public WriterOptions setKeyProvider(HadoopShims.KeyProvider provider) {
+    public WriterOptions setKeyProvider(KeyProvider provider) {
       this.provider = provider;
       return this;
    }
 
-    public HadoopShims.KeyProvider getKeyProvider() {
+    public KeyProvider getKeyProvider() {
      return provider;
    }
 
@@ -922,9 +864,13 @@ public String getDirectEncodingColumns() {
       return directEncodingColumns;
     }
 
-    public List<EncryptionOption> getEncryption() {
+    public String getEncryption() {
       return encryption;
     }
+
+    public String getMasks() {
+      return masks;
+    }
   }
 
   /**
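Putting the new writer API together: encrypt(...) and masks(...) take single string arguments in the same form as the orc.encrypt and orc.mask settings above, replacing the per-column encryptColumn/maskColumn builders. A minimal sketch with an assumed schema; the list syntax is again illustrative:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.orc.OrcFile;
    import org.apache.orc.TypeDescription;
    import org.apache.orc.Writer;

    public class EncryptedWriterExample {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        TypeDescription schema =
            TypeDescription.fromString("struct<name:string,ssn:string>");

        // Readers holding the "pii" key decrypt ssn; everyone else reads
        // the masked (here: nullified) copy written alongside it.
        Writer writer = OrcFile.createWriter(new Path("people.orc"),
            OrcFile.writerOptions(conf)
                .setSchema(schema)
                .encrypt("pii:ssn")       // key name -> columns
                .masks("nullify:ssn"));   // mask name -> columns
        writer.close();
      }
    }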