diff --git a/java/examples/src/java/org/apache/orc/examples/CompressionWriter.java b/java/examples/src/java/org/apache/orc/examples/CompressionWriter.java
new file mode 100644
index 0000000000..3f6ce35576
--- /dev/null
+++ b/java/examples/src/java/org/apache/orc/examples/CompressionWriter.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.examples;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.OrcFile;
+import org.apache.orc.OrcFile.WriterOptions;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+
+import java.io.IOException;
+
+import static org.apache.orc.CompressionKind.SNAPPY;
+
+public class CompressionWriter {
+  public static void main(Configuration conf, String[] args) throws IOException {
+    TypeDescription schema = TypeDescription.fromString("struct<x:int,y:string>");
+    // Set the compression kind to SNAPPY. Many other compression
+    // methods are available, such as ZLIB, LZO, LZ4 and ZSTD.
+    WriterOptions options = OrcFile.writerOptions(conf).setSchema(schema).compress(SNAPPY);
+    Writer snappyWriter = OrcFile.createWriter(new Path("compressed.orc"), options);
+
+    VectorizedRowBatch batch = schema.createRowBatch();
+    LongColumnVector x = (LongColumnVector) batch.cols[0];
+    BytesColumnVector y = (BytesColumnVector) batch.cols[1];
+    for (int r = 0; r < 10000; ++r) {
+      int row = batch.size++;
+      x.vector[row] = r;
+      byte[] buffer = ("byte-" + r).getBytes();
+      y.setRef(row, buffer, 0, buffer.length);
+      // If the batch is full, write it out and start over.
+      if (batch.size == batch.getMaxSize()) {
+        snappyWriter.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size != 0) {
+      snappyWriter.addRowBatch(batch);
+    }
+    snappyWriter.close();
+  }
+
+  public static void main(String[] args) throws IOException {
+    main(new Configuration(), args);
+  }
+}
diff --git a/java/examples/src/java/org/apache/orc/examples/Driver.java b/java/examples/src/java/org/apache/orc/examples/Driver.java
index 7d13a1f410..0b7c983f24 100644
--- a/java/examples/src/java/org/apache/orc/examples/Driver.java
+++ b/java/examples/src/java/org/apache/orc/examples/Driver.java
@@ -75,6 +75,9 @@ public static void main(String[] args) throws Exception {
     System.err.println("   read - read a sample ORC file");
     System.err.println("   write2 - write a sample ORC file with a map");
     System.err.println("   read2 - read a sample ORC file with a map");
+    System.err.println("   compressWriter - write an ORC file with snappy compression");
+    System.err.println("   inMemoryEncryptionWriter - write an ORC file with encryption");
+    System.err.println("   inMemoryEncryptionReader - read an ORC file with encryption");
     System.err.println();
     System.err.println("To get more help, provide -h to the command");
     System.exit(1);
@@ -95,6 +98,12 @@ public static void main(String[] args) throws Exception {
       AdvancedWriter.main(conf, options.commandArgs);
     } else if ("read2".equals(options.command)) {
       AdvancedReader.main(conf, options.commandArgs);
+    } else if ("compressWriter".equals(options.command)) {
+      CompressionWriter.main(conf, options.commandArgs);
+    } else if ("inMemoryEncryptionWriter".equals(options.command)) {
+      InMemoryEncryptionWriter.main(conf, options.commandArgs);
+    } else if ("inMemoryEncryptionReader".equals(options.command)) {
+      InMemoryEncryptionReader.main(conf, options.commandArgs);
     } else {
       System.err.println("Unknown subcommand: " + options.command);
       System.exit(1);
diff --git a/java/examples/src/java/org/apache/orc/examples/InMemoryEncryptionReader.java b/java/examples/src/java/org/apache/orc/examples/InMemoryEncryptionReader.java
new file mode 100644
index 0000000000..9b06db449a
--- /dev/null
+++ b/java/examples/src/java/org/apache/orc/examples/InMemoryEncryptionReader.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.examples;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.InMemoryKeystore;
+import org.apache.orc.OrcFile;
+import org.apache.orc.OrcFile.ReaderOptions;
+import org.apache.orc.Reader;
+import org.apache.orc.RecordReader;
+import org.apache.orc.TypeDescription;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+import static org.apache.orc.EncryptionAlgorithm.AES_CTR_128;
+
+public class InMemoryEncryptionReader {
+  public static void main(Configuration conf, String[] args) throws IOException {
+    byte[] kmsKey = "secret123".getBytes(StandardCharsets.UTF_8);
+    // InMemoryKeystore supplies the key that is used to decrypt the data.
+    InMemoryKeystore keyProvider = new InMemoryKeystore().addKey("pii", AES_CTR_128, kmsKey);
+    ReaderOptions readerOptions = OrcFile.readerOptions(conf).setKeyProvider(keyProvider);
+    Reader reader = OrcFile.createReader(new Path("encrypted.orc"), readerOptions);
+
+    System.out.println("File schema: " + reader.getSchema());
+    System.out.println("Row count: " + reader.getNumberOfRows());
+
+    // Pick the schema we want to read using schema evolution
+    TypeDescription schema = TypeDescription.fromString("struct<x:int,y:string>");
+    // Read the encrypted data
+    VectorizedRowBatch batch = schema.createRowBatch();
+    RecordReader rowIterator = reader.rows(reader.options().schema(schema));
+    LongColumnVector x = (LongColumnVector) batch.cols[0];
+    BytesColumnVector y = (BytesColumnVector) batch.cols[1];
+    while (rowIterator.nextBatch(batch)) {
+      for (int row = 0; row < batch.size; ++row) {
+        System.out.println("x: " + x.vector[row]);
+        System.out.println("y: " + y.toString(row));
+      }
+    }
+    rowIterator.close();
+  }
+
+  public static void main(String[] args) throws IOException {
+    main(new Configuration(), args);
+  }
+}
diff --git a/java/examples/src/java/org/apache/orc/examples/InMemoryEncryptionWriter.java b/java/examples/src/java/org/apache/orc/examples/InMemoryEncryptionWriter.java
new file mode 100644
index 0000000000..a2481af3aa
--- /dev/null
+++ b/java/examples/src/java/org/apache/orc/examples/InMemoryEncryptionWriter.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.examples;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.InMemoryKeystore;
+import org.apache.orc.OrcFile;
+import org.apache.orc.OrcFile.WriterOptions;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+import static org.apache.orc.EncryptionAlgorithm.AES_CTR_128;
+
+public class InMemoryEncryptionWriter {
+  public static void main(Configuration conf, String[] args) throws IOException {
+    TypeDescription schema = TypeDescription.fromString("struct<x:int,y:string>");
+    byte[] kmsKey = "secret123".getBytes(StandardCharsets.UTF_8);
+    // The primary use of InMemoryKeystore is for users who don't have a
+    // Hadoop KMS.
+    InMemoryKeystore provider = new InMemoryKeystore().addKey("pii", AES_CTR_128, kmsKey);
+    String encryption = "pii:x,y";
+    WriterOptions writerOptions =
+        OrcFile.writerOptions(conf).setSchema(schema).setKeyProvider(provider).encrypt(encryption);
+    Writer writer = OrcFile.createWriter(new Path("encrypted.orc"), writerOptions);
+
+    VectorizedRowBatch batch = schema.createRowBatch();
+    LongColumnVector x = (LongColumnVector) batch.cols[0];
+    BytesColumnVector y = (BytesColumnVector) batch.cols[1];
+    for (int r = 0; r < 10000; ++r) {
+      int row = batch.size++;
+      x.vector[row] = r;
+      byte[] buffer = ("byte-" + r).getBytes();
+      y.setRef(row, buffer, 0, buffer.length);
+      // If the batch is full, write it out and start over.
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size != 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+  }
+
+  public static void main(String[] args) throws IOException {
+    main(new Configuration(), args);
+  }
+}