apache · rdblue · Nov 13, 2016 · Nov 14, 2016 · Nov 14, 2016 · Dec 7, 2016
diff --git a/NOTICE b/NOTICE
@@ -54,3 +54,41 @@ its NOTICE file:
   This product includes software developed at
   The Apache Software Foundation (http://www.apache.org/).
 
+--------------------------------------------------------------------------------
+
+This project includes code from Kite, developed at Cloudera, Inc. with
+the following copyright notice:
+
+| Copyright 2013 Cloudera Inc.
+|
+| Licensed under the Apache License, Version 2.0 (the "License");
+| you may not use this file except in compliance with the License.
+| You may obtain a copy of the License at
+|
+|   http://www.apache.org/licenses/LICENSE-2.0
+|
+| Unless required by applicable law or agreed to in writing, software
+| distributed under the License is distributed on an "AS IS" BASIS,
+| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+| See the License for the specific language governing permissions and
+| limitations under the License.
+
+--------------------------------------------------------------------------------
+
+This project includes code from Netflix, Inc. with the following copyright
+notice:
+
+| Copyright 2016 Netflix, Inc.
+|
+| Licensed under the Apache License, Version 2.0 (the "License");
+| you may not use this file except in compliance with the License.
+| You may obtain a copy of the License at
+|
+|   http://www.apache.org/licenses/LICENSE-2.0
+|
+| Unless required by applicable law or agreed to in writing, software
+| distributed under the License is distributed on an "AS IS" BASIS,
+| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+| See the License for the specific language governing permissions and
+| limitations under the License.
+
diff --git a/parquet-cli/README.md b/parquet-cli/README.md
@@ -0,0 +1,107 @@
+<!--
+  - Licensed to the Apache Software Foundation (ASF) under one
+  - or more contributor license agreements.  See the NOTICE file
+  - distributed with this work for additional information
+  - regarding copyright ownership.  The ASF licenses this file
+  - to you under the Apache License, Version 2.0 (the
+  - "License"); you may not use this file except in compliance
+  - with the License.  You may obtain a copy of the License at
+  -
+  -   http://www.apache.org/licenses/LICENSE-2.0
+  -
+  - Unless required by applicable law or agreed to in writing,
+  - software distributed under the License is distributed on an
+  - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  - KIND, either express or implied.  See the License for the
+  - specific language governing permissions and limitations
+  - under the License.
+  -->
+
+## Building
+
+You can build this project using maven:
+
+```
+mvn clean install -DskipTests
+```
+
+
+## Running
+
+The build produces a shaded Jar that can be run using the `hadoop` command:
+
+```
+hadoop jar parquet-cli-1.9.1-runtime.jar org.apache.parquet.cli.Main
+```
+
+For a shorter command-line invocation, add an alias to your shell like this:
+
+```
+alias parquet="hadoop jar /path/to/parquet-cli-1.9.1-runtime.jar org.apache.parquet.cli.Main --dollar-zero parquet"
+```
+
+### Running without Hadoop
+
+To run from the target directory instead of using the `hadoop` command, first copy the dependencies to a folder:
+
+```
+mvn dependency:copy-dependencies
+```
+
+Then, run the command-line and add `target/dependencies/*` to the classpath:
+
+```
+java -cp 'target/*:target/dependency/*' org.apache.parquet.cli.Main
+```
+
+
+### Help
+
+The `parquet` tool includes help for the included commands:
+
+```
+parquet help
+```
+```
+Usage: parquet [options] [command] [command options]
+
+  Options:
+
+    -v, --verbose, --debug
+        Print extra debugging information
+
+  Commands:
+
+    help
+        Retrieves details on the functions of other commands
+    meta
+        Print a Parquet file's metadata
+    pages
+        Print page summaries for a Parquet file
+    dictionary
+        Print dictionaries for a Parquet column
+    check-stats
+        Check Parquet files for corrupt page and column stats (PARQUET-251)
+    schema
+        Print the Avro schema for a file
+    csv-schema
+        Build a schema from a CSV data sample
+    convert-csv
+        Create a file from CSV data
+    convert
+        Create a Parquet file from a data file
+    to-avro
+        Create an Avro file from a data file
+    cat
+        Print the first N records from a file
+    head
+        Print the first N records from a file
+
+  Examples:
+
+    # print information for create
+    parquet help create
+
+  See 'parquet help <command>' for more information on a specific command.
+```
+
diff --git a/parquet-cli/pom.xml b/parquet-cli/pom.xml
@@ -0,0 +1,153 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+  -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <groupId>org.apache.parquet</groupId>
+    <artifactId>parquet</artifactId>
+    <relativePath>../pom.xml</relativePath>
+    <version>1.9.1-SNAPSHOT</version>
+  </parent>
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>parquet-cli</artifactId>
+  <packaging>jar</packaging>
+
+  <name>Apache Parquet Command-line</name>
+  <url>https://parquet.apache.org</url>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.parquet</groupId>
+      <artifactId>parquet-avro</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>${avro.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>${slf4j.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>net.sf.opencsv</groupId>
+      <artifactId>opencsv</artifactId>
+      <version>${opencsv.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-databind</artifactId>
+      <version>${jackson2.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.beust</groupId>
+      <artifactId>jcommander</artifactId>
+      <version>${jcommander.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <version>${slf4j.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>${guava.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <version>${commons-codec.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-client</artifactId>
+      <version>${hadoop.version}</version>
+      <scope>provided</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <!-- This module disables semver checks because it is not a public API.
+      <plugin>
+        <artifactId>maven-enforcer-plugin</artifactId>
+      </plugin>
+      -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+							<shadedArtifactAttached>true</shadedArtifactAttached>
+							<shadedClassifierName>runtime</shadedClassifierName>
+              <minimizeJar>false</minimizeJar>
+              <filters>
+                <filter>
+                  <artifact>org.xerial.snappy:*</artifact>
+                  <excludes>
+                    <exclude>**/LICENSE</exclude>
+                  </excludes>
+                </filter>
+                <filter>
+                  <artifact>*:*</artifact>
+                  <excludes>
+                    <exclude>META-INF/LICENSE.txt</exclude>
+                    <exclude>META-INF/NOTICE.txt</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+              <artifactSet>
+                <includes>
+                  <include>*</include>
+                </includes>
+              </artifactSet>
+              <relocations>
+                <relocation>
+									<!-- relocate Avro in the runtime jar to avoid conflicts with
+                       on-cluster Avro versions.
+											 -->
+                  <pattern>org.apache.avro</pattern>
+                  <shadedPattern>${shade.prefix}.org.apache.avro</shadedPattern>
+                </relocation>
+              </relocations>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+</project>