[Infra][Spark] Repackaging Spark library under the Apache namespace (#428)

Signed-off-by: acezen <qiaozi.zwb@alibaba-inc.com>
acezen committed Mar 28, 2024
1 parent 18c0845 commit b9ccb2a
Showing 72 changed files with 151 additions and 151 deletions.
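The whole commit applies one mechanical rule across all 72 files: every occurrence of the old namespace moves to the Apache root, dotted in `package`/`import` statements and slashed in directory paths. A sketch of that rule in Python (the helper below is illustrative, not part of the repository):

```python
OLD = "com.alibaba.graphar"
NEW = "org.apache.graphar"

def repackage(text: str) -> str:
    """Apply the commit's rename rule to one file's contents or to a path:
    dotted names (package/import lines, JVM references) and slashed names
    (source-tree paths) both move from the old root to the new one."""
    text = text.replace(OLD, NEW)
    return text.replace(OLD.replace(".", "/"), NEW.replace(".", "/"))
```

Running it over a `package` line or a source path reproduces exactly the kind of change seen in the hunks below.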
4 changes: 2 additions & 2 deletions .licenserc.yaml
Original file line number Diff line number Diff line change
@@ -25,8 +25,8 @@ header:
- 'cpp/thirdparty'
- 'cpp/include/gar/external/result.hpp'
- 'cpp/misc/cpplint.py'
- 'spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources'
- 'spark/datasources-33/src/main/scala/com/alibaba/graphar/datasources'
- 'spark/datasources-32/src/main/scala/org/apache/graphar/datasources'
- 'spark/datasources-33/src/main/scala/org/apache/graphar/datasources'
- 'java/src/main/java/com/alibaba/graphar/stdcxx/StdString.java'
- 'java/src/main/java/com/alibaba/graphar/stdcxx/StdVector.java'
- '*.md'
2 changes: 1 addition & 1 deletion docs/spark/spark-lib.rst
@@ -68,7 +68,7 @@ You can include GraphAr as a dependency in your maven project
</repositories>
<dependencies>
<dependency>
<groupId>com.alibaba</groupId>
<groupId>org.apache</groupId>
<artifactId>graphar</artifactId>
<version>0.1.0</version>
</dependency>
2 changes: 1 addition & 1 deletion pyspark/graphar_pyspark/__init__.py
@@ -40,7 +40,7 @@ def set_spark_session(self, spark_session: SparkSession) -> None:
self.ss = spark_session # Python SparkSession
self.sc = spark_session.sparkContext # Python SparkContext
self.jvm = spark_session._jvm # JVM
self.graphar = spark_session._jvm.com.alibaba.graphar # Alias to scala graphar
self.graphar = spark_session._jvm.org.apache.graphar # Alias to scala graphar
self.jsc = spark_session._jsc # Java SparkContext
self.jss = spark_session._jsparkSession # Java SparkSession

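After this change, the py4j bridge walks the `org.apache.graphar` package instead of `com.alibaba.graphar` when `set_spark_session` assigns `self.graphar`. A sketch of how such a dotted alias resolves against a JVM view (a simplified stand-in for illustration, not the `graphar_pyspark` API):

```python
NAMESPACE = "org.apache.graphar"  # was "com.alibaba.graphar" before this commit

def jvm_alias(jvm, namespace=NAMESPACE):
    """Resolve a dotted JVM package path on a py4j JVMView-like object,
    the way `spark_session._jvm.org.apache.graphar` does attribute by
    attribute."""
    obj = jvm
    for part in namespace.split("."):
        obj = getattr(obj, part)
    return obj
```

Any PySpark code that hard-coded the old dotted path must switch to the new one; the traversal itself is unchanged.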
4 changes: 2 additions & 2 deletions pyspark/graphar_pyspark/graph.py
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

"""Bindings to com.alibaba.graphar.graph."""
"""Bindings to org.apache.graphar.graph."""

from __future__ import annotations

@@ -188,7 +188,7 @@ def write(
) -> None:
"""Write graph data in graphar format.
Note: for default parameters check com.alibaba.graphar.GeneralParams;
Note: for default parameters check org.apache.graphar.GeneralParams;
For this method None for any of arguments means that the default value will be used.
:param path: the directory to write.
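The docstring above says that `None` for any argument means "use the default from `GeneralParams`", which now lives under `org.apache.graphar`. A Python sketch of that convention (the parameter names and default values below are illustrative placeholders, not the library's actual defaults):

```python
def resolve(value, default):
    """Return the library default when the caller passes None."""
    return default if value is None else value

# Hypothetical defaults for illustration only; the real values are defined
# in org.apache.graphar.GeneralParams on the Scala side.
ILLUSTRATIVE_DEFAULTS = {"name": "graph", "vertex_chunk_size": 262144}

def write_args(path, name=None, vertex_chunk_size=None):
    """Mimic graph.write's convention: explicit values win, None falls
    back to the default."""
    return {
        "path": path,
        "name": resolve(name, ILLUSTRATIVE_DEFAULTS["name"]),
        "vertex_chunk_size": resolve(
            vertex_chunk_size, ILLUSTRATIVE_DEFAULTS["vertex_chunk_size"]
        ),
    }
```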
2 changes: 1 addition & 1 deletion pyspark/graphar_pyspark/info.py
@@ -29,7 +29,7 @@
# see the license for the specific language governing permissions and
# limitations under the license.

"""Bindings to com.alibaba.graphar info classes."""
"""Bindings to org.apache.graphar info classes."""

# because we are using type-hints, we need to define few custom TypeVar
# to describe returns of classmethods;
2 changes: 1 addition & 1 deletion pyspark/graphar_pyspark/reader.py
@@ -29,7 +29,7 @@
# see the license for the specific language governing permissions and
# limitations under the license.

"""Bindings to com.alibaba.graphar.graph."""
"""Bindings to org.apache.graphar.graph."""

from __future__ import annotations

2 changes: 1 addition & 1 deletion pyspark/graphar_pyspark/util.py
@@ -29,7 +29,7 @@
# see the license for the specific language governing permissions and
# limitations under the license.

"""Bindings to com.alibaba.graphar.util."""
"""Bindings to org.apache.graphar.util."""

from __future__ import annotations

2 changes: 1 addition & 1 deletion pyspark/graphar_pyspark/writer.py
@@ -29,7 +29,7 @@
# see the license for the specific language governing permissions and
# limitations under the license.

"""Bindings to com.alibaba.graphar.writer."""
"""Bindings to org.apache.graphar.writer."""


from __future__ import annotations
10 changes: 5 additions & 5 deletions spark/README.md
@@ -51,8 +51,8 @@ Build and run the unit tests:
Build and run certain unit test:

```bash
$ mvn clean test -Dsuites='com.alibaba.graphar.GraphInfoSuite' # run the GraphInfo test suite
$ mvn clean test -Dsuites='com.alibaba.graphar.GraphInfoSuite load graph info' # run the `load graph info` test of test suite
$ mvn clean test -Dsuites='org.apache.graphar.GraphInfoSuite' # run the GraphInfo test suite
$ mvn clean test -Dsuites='org.apache.graphar.GraphInfoSuite load graph info' # run the `load graph info` test of test suite
```

### Generate API document
@@ -68,7 +68,7 @@ The API document is generated in the directory ``spark/graphar/target/site/scala
## Running Neo4j to GraphAr example

Spark provides a simple example to convert Neo4j data to GraphAr data.
The example is located in the directory ``spark/graphar/src/main/scala/com/alibaba/graphar/examples/``.
The example is located in the directory ``spark/graphar/src/main/scala/org/apache/graphar/examples/``.

To run the example, download Spark and Neo4j first.

@@ -159,7 +159,7 @@ We can write a json configuration file like `import/neo4j.json` to do the import
Running this example requires `Docker` to be installed, if not, follow [this link](https://docs.docker.com/engine/install/). Run `docker version` to check it.

Spark provides a simple example to convert NebulaGraph data to GraphAr data.
The example is located in the directory ``spark/src/main/scala/com/alibaba/graphar/examples/``.
The example is located in the directory ``spark/src/main/scala/org/apache/graphar/examples/``.

To run the example, download Spark and NebulaGraph first.

@@ -242,7 +242,7 @@ You can include GraphAr as a dependency in your maven project
</repositories>
<dependencies>
<dependency>
<groupId>com.alibaba</groupId>
<groupId>org.apache</groupId>
<artifactId>graphar</artifactId>
<version>0.1.0</version>
</dependency>
4 changes: 2 additions & 2 deletions spark/datasources-32/pom.xml
@@ -23,12 +23,12 @@
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>com.alibaba</groupId>
<groupId>org.apache</groupId>
<artifactId>graphar</artifactId>
<version>${graphar.version}</version>
</parent>

<groupId>com.alibaba</groupId>
<groupId>org.apache</groupId>
<artifactId>graphar-datasources</artifactId>
<version>${graphar.version}</version>
<packaging>jar</packaging>

This file was deleted.

@@ -14,9 +14,9 @@
* limitations under the License.
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import com.alibaba.graphar.GeneralParams
import org.apache.graphar.GeneralParams

import org.json4s._
import org.json4s.jackson.JsonMethods._
@@ -14,7 +14,7 @@
* limitations under the License.
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import scala.collection.JavaConverters._
import scala.util.matching.Regex
@@ -14,7 +14,7 @@
* limitations under the License.
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
@@ -14,7 +14,7 @@
* limitations under the License.
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.connector.read.{Scan, SupportsPushDownFilters}
@@ -14,7 +14,7 @@
* limitations under the License.
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import scala.collection.JavaConverters._

@@ -31,9 +31,9 @@ import org.apache.spark.sql.execution.datasources.v2.FileTable
import org.apache.spark.sql.types._
import org.apache.spark.sql.util.CaseInsensitiveStringMap

import com.alibaba.graphar.datasources.csv.CSVWriteBuilder
import com.alibaba.graphar.datasources.parquet.ParquetWriteBuilder
import com.alibaba.graphar.datasources.orc.OrcWriteBuilder
import org.apache.graphar.datasources.csv.CSVWriteBuilder
import org.apache.graphar.datasources.parquet.ParquetWriteBuilder
import org.apache.graphar.datasources.orc.OrcWriteBuilder

/** GarTable is a class to represent the graph data in GraphAr as a table. */
case class GarTable(
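The hunk above moves `GarTable`'s three write-builder imports (CSV, Parquet, ORC) under the Apache namespace. A Python sketch of the per-format dispatch those imports serve (the mapping and error handling are assumptions about GarTable's internals, not its actual Scala code):

```python
# Fully-qualified class names mirror the imports in the hunk above.
WRITE_BUILDERS = {
    "csv": "org.apache.graphar.datasources.csv.CSVWriteBuilder",
    "orc": "org.apache.graphar.datasources.orc.OrcWriteBuilder",
    "parquet": "org.apache.graphar.datasources.parquet.ParquetWriteBuilder",
}

def builder_for(file_format: str) -> str:
    """Pick the write builder for a GraphAr payload file format."""
    try:
        return WRITE_BUILDERS[file_format.lower()]
    except KeyError:
        raise ValueError(f"unsupported file format: {file_format!r}")
```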
@@ -16,7 +16,7 @@
* The implementation of GarWriteBuilder is referred from FileWriteBuilder of spark 3.1.1
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import java.util.UUID

@@ -16,7 +16,7 @@
 * The implementation of CSVWriteBuilder is referred from CSVWriteBuilder of spark 3.1.1
*/

package com.alibaba.graphar.datasources.csv
package org.apache.graphar.datasources.csv

import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
import org.apache.spark.sql.catalyst.csv.CSVOptions
@@ -31,7 +31,7 @@ import org.apache.spark.sql.execution.datasources.csv.CsvOutputWriter
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DataType, StructType}

import com.alibaba.graphar.datasources.GarWriteBuilder
import org.apache.graphar.datasources.GarWriteBuilder

class CSVWriteBuilder(
paths: Seq[String],
@@ -16,7 +16,7 @@
* The implementation of OrcOutputWriter is referred from OrcOutputWriter of spark 3.1.1
*/

package com.alibaba.graphar.datasources.orc
package org.apache.graphar.datasources.orc

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.NullWritable
@@ -16,7 +16,7 @@
* The implementation of OrcWriteBuilder is referred from OrcWriteBuilder of spark 3.1.1
*/

package com.alibaba.graphar.datasources.orc
package org.apache.graphar.datasources.orc

import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
@@ -32,7 +32,7 @@ import org.apache.spark.sql.execution.datasources.orc.{OrcOptions, OrcUtils}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._

import com.alibaba.graphar.datasources.GarWriteBuilder
import org.apache.graphar.datasources.GarWriteBuilder

object OrcWriteBuilder {
// the getQuotedSchemaString method of spark OrcFileFormat
@@ -16,7 +16,7 @@
* The implementation of ParquetWriteBuilder is referred from ParquetWriteBuilder of spark 3.1.1
*/

package com.alibaba.graphar.datasources.parquet
package org.apache.graphar.datasources.parquet

import org.apache.hadoop.mapreduce.{Job, OutputCommitter, TaskAttemptContext}
import org.apache.parquet.hadoop.{ParquetOutputCommitter, ParquetOutputFormat}
@@ -35,7 +35,7 @@ import org.apache.spark.sql.execution.datasources.parquet._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._

import com.alibaba.graphar.datasources.GarWriteBuilder
import org.apache.graphar.datasources.GarWriteBuilder

class ParquetWriteBuilder(
paths: Seq[String],
4 changes: 2 additions & 2 deletions spark/datasources-33/pom.xml
@@ -23,12 +23,12 @@
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>com.alibaba</groupId>
<groupId>org.apache</groupId>
<artifactId>graphar</artifactId>
<version>${graphar.version}</version>
</parent>

<groupId>com.alibaba</groupId>
<groupId>org.apache</groupId>
<artifactId>graphar-datasources</artifactId>
<version>${graphar.version}</version>
<packaging>jar</packaging>

This file was deleted.

@@ -14,9 +14,9 @@
* limitations under the License.
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import com.alibaba.graphar.GeneralParams
import org.apache.graphar.GeneralParams

import org.json4s._
import org.json4s.jackson.JsonMethods._
@@ -14,7 +14,7 @@
* limitations under the License.
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import scala.collection.JavaConverters._
import scala.util.matching.Regex
@@ -14,7 +14,7 @@
* limitations under the License.
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
@@ -14,7 +14,7 @@
* limitations under the License.
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.connector.read.Scan
@@ -14,7 +14,7 @@
* limitations under the License.
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import scala.collection.JavaConverters._

@@ -31,9 +31,9 @@ import org.apache.spark.sql.execution.datasources.v2.FileTable
import org.apache.spark.sql.types._
import org.apache.spark.sql.util.CaseInsensitiveStringMap

import com.alibaba.graphar.datasources.csv.CSVWriteBuilder
import com.alibaba.graphar.datasources.parquet.ParquetWriteBuilder
import com.alibaba.graphar.datasources.orc.OrcWriteBuilder
import org.apache.graphar.datasources.csv.CSVWriteBuilder
import org.apache.graphar.datasources.parquet.ParquetWriteBuilder
import org.apache.graphar.datasources.orc.OrcWriteBuilder

/** GarTable is a class to represent the graph data in GraphAr as a table. */
case class GarTable(
@@ -16,7 +16,7 @@
* The implementation of GarWriteBuilder is referred from FileWriteBuilder of spark 3.1.1
*/

package com.alibaba.graphar.datasources
package org.apache.graphar.datasources

import java.util.UUID

@@ -16,7 +16,7 @@
 * The implementation of CSVWriteBuilder is referred from CSVWriteBuilder of spark 3.1.1
*/

package com.alibaba.graphar.datasources.csv
package org.apache.graphar.datasources.csv

import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
import org.apache.spark.sql.catalyst.csv.CSVOptions
@@ -31,7 +31,7 @@ import org.apache.spark.sql.execution.datasources.csv.CsvOutputWriter
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DataType, StructType}

import com.alibaba.graphar.datasources.GarWriteBuilder
import org.apache.graphar.datasources.GarWriteBuilder

class CSVWriteBuilder(
paths: Seq[String],
@@ -16,7 +16,7 @@
* The implementation of OrcOutputWriter is referred from OrcOutputWriter of spark 3.1.1
*/

package com.alibaba.graphar.datasources.orc
package org.apache.graphar.datasources.orc

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.NullWritable
