diff --git a/README.rst b/README.rst index 31a57238..0601bb4f 100644 --- a/README.rst +++ b/README.rst @@ -19,11 +19,28 @@ using monitoring business rules. |example| +Announcements +============= + +Spark 3.0 +--------- + +With Spark 3.0, based on Scala 2.12, make sure to pick up the correct `histogrammar` jar file: + +.. code-block:: python + + spark = SparkSession.builder.config("spark.jars.packages", "io.github.histogrammar:histogrammar-sparksql_2.12:1.0.11").getOrCreate() + +For Spark 2.X compiled against Scala 2.11, in the string above simply replace 2.12 with 2.11. + +`January 29, 2021` + Documentation ============= The entire `popmon` documentation including tutorials can be found at `read-the-docs <https://popmon.readthedocs.io/>`_. + Examples ======== diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index bad45d3a..9dd90b14 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -198,7 +198,7 @@ Spark usage from pyspark.sql import SparkSession # downloads histogrammar jar files if not already installed, used for histogramming of spark dataframe - spark = SparkSession.builder.config('spark.jars.packages','org.diana-hep:histogrammar-sparksql_2.11:1.0.4').getOrCreate() + spark = SparkSession.builder.config('spark.jars.packages','io.github.histogrammar:histogrammar-sparksql_2.12:1.0.11').getOrCreate() # load a dataframe spark_df = spark.read.format('csv').options(header='true').load('file.csv') @@ -216,8 +216,8 @@ This snippet contains the instructions for setting up a minimal environment for !apt-get install openjdk-8-jdk-headless -qq > /dev/null !wget -q https://www-us.apache.org/dist/spark/spark-2.4.7/spark-2.4.7-bin-hadoop2.7.tgz !tar xf spark-2.4.7-bin-hadoop2.7.tgz - !wget -P /content/spark-2.4.7-bin-hadoop2.7/jars/ -q https://repo1.maven.org/maven2/org/diana-hep/histogrammar-sparksql_2.11/1.0.4/histogrammar-sparksql_2.11-1.0.4.jar - !wget -P /content/spark-2.4.7-bin-hadoop2.7/jars/ -q 
https://repo1.maven.org/maven2/org/diana-hep/histogrammar_2.11/1.0.4/histogrammar_2.11-1.0.4.jar + !wget -P /content/spark-2.4.7-bin-hadoop2.7/jars/ -q https://repo1.maven.org/maven2/io/github/histogrammar/histogrammar-sparksql_2.12/1.0.11/histogrammar-sparksql_2.12-1.0.11.jar + !wget -P /content/spark-2.4.7-bin-hadoop2.7/jars/ -q https://repo1.maven.org/maven2/io/github/histogrammar/histogrammar_2.12/1.0.11/histogrammar_2.12-1.0.11.jar !pip install -q findspark popmon Now that spark is installed, restart the runtime. @@ -234,7 +234,7 @@ Now that spark is installed, restart the runtime. from pyspark.sql import SparkSession spark = SparkSession.builder.master("local[*]") \ - .config("spark.jars", "/content/jars/histogrammar_2.11-1.0.4.jar,/content/jars/histogrammar-sparksql_2.11-1.0.4.jar") \ + .config("spark.jars", "/content/jars/histogrammar_2.12-1.0.11.jar,/content/jars/histogrammar-sparksql_2.12-1.0.11.jar") \ .config("spark.sql.execution.arrow.enabled", "false") \ .config("spark.sql.session.timeZone", "GMT") \ .getOrCreate() \ No newline at end of file diff --git a/popmon/notebooks/popmon_tutorial_advanced.ipynb b/popmon/notebooks/popmon_tutorial_advanced.ipynb index 61ab3f2e..eb11b3ce 100644 --- a/popmon/notebooks/popmon_tutorial_advanced.ipynb +++ b/popmon/notebooks/popmon_tutorial_advanced.ipynb @@ -162,7 +162,7 @@ "source": [ "if pyspark_installed:\n", " spark = SparkSession.builder.config(\n", - " \"spark.jars.packages\", \"org.diana-hep:histogrammar-sparksql_2.11:1.0.4\"\n", + " \"spark.jars.packages\", \"io.github.histogrammar:histogrammar-sparksql_2.12:1.0.11\"\n", " ).getOrCreate()\n", "\n", " sdf = spark.createDataFrame(df)\n", diff --git a/tests/popmon/hist/jars/histogrammar-sparksql_2.11-1.0.4.jar b/tests/popmon/hist/jars/histogrammar-sparksql_2.11-1.0.11.jar similarity index 100% rename from tests/popmon/hist/jars/histogrammar-sparksql_2.11-1.0.4.jar rename to tests/popmon/hist/jars/histogrammar-sparksql_2.11-1.0.11.jar diff --git 
a/tests/popmon/hist/jars/histogrammar-sparksql_2.12-1.0.11.jar b/tests/popmon/hist/jars/histogrammar-sparksql_2.12-1.0.11.jar new file mode 100644 index 00000000..cbca9d67 Binary files /dev/null and b/tests/popmon/hist/jars/histogrammar-sparksql_2.12-1.0.11.jar differ diff --git a/tests/popmon/hist/jars/histogrammar_2.11-1.0.4.jar b/tests/popmon/hist/jars/histogrammar_2.11-1.0.11.jar similarity index 100% rename from tests/popmon/hist/jars/histogrammar_2.11-1.0.4.jar rename to tests/popmon/hist/jars/histogrammar_2.11-1.0.11.jar diff --git a/tests/popmon/hist/jars/histogrammar_2.12-1.0.11.jar b/tests/popmon/hist/jars/histogrammar_2.12-1.0.11.jar new file mode 100644 index 00000000..dbfe5ea2 Binary files /dev/null and b/tests/popmon/hist/jars/histogrammar_2.12-1.0.11.jar differ diff --git a/tests/popmon/hist/test_spark_histogrammar.py b/tests/popmon/hist/test_spark_histogrammar.py index 2a0fb225..714362f7 100644 --- a/tests/popmon/hist/test_spark_histogrammar.py +++ b/tests/popmon/hist/test_spark_histogrammar.py @@ -21,8 +21,8 @@ def get_spark(): current_path = dirname(abspath(__file__)) - hist_spark_jar = join(current_path, "jars/histogrammar-sparksql_2.11-1.0.4.jar") - hist_jar = join(current_path, "jars/histogrammar_2.11-1.0.4.jar") + hist_spark_jar = join(current_path, "jars/histogrammar-sparksql_2.11-1.0.11.jar") + hist_jar = join(current_path, "jars/histogrammar_2.11-1.0.11.jar") spark = ( SparkSession.builder.master("local")