Skip to content

Commit

Permalink
Avoid initializing a SparkSession; launch only the JVM
Browse files Browse the repository at this point in the history
  • Loading branch information
HyukjinKwon committed Jul 25, 2017
1 parent b95be04 commit c92533b
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 43 deletions.
8 changes: 2 additions & 6 deletions sql/create-docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,11 @@ fi

# Now create markdown file
rm -fr docs
rm -rf "$WAREHOUSE_DIR"
mkdir docs
echo "Generating markdown files for SQL documentation."
"$SPARK_HOME/bin/spark-submit" \
--driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" \
--conf spark.sql.warehouse.dir="$WAREHOUSE_DIR" \
gen-sql-markdown.py
rm -rf "$WAREHOUSE_DIR"
"$SPARK_HOME/bin/spark-submit" gen-sql-markdown.py

# Now create HTML files
echo "Generating HTML files for SQL documentation."
mkdocs build --clean
rm -fr docs
21 changes: 8 additions & 13 deletions sql/gen-sql-markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,16 @@
import os
from collections import namedtuple

from pyspark.sql import SparkSession

ExpressionInfo = namedtuple("ExpressionInfo", "className usage name extended")


def _list_function_infos(spark):
def _list_function_infos(jvm):
"""
Returns a list of function information via JVM. Sorts wrapped expression infos by name
and returns them.
"""

jinfos = spark.sparkContext._jvm \
.org.apache.spark.sql.api.python.PythonSQLUtils.listBuiltinFunctionInfos()
jinfos = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listBuiltinFunctionInfos()
infos = []
for jinfo in jinfos:
name = jinfo.getName()
Expand Down Expand Up @@ -69,14 +66,14 @@ def _make_pretty_extended(extended):
return "```%s```\n\n" % extended


def generate_sql_markdown(spark, path):
def generate_sql_markdown(jvm, path):
"""
Generates a markdown file after listing the function information. The output file
is created in `path`.
"""

with open(path, 'w') as mdfile:
for info in _list_function_infos(spark):
for info in _list_function_infos(jvm):
mdfile.write("### %s\n\n" % info.name)
usage = _make_pretty_usage(info.usage)
extended = _make_pretty_extended(info.extended)
Expand All @@ -87,10 +84,8 @@ def generate_sql_markdown(spark, path):


if __name__ == "__main__":
spark = SparkSession \
.builder \
.appName("GenSQLDocs") \
.getOrCreate()
from pyspark.java_gateway import launch_gateway

jvm = launch_gateway().jvm
markdown_file_path = "%s/docs/index.md" % os.path.dirname(sys.argv[0])
generate_sql_markdown(spark, markdown_file_path)
spark.stop()
generate_sql_markdown(jvm, markdown_file_path)
24 changes: 0 additions & 24 deletions sql/log4j.properties

This file was deleted.

0 comments on commit c92533b

Please sign in to comment.