Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-46935][DOCS] Consolidate error documentation #44971

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,8 @@ jobs:
run: ./dev/lint-r
- name: Run documentation build
run: |
# We need this symlink because the Jekyll build calls `python`.
ln -s "$(which python3.9)" "/usr/local/bin/python"
# Build docs first with SKIP_API to ensure they are buildable without requiring any
# language docs to be built beforehand.
cd docs; SKIP_API=1 bundle exec jekyll build; cd ..
Expand Down
222 changes: 0 additions & 222 deletions core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,8 @@ package org.apache.spark
import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.util.Locale

import scala.jdk.CollectionConverters._
import scala.util.Properties.lineSeparator
import scala.util.matching.Regex

import com.fasterxml.jackson.annotation.JsonInclude.Include
import com.fasterxml.jackson.core.JsonParser.Feature.STRICT_DUPLICATE_DETECTION
Expand All @@ -48,12 +45,6 @@ class SparkThrowableSuite extends SparkFunSuite {
SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \
"core/testOnly *SparkThrowableSuite -- -t \"Error classes are correctly formatted\""
}}}

To regenerate the error class document. Run:
{{{
SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \
"core/testOnly *SparkThrowableSuite -- -t \"Error classes match with document\""
}}}
*/
private val regenerateCommand = "SPARK_GENERATE_GOLDEN_FILES=1 build/sbt " +
"\"core/testOnly *SparkThrowableSuite -- -t \\\"Error classes match with document\\\"\""
Expand Down Expand Up @@ -173,219 +164,6 @@ class SparkThrowableSuite extends SparkFunSuite {
checkIfUnique(messageFormats)
}

// Golden-file test: renders the error-class documentation (parent page plus one
// page per error class with sub-classes) from `errorReader.errorInfoMap` and
// compares it against the checked-in markdown under `docs/`. With
// SPARK_GENERATE_GOLDEN_FILES=1 it regenerates the docs instead of asserting.
test("Error classes match with document") {
  val errors = errorReader.errorInfoMap

  // Error (sub-)class names whose message parameters must NOT be wrapped in
  // backquotes by quoteParameter below.
  val contentQuoteBlackList = Seq(
    "INCOMPLETE_TYPE_DEFINITION.MAP",
    "INCOMPLETE_TYPE_DEFINITION.STRUCT")

  // Wraps `<param>` placeholders and `%...$` format specifiers of a message
  // template in backquotes for markdown rendering, unless the error name is
  // blacklisted above.
  def quoteParameter(content: String, errorName: String): String = {
    if (contentQuoteBlackList.contains(errorName)) {
      content
    } else {
      "<(.*?)>".r.replaceAllIn(content, (m: Regex.Match) => {
        val matchStr = m.group(1)
        if (matchStr.nonEmpty) {
          s"`<$matchStr>`"
        } else {
          // Empty `<>` is left untouched.
          m.matched
        }
      }).replaceAll("%(.*?)\\$", "`\\%$1\\$`")
    }
  }

  // Map from the 2-character SQLSTATE class prefix to the markdown anchor of
  // its section in sql-error-conditions-sqlstates.md, parsed from that file's
  // `##` headers.
  val sqlStates = IOUtils.toString(getWorkspaceFilePath("docs",
    "sql-error-conditions-sqlstates.md").toUri, StandardCharsets.UTF_8).split("\n")
    .filter(_.startsWith("##")).map(s => {

    // Split the header on backquotes, colons, hashes and whitespace; the
    // second token is the SQLSTATE class prefix.
    val errorHeader = s.split("[`|:|#|\\s]+").filter(_.nonEmpty)
    val sqlState = errorHeader(1)
    (sqlState, errorHeader.head.toLowerCase(Locale.ROOT) + "-" + sqlState + "-" +
      errorHeader.takeRight(errorHeader.length - 2).mkString("-").toLowerCase(Locale.ROOT))
  }).toMap

  // Renders the "SQLSTATE: ..." line for an error, linking to the SQLSTATE
  // section when the state's 2-character prefix has a known anchor.
  def getSqlState(sqlState: Option[String]): String = {
    if (sqlState.isDefined) {
      val prefix = sqlState.get.substring(0, 2)
      if (sqlStates.contains(prefix)) {
        s"[SQLSTATE: ${sqlState.get}](sql-error-conditions-sqlstates.html#${sqlStates(prefix)})"
      } else {
        "SQLSTATE: " + sqlState.get
      }
    } else {
      "SQLSTATE: none assigned"
    }
  }

  // File/URL stem (no extension) of the per-error-class doc page, e.g.
  // "sql-error-conditions-invalid-format-error-class" for INVALID_FORMAT.
  def getErrorPath(error: String): String = {
    s"sql-error-conditions-${error.toLowerCase(Locale.ROOT).replaceAll("_", "-")}-error-class"
  }

  // Jekyll front matter plus the ASF license header and a "do not edit"
  // banner, shared by every generated page.
  def getHeader(title: String): String = {
    s"""---
       |layout: global
       |title: $title
       |displayTitle: $title
       |license: |
       | Licensed to the Apache Software Foundation (ASF) under one or more
       | contributor license agreements. See the NOTICE file distributed with
       | this work for additional information regarding copyright ownership.
       | The ASF licenses this file to You under the Apache License, Version 2.0
       | (the "License"); you may not use this file except in compliance with
       | the License. You may obtain a copy of the License at
       |
       | http://www.apache.org/licenses/LICENSE-2.0
       |
       | Unless required by applicable law or agreed to in writing, software
       | distributed under the License is distributed on an "AS IS" BASIS,
       | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       | See the License for the specific language governing permissions and
       | limitations under the License.
       |---
       |
       |<!--
       | DO NOT EDIT THIS FILE.
       | It was generated automatically by `${getClass().getName()}`.
       |-->""".stripMargin
  }

  // Finds *-error-class.md files in docs/ (non-recursive) that no longer
  // correspond to an error class with sub-classes — i.e. stale golden files.
  def orphanedGoldenFiles(): Iterable[File] = {
    val subErrorFileNames = errors.filter(_._2.subClass.isDefined).map(error => {
      getErrorPath(error._1) + ".md"
    }).toSet

    val docsDir = getWorkspaceFilePath("docs")
    val orphans = FileUtils.listFiles(docsDir.toFile, Array("md"), false).asScala.filter { f =>
      (f.getName.startsWith("sql-error-conditions-") && f.getName.endsWith("-error-class.md")) &&
        !subErrorFileNames.contains(f.getName)
    }
    orphans
  }

  // Body of the parent page: one "###" section per non-legacy error class,
  // sorted by name. Classes with sub-classes link to their dedicated page.
  val sqlErrorParentDocContent = errors.toSeq.filter(!_._1.startsWith("_LEGACY_ERROR"))
    .sortBy(_._1).map(error => {
      val name = error._1
      val info = error._2
      if (info.subClass.isDefined) {
        val title = s"[$name](${getErrorPath(name)}.html)"
        s"""|### $title
            |
            |${getSqlState(info.sqlState)}
            |
            |${quoteParameter(info.messageTemplate, name)}
            |
            |For more details see $title
            |""".stripMargin
      } else {
        s"""|### $name
            |
            |${getSqlState(info.sqlState)}
            |
            |${quoteParameter(info.messageTemplate, name)}
            |""".stripMargin
      }
    }).mkString("\n")

  // Full expected content of docs/sql-error-conditions.md.
  val sqlErrorParentDoc =
    s"""${getHeader("Error Conditions")}
       |
       |This is a list of common, named error conditions returned by Spark SQL.
       |
       |Also see [SQLSTATE Codes](sql-error-conditions-sqlstates.html).
       |
       |$sqlErrorParentDocContent""".stripMargin

  // For every error class with sub-classes, build its dedicated page and
  // either regenerate the golden file or assert it is up to date.
  errors.filter(_._2.subClass.isDefined).foreach(error => {
    val name = error._1
    val info = error._2

    // One "##" section per sub-class, sorted by sub-class name.
    val subErrorContent = info.subClass.get.toSeq.sortBy(_._1).map(subError => {
      s"""|## ${subError._1}
          |
          |${quoteParameter(subError._2.messageTemplate, s"$name.${subError._1}")}
          |""".stripMargin
    }).mkString("\n")

    val subErrorDoc =
      s"""${getHeader(name + " error class")}
         |
         |${getSqlState(info.sqlState)}
         |
         |${quoteParameter(info.messageTemplate, name)}
         |
         |This error class has the following derived error classes:
         |
         |$subErrorContent
         |""".stripMargin

    val errorDocPath = getWorkspaceFilePath("docs", getErrorPath(name) + ".md")
    // Existing golden file content, or "" if the page does not exist yet.
    val errorsInDoc = if (errorDocPath.toFile.exists()) {
      IOUtils.toString(errorDocPath.toUri, StandardCharsets.UTF_8)
    } else {
      ""
    }
    if (regenerateGoldenFiles) {
      // Rewrite only when content actually changed (comparison ignores
      // leading/trailing whitespace).
      if (subErrorDoc.trim != errorsInDoc.trim) {
        logInfo(s"Regenerating sub error class document $errorDocPath")
        if (errorDocPath.toFile.exists()) {
          Files.delete(errorDocPath)
        }
        FileUtils.writeStringToFile(
          errorDocPath.toFile,
          subErrorDoc + lineSeparator,
          StandardCharsets.UTF_8)
      }
    } else {
      assert(subErrorDoc.trim == errorsInDoc.trim,
        "The error class document is not up to date. " +
          s"Please regenerate it by running `$regenerateCommand`")
    }
  })

  // Same regenerate-or-assert treatment for the parent page.
  val parentDocPath = getWorkspaceFilePath("docs", "sql-error-conditions.md")
  val commonErrorsInDoc = if (parentDocPath.toFile.exists()) {
    IOUtils.toString(parentDocPath.toUri, StandardCharsets.UTF_8)
  } else {
    ""
  }
  if (regenerateGoldenFiles) {
    if (sqlErrorParentDoc.trim != commonErrorsInDoc.trim) {
      logInfo(s"Regenerating error class document $parentDocPath")
      if (parentDocPath.toFile.exists()) {
        Files.delete(parentDocPath)
      }
      FileUtils.writeStringToFile(
        parentDocPath.toFile,
        sqlErrorParentDoc,
        StandardCharsets.UTF_8)
    }
  } else {
    assert(sqlErrorParentDoc.trim == commonErrorsInDoc.trim,
      "The error class document is not up to date. " +
        s"Please regenerate it by running `$regenerateCommand`")
  }

  // Finally, stale per-class pages are deleted when regenerating, or cause a
  // failure otherwise.
  val orphans = orphanedGoldenFiles()
  if (regenerateGoldenFiles) {
    if (orphans.nonEmpty) {
      logInfo(s"Orphaned error class documents (${orphans.size}) is not empty, " +
        "executing cleanup operation.")
      orphans.foreach { f =>
        FileUtils.deleteQuietly(f)
        logInfo(s"Cleanup orphaned error document: ${f.getName}.")
      }
    } else {
      logInfo("Orphaned error class documents is empty")
    }
  } else {
    assert(orphans.isEmpty,
      "Exist orphaned error class documents. " +
        s"Please regenerate it by running `$regenerateCommand`")
  }
}

test("Round trip") {
val tmpFile = File.createTempFile("rewritten", ".json")
val mapper = JsonMapper.builder()
Expand Down
24 changes: 13 additions & 11 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,22 @@ You need to have [Ruby 3][ruby] and [Python 3][python] installed. Make sure the
$ gem install bundler
```

After this all the required ruby dependencies can be installed from the `docs/` directory via the Bundler:
After this, all the required Ruby dependencies can be installed from the `docs/` directory via Bundler:

```sh
$ cd docs
$ cd "$SPARK_HOME"/docs
$ bundle install
```

To generate the Python or R docs, you'll need to [install Pandoc](https://pandoc.org/installing.html).

### SQL and Python API Documentation (Optional)

To generate SQL and Python API docs, you'll need to install these libraries:
The required Python dependencies can be installed using pip:

Run the following command from $SPARK_HOME:
```sh
$ cd "$SPARK_HOME"
$ pip install --upgrade -r dev/requirements.txt
```

To generate the Python or R API docs, you'll also need to [install Pandoc](https://pandoc.org/installing.html).

### R API Documentation (Optional)

If you'd like to generate R API documentation, install these libraries:
Expand Down Expand Up @@ -121,6 +119,10 @@ The jekyll plugin also generates the PySpark docs using [Sphinx](http://sphinx-d
using [roxygen2](https://cran.r-project.org/web/packages/roxygen2/index.html) and SQL docs
using [MkDocs](https://www.mkdocs.org/).

NOTE: To skip the step of building and copying over the Scala, Java, Python, R and SQL API docs, run `SKIP_API=1
bundle exec jekyll build`. In addition, `SKIP_SCALADOC=1`, `SKIP_PYTHONDOC=1`, `SKIP_RDOC=1` and `SKIP_SQLDOC=1` can be used
to skip a single step of the corresponding language. `SKIP_SCALADOC` indicates skipping both the Scala and Java docs.
To control what API docs get built, you can set any combination of the following shell variables before you run `bundle exec jekyll build`:
* `SKIP_API=1`: Skip building all the API docs.
* `SKIP_SCALADOC=1`: Skip the Scala and Java API docs.
* `SKIP_PYTHONDOC=1`: Skip the Python API docs.
* `SKIP_RDOC=1`: Skip the R API docs.
* `SKIP_SQLDOC=1`: Skip the SQL API docs.

45 changes: 0 additions & 45 deletions docs/_data/menu-sql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,48 +106,3 @@
url: sql-ref-syntax.html#auxiliary-statements
- text: Error Conditions
url: sql-error-conditions.html
subitems:
- text: SQLSTATE Codes
url: sql-error-conditions-sqlstates.html
- text: COLLECTION_SIZE_LIMIT_EXCEEDED error class
url: sql-error-conditions-collection-size-limit-exceeded-error-class.html
- text: CONNECT error class
url: sql-error-conditions-connect-error-class.html
- text: DATATYPE_MISMATCH error class
url: sql-error-conditions-datatype-mismatch-error-class.html
- text: INCOMPATIBLE_DATA_FOR_TABLE error class
url: sql-error-conditions-incompatible-data-for-table-error-class.html
- text: INCOMPLETE_TYPE_DEFINITION error class
url: sql-error-conditions-incomplete-type-definition-error-class.html
- text: INCONSISTENT_BEHAVIOR_CROSS_VERSION error class
url: sql-error-conditions-inconsistent-behavior-cross-version-error-class.html
- text: INVALID_FORMAT error class
url: sql-error-conditions-invalid-format-error-class.html
- text: INVALID_OPTIONS error class
url: sql-error-conditions-invalid-options-error-class.html
- text: INVALID_PARAMETER_VALUE error class
url: sql-error-conditions-invalid-parameter-value-error-class.html
- text: INVALID_SCHEMA error class
url: sql-error-conditions-invalid-schema-error-class.html
- text: INVALID_SUBQUERY_EXPRESSION error class
url: sql-error-conditions-invalid-subquery-expression-error-class.html
- text: NOT_NULL_CONSTRAINT_VIOLATION error class
url: sql-error-conditions-not-null-constraint-violation-error-class.html
- text: UNRESOLVED_COLUMN error class
url: sql-error-conditions-unresolved-column-error-class.html
- text: UNRESOLVED_FIELD error class
url: sql-error-conditions-unresolved-field-error-class.html
- text: UNRESOLVED_MAP_KEY error class
url: sql-error-conditions-unresolved-map-key-error-class.html
- text: UNSUPPORTED_DESERIALIZER error class
url: sql-error-conditions-unsupported-deserializer-error-class.html
- text: UNSUPPORTED_FEATURE error class
url: sql-error-conditions-unsupported-feature-error-class.html
- text: UNSUPPORTED_GENERATOR error class
url: sql-error-conditions-unsupported-generator-error-class.html
- text: UNSUPPORTED_SAVE_MODE error class
url: sql-error-conditions-unsupported-save-mode-error-class.html
- text: UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY error class
url: sql-error-conditions-unsupported-subquery-expression-category-error-class.html
- text: WRONG_NUM_ARGS error class
url: sql-error-conditions-wrong-num-args-error-class.html
8 changes: 8 additions & 0 deletions docs/_plugins/build_api_docs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ def build_sql_docs
cp_r("../sql/site/.", "api/sql")
end

# Generate the error-condition documentation pages by shelling out to the
# Python build script under docs/util/. Raises (aborting the Jekyll build)
# when the script cannot be started or exits non-zero.
def build_error_docs
  print_header "Building error docs."
  succeeded = system("python '#{SPARK_PROJECT_ROOT}/docs/util/build-error-docs.py'")
  raise("Error doc generation failed") unless succeeded
end

# Error docs are built unconditionally — this call sits before the SKIP_API /
# SKIP_*DOC checks below, so the skip flags do not affect it.
build_error_docs

if not (ENV['SKIP_API'] == '1')
if not (ENV['SKIP_SCALADOC'] == '1')
build_scala_and_java_docs
Expand Down
23 changes: 23 additions & 0 deletions docs/css/custom.css
Original file line number Diff line number Diff line change
Expand Up @@ -988,3 +988,26 @@ table.spark-config th:nth-child(4),
table.spark-config td:nth-child(4) {
width: 90px;
}

/* Error-conditions table: fixed layout so column widths come from the
   rules below rather than from cell content. Written as flat descendant
   selectors (equivalent to the nested form, same specificity). */
table#error-conditions {
  table-layout: fixed;
}

/* Any error names that wrap will have the wrapped lines indented
   relative to the first line thanks to these three rules. */
table#error-conditions span.error-condition-name {
  padding-left: 2em;
  text-indent: -2em;
  display: block;
}

/* First column (was 85px; widened to 105px). */
table#error-conditions th:nth-child(1),
table#error-conditions td:nth-child(1) {
  width: 105px;
}

/* Indent sub-condition rows under their parent condition. */
table#error-conditions td.error-sub-condition {
  padding-left: 2.5em;
}