diff --git a/website/build_api_reference.sh b/website/build_api_reference.sh
index bb43ead6..fc2f982c 100755
--- a/website/build_api_reference.sh
+++ b/website/build_api_reference.sh
@@ -11,7 +11,7 @@ sed_no_backup() {
}
# Create docspec dump of this package's source code through pydoc-markdown
-pydoc-markdown --quiet --dump > docspec-dump.jsonl
+python ./pydoc-markdown/generate_ast.py > docspec-dump.jsonl
sed_no_backup "s#${PWD}/..#REPO_ROOT_PLACEHOLDER#g" docspec-dump.jsonl
# Create docpec dump from the right version of the apify-shared package
diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js
index a697ed9c..cbd5d42b 100644
--- a/website/docusaurus.config.js
+++ b/website/docusaurus.config.js
@@ -66,6 +66,9 @@ module.exports = {
rehypePlugins: [externalLinkProcessor],
editUrl: 'https://github.com/apify/apify-sdk-python/edit/master/website/',
},
+ theme: {
+ customCss: require.resolve('./src/css/custom.css'),
+ },
}),
],
]),
diff --git a/website/pydoc-markdown/__init__.py b/website/pydoc-markdown/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/website/pydoc-markdown/generate_ast.py b/website/pydoc-markdown/generate_ast.py
new file mode 100644
index 00000000..3eb9ff07
--- /dev/null
+++ b/website/pydoc-markdown/generate_ast.py
@@ -0,0 +1,46 @@
+"""
+Replaces the default pydoc-markdown shell script with a custom Python script calling the pydoc-markdown API directly.
+
+This script generates an AST from the Python source code in the `src` directory and prints it as a JSON array (one entry per module).
+"""
+
+from pydoc_markdown.interfaces import Context
+from pydoc_markdown.contrib.loaders.python import PythonLoader
+from pydoc_markdown.contrib.processors.filter import FilterProcessor
+from pydoc_markdown.contrib.processors.crossref import CrossrefProcessor
+from pydoc_markdown.contrib.renderers.markdown import MarkdownReferenceResolver
+from google_docstring_processor import ApifyGoogleProcessor
+from docspec import dump_module
+
+import json
+import os
+
+# Package sources live in `src` at the repository root, located relative to this script (not the CWD).
+project_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../src')
+
+context = Context(directory='.')
+loader = PythonLoader(search_path=[project_path])
+# Not called `filter`, so that the builtin of that name is not shadowed.
+filter_processor = FilterProcessor(
+    documented_only=False,
+    skip_empty_modules=False,
+)
+crossref = CrossrefProcessor()
+google = ApifyGoogleProcessor()
+
+# The loader and every processor are initialised with the same context before loading.
+loader.init(context)
+filter_processor.init(context)
+google.init(context)
+crossref.init(context)
+
+# Processors run in this order over the full module list; no reference resolver is passed.
+processors = [filter_processor, google, crossref]
+
+modules = list(loader.load())
+for processor in processors:
+    processor.process(modules, None)
+
+# Serialise all modules into one JSON array and print it to stdout.
+dump = [dump_module(module) for module in modules]
+print(json.dumps(dump, indent=4))
diff --git a/website/pydoc-markdown/google_docstring_processor.py b/website/pydoc-markdown/google_docstring_processor.py
new file mode 100644
index 00000000..0e01b303
--- /dev/null
+++ b/website/pydoc-markdown/google_docstring_processor.py
@@ -0,0 +1,183 @@
+# -*- coding: utf8 -*-
+# Copyright (c) 2019 Niklas Rosenstein
+# !!! Modified 2024 Jindřich Bär
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+import dataclasses
+import re
+import typing as t
+
+import docspec
+
+from pydoc_markdown.contrib.processors.sphinx import generate_sections_markdown
+from pydoc_markdown.interfaces import Processor, Resolver
+
+import json
+
+
+@dataclasses.dataclass
+class ApifyGoogleProcessor(Processor):
+    """
+    Preprocessor for Google-style and PEP 257 docstrings, adapted from the Google processor of
+    pydoc-markdown. Instead of rendering Markdown, it replaces each docstring with a JSON document
+    of the form ``{"text": ..., "sections": [...]}``.
+
+    Each section is ``{name: entries}``; an entry is a dict of the regex groups ``param``, ``desc``
+    and (where present) ``type`` for a line that looks like a parameter, or the plain line otherwise.
+
+    References:
+
+    * https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
+    * https://www.python.org/dev/peps/pep-0257/
+
+    Example:
+
+    ```
+    Does something useful.
+    Args:
+        name (str): Name of the thing.
+    Returns:
+        The created thing.
+    ```
+
+    Is stored as (pretty-printed here; the actual output is a single line):
+
+    ```
+    {
+        "text": "Does something useful.",
+        "sections": [
+            {"Arguments": [{"param": "name", "type": "str", "desc": "Name of the thing."}]},
+            {"Returns": ["The created thing."]}
+        ]
+    }
+    ```
+
+    @doc:fmt:google
+    """
+
+    # Patterns for a single parameter line, tried in order; the named groups become the entry keys.
+    _param_res = [
+        re.compile(r"^(?P<param>\S+):\s+(?P<desc>.+)$"),
+        re.compile(r"^(?P<param>\S+)\s+\((?P<type>[^)]+)\):\s+(?P<desc>.+)$"),
+        re.compile(r"^(?P<param>\S+)\s+--\s+(?P<desc>.+)$"),
+        re.compile(r"^(?P<param>\S+)\s+\{\[(?P<type>\S+)\]\}\s+--\s+(?P<desc>.+)$"),
+        re.compile(r"^(?P<param>\S+)\s+\{(?P<type>\S+)\}\s+--\s+(?P<desc>.+)$"),
+    ]
+
+    # Section headings recognised in a docstring, mapped to the section name used in the output.
+    _keywords_map = {
+        "Args:": "Arguments",
+        "Arguments:": "Arguments",
+        "Attributes:": "Attributes",
+        "Example:": "Example",
+        "Examples:": "Examples",
+        "Keyword Args:": "Arguments",
+        "Keyword Arguments:": "Arguments",
+        "Methods:": "Methods",
+        "Note:": "Notes",
+        "Notes:": "Notes",
+        "Other Parameters:": "Arguments",
+        "Parameters:": "Arguments",
+        "Return:": "Returns",
+        "Returns:": "Returns",
+        "Raises:": "Raises",
+        "References:": "References",
+        "See Also:": "See Also",
+        "Todo:": "Todo",
+        "Warning:": "Warnings",
+        "Warnings:": "Warnings",
+        "Warns:": "Warns",
+        "Yield:": "Yields",
+        "Yields:": "Yields",
+    }
+
+    def check_docstring_format(self, docstring: str) -> bool:
+        """Return True if `docstring` contains any of the recognised section headings."""
+        return any(section_name in docstring for section_name in self._keywords_map)
+
+    def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None:
+        """Rewrite the docstring of every API object in `modules`; `resolver` is not used."""
+        docspec.visit(modules, self._process)
+
+    def _process(self, node: docspec.ApiObject):
+        """Replace the docstring content of `node` with its JSON form; nodes without one are skipped."""
+        if not node.docstring:
+            return
+
+        lines = []
+        sections = []
+        current_lines: t.List[t.Union[str, t.Dict[str, str]]] = []
+        in_codeblock = False
+        keyword = None
+        multiline_argument_offset = -1
+
+        # Flush the buffer into the current section, or into the free text before any section.
+        def _commit():
+            if keyword:
+                sections.append({keyword: list(current_lines)})
+            else:
+                lines.extend(current_lines)
+            current_lines.clear()
+
+        for line in node.docstring.content.split("\n"):
+            multiline_argument_offset += 1
+            if line.lstrip().startswith("```"):
+                in_codeblock = not in_codeblock
+                current_lines.append(line)
+                continue
+
+            if in_codeblock:
+                current_lines.append(line)
+                continue
+
+            line = line.strip()
+            if line in self._keywords_map:
+                _commit()
+                keyword = self._keywords_map[line]
+                continue
+
+            if keyword is None:
+                # Buffered together with code-block lines so the free text keeps its original order.
+                current_lines.append(line)
+                continue
+
+            for param_re in self._param_res:
+                param_match = param_re.match(line)
+                if param_match:
+                    current_lines.append(param_match.groupdict())
+                    multiline_argument_offset = 0
+                    break
+
+            if not param_match:
+                # Right after a parameter entry the line continues its description; the extra checks guard a docstring that starts with a heading.
+                if multiline_argument_offset == 1 and current_lines and isinstance(current_lines[-1], dict):
+                    current_lines[-1]["desc"] += "\n" + line
+                    multiline_argument_offset = 0
+                else:
+                    current_lines.append(line)
+
+        # Flush whatever is still buffered after the last line.
+        _commit()
+        node.docstring.content = json.dumps({
+            "text": "\n".join(lines),
+            "sections": sections,
+        }, indent=None)
+
+
diff --git a/website/src/css/custom.css b/website/src/css/custom.css
new file mode 100644
index 00000000..b3591068
--- /dev/null
+++ b/website/src/css/custom.css
@@ -0,0 +1,12 @@
+.tsd-parameters li { /* gap below each list item inside .tsd-parameters */
+    margin-bottom: 16px;
+}
+
+.tsd-parameters-title,
+.tsd-returns-title { /* both titles share the same font size */
+    font-size: 16px;
+}
+
+.tsd-parameters-title { /* !important: presumably needed to override the theme's own margin — confirm */
+    margin-bottom: 16px !important;
+}
diff --git a/website/transformDocs.js b/website/transformDocs.js
index dc98e635..71971f34 100644
--- a/website/transformDocs.js
+++ b/website/transformDocs.js
@@ -134,27 +134,6 @@ function sortChildren(typedocMember) {
typedocMember.groups.sort((a, b) => groupSort(a.title, b.title));
}
-// Parses the arguments and return value description of a method from its docstring
-function extractArgsAndReturns(docstring) {
- const parameters = (docstring
- .split('Args:')[1] ?? '').split('Returns:')[0] // Get the part between Args: and Returns:
- .split(/(^|\n)\s*([\w]+)\s*\(.*?\)\s*:\s*/) // Magic regex which splits the arguments into an array, and removes the argument types
- .filter(x => x.length > 1) // Remove empty strings
- .reduce((acc, curr, idx, arr) => { // Collect the argument names and types into an object
- if(idx % 2 === 0){
- return {...acc, [curr]: arr[idx+1]} // If the index is even, the current string is an argument name, and the next string is its type
- }
- return acc;
- }, {});
-
- const returns = (docstring
- .split('Returns:')[1] ?? '').split('Raises:')[0] // Get the part between Returns: and Raises:
- .split(':')[1]?.trim() || undefined; // Split the return value into its type and description, return description
-
-
- return { parameters, returns };
-}
-
// Objects with decorators named 'ignore_docs' or with empty docstrings will be ignored
function isHidden(member) {
return member.decorations?.some(d => d.name === 'ignore_docs')
@@ -211,6 +190,24 @@ function convertObject(obj, parent, module) {
member.name = 'Actor';
}
+ // Docstrings normally hold JSON from the custom pydoc-markdown processor; otherwise keep them as plain text.
+ let docstring = { text: member.docstring?.content ?? '' };
+ try {
+     const { text = docstring.text, sections = [] } = JSON.parse(docstring.text);
+     // A missing section yields [] instead of throwing, so e.g. `Returns` survives without `Arguments`.
+     const entriesOf = (name) => sections.find((section) => Object.keys(section)[0] === name)?.[name] ?? [];
+     // Argument name -> description; entries without a parsed name (plain strings) are skipped.
+     const args = Object.fromEntries(entriesOf('Arguments')
+         .filter((arg) => typeof arg?.param === 'string')
+         .map((arg) => [arg.param, arg.desc]));
+     // Lines parsed as `name: description` arrive as objects; only their description is kept.
+     const returns = entriesOf('Returns')
+         .map((entry) => (typeof entry === 'string' ? entry : entry?.desc ?? '')).join('\n');
+     docstring = { text, sections, args, returns };
+ } catch {
+     // Not the expected JSON (e.g. a raw docstring): keep the plain-text fallback.
+ }
+
// Create the Typedoc member object
let typedocMember = {
id: oid++,
@@ -222,7 +219,7 @@ function convertObject(obj, parent, module) {
comment: member.docstring ? {
summary: [{
kind: 'text',
- text: member.docstring?.content,
+ text: docstring.text,
}],
} : undefined,
type: typedocType,
@@ -241,8 +238,6 @@ function convertObject(obj, parent, module) {
}
if(typedocMember.kindString === 'Method') {
- const { parameters, returns } = extractArgsAndReturns(member.docstring?.content ?? '');
-
typedocMember.signatures = [{
id: oid++,
name: member.name,
@@ -250,14 +245,13 @@ function convertObject(obj, parent, module) {
kind: 4096,
kindString: 'Call signature',
flags: {},
- comment: member.docstring ? {
+ comment: docstring.text ? {
summary: [{
kind: 'text',
- text: member.docstring?.content
- .replace(/\**(Args|Arguments|Returns)[\s\S]+/, ''),
+ text: docstring?.text,
}],
- blockTags: returns ? [
- { tag: '@returns', content: [{ kind: 'text', text: returns }] },
+ blockTags: docstring?.returns ? [
+ { tag: '@returns', content: [{ kind: 'text', text: docstring.returns }] },
] : undefined,
} : undefined,
type: inferTypedocType(member.return_type),
@@ -271,10 +265,10 @@ function convertObject(obj, parent, module) {
'keyword-only': arg.type === 'KEYWORD_ONLY' ? 'true' : undefined,
},
type: inferTypedocType(arg.datatype),
- comment: parameters[arg.name] ? {
+ comment: docstring.args?.[arg.name] ? {
summary: [{
kind: 'text',
- text: parameters[arg.name]
+ text: docstring.args[arg.name]
}]
} : undefined,
defaultValue: arg.default_value,
@@ -330,15 +324,14 @@ function main() {
// Load the docspec dump files of this module and of apify-shared
const thisPackageDocspecDump = fs.readFileSync('docspec-dump.jsonl', 'utf8');
- const thisPackageModules = thisPackageDocspecDump.split('\n').filter((line) => line !== '');
+ const thisPackageModules = JSON.parse(thisPackageDocspecDump)
const apifySharedDocspecDump = fs.readFileSync('apify-shared-docspec-dump.jsonl', 'utf8');
const apifySharedModules = apifySharedDocspecDump.split('\n').filter((line) => line !== '');
// Convert all the modules, store them in the root object
- for (const module of [...thisPackageModules, ...apifySharedModules]) {
- const parsedModule = JSON.parse(module);
- convertObject(parsedModule, typedocApiReference, parsedModule);
+ for (const module of thisPackageModules) { // NOTE(review): apifySharedModules is still loaded above but no longer converted here — confirm that dropping it is intentional
+ convertObject(module, typedocApiReference, module);
};
// Recursively fix references (collect names->ids of all the named entities and then inject those in the reference objects)