"""
Replaces the default pydoc-markdown shell script with a custom Python script calling the pydoc-markdown API directly.

This script loads the Python source code from the `src` directory, runs the documentation processors over it
and prints the result to standard output as a single JSON array with one docspec module dump per element.
"""

import json
import os

from docspec import dump_module
from google_docstring_processor import ApifyGoogleProcessor
from pydoc_markdown.contrib.loaders.python import PythonLoader
from pydoc_markdown.contrib.processors.crossref import CrossrefProcessor
from pydoc_markdown.contrib.processors.filter import FilterProcessor
from pydoc_markdown.contrib.renderers.markdown import MarkdownReferenceResolver
from pydoc_markdown.interfaces import Context


def main() -> None:
    """Load the package sources, run the processors over them and print the JSON dump to stdout."""
    # The sources live two directories above this script, in `src`.
    project_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../src')

    context = Context(directory='.')
    loader = PythonLoader(search_path=[project_path])

    # Processors are initialised and applied in this order: filter, Google docstrings, cross-references.
    processors = [
        FilterProcessor(
            documented_only=False,
            skip_empty_modules=False,
        ),
        ApifyGoogleProcessor(),
        CrossrefProcessor(),
    ]

    loader.init(context)
    for processor in processors:
        processor.init(context)

    modules = list(loader.load())

    for processor in processors:
        # No reference resolver is passed to the processors.
        processor.process(modules, None)

    print(json.dumps([dump_module(module) for module in modules], indent=4))


if __name__ == '__main__':
    main()
# -*- coding: utf8 -*-
# Copyright (c) 2019 Niklas Rosenstein
# !!! Modified 2024 Jindřich Bär
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import dataclasses
import json
import re
import typing as t

import docspec

from pydoc_markdown.contrib.processors.sphinx import generate_sections_markdown
from pydoc_markdown.interfaces import Processor, Resolver


@dataclasses.dataclass
class ApifyGoogleProcessor(Processor):
    """
    Preprocessor for Google-style (and PEP 257) docstrings.

    Each docstring is split into its free text and its named sections
    (``Args:``, ``Returns:``, ...). The docstring content is then replaced by
    a JSON document of the form::

        {"text": "<free text>", "sections": [{"<Section>": [<entries>]}, ...]}

    A section entry is either a plain string or, for lines that look like a
    parameter description, a dict with the keys ``param``, ``desc`` and
    (when the line carries one) ``type``.

    References:

    * https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
    * https://www.python.org/dev/peps/pep-0257/

    Example input:

    ```
    Attributes:
        module_level_variable1 (int): Module level variables may be documented in
            either the ``Attributes`` section of the module docstring, or in an
            inline docstring immediately following the variable.

    Todo:
        * For module TODOs
        * You have to also use ``sphinx.ext.todo`` extension
    ```

    @doc:fmt:google
    """

    # Recognised parameter-line shapes, tried in order; the first match wins.
    # The group names are part of the output contract: they become the keys
    # of the parameter dicts stored in the sections.
    _param_res = [
        re.compile(r"^(?P<param>\S+):\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\((?P<type>[^)]+)\):\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+--\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\{\[(?P<type>\S+)\]\}\s+--\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\{(?P<type>\S+)\}\s+--\s+(?P<desc>.+)$"),
    ]

    # Maps a section header line (after stripping) to the canonical section name.
    _keywords_map = {
        "Args:": "Arguments",
        "Arguments:": "Arguments",
        "Attributes:": "Attributes",
        "Example:": "Example",
        "Examples:": "Examples",
        "Keyword Args:": "Arguments",
        "Keyword Arguments:": "Arguments",
        "Methods:": "Methods",
        "Note:": "Notes",
        "Notes:": "Notes",
        "Other Parameters:": "Arguments",
        "Parameters:": "Arguments",
        "Return:": "Returns",
        "Returns:": "Returns",
        "Raises:": "Raises",
        "References:": "References",
        "See Also:": "See Also",
        "Todo:": "Todo",
        "Warning:": "Warnings",
        "Warnings:": "Warnings",
        "Warns:": "Warns",
        "Yield:": "Yields",
        "Yields:": "Yields",
    }

    def check_docstring_format(self, docstring: str) -> bool:
        """Return True if any known section header occurs anywhere in *docstring*."""
        return any(section_name in docstring for section_name in self._keywords_map)

    def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None:
        """Rewrite the docstring of every API object in *modules* in place; *resolver* is unused."""
        docspec.visit(modules, self._process)

    def _match_param(self, line: str) -> t.Optional[t.Dict[str, t.Any]]:
        """Return the named groups of the first parameter pattern matching *line*, or None."""
        for param_re in self._param_res:
            param_match = param_re.match(line)
            if param_match:
                return param_match.groupdict()
        return None

    def _process(self, node: docspec.ApiObject) -> None:
        """Replace the docstring content of *node* with its JSON-encoded text and sections."""
        if not node.docstring:
            return

        lines: t.List[str] = []  # free text preceding the first section
        sections: t.List[t.Dict[str, list]] = []  # one single-key dict per section, in order
        current_lines: list = []  # entries of the section currently being read
        in_codeblock = False
        keyword = None
        # Parameter entry that the next plain line is allowed to continue.
        # Only the line directly after a parameter (or after a previous
        # continuation) may extend it; any other line closes the entry.
        open_param: t.Optional[t.Dict[str, t.Any]] = None

        def _commit():
            if keyword:
                sections.append({keyword: list(current_lines)})
            else:
                lines.extend(current_lines)
            current_lines.clear()

        def _sink() -> list:
            # Before the first section everything belongs to the free text, so
            # code blocks keep their position relative to the surrounding text.
            return lines if keyword is None else current_lines

        for raw_line in node.docstring.content.split("\n"):
            continued_param, open_param = open_param, None

            if raw_line.lstrip().startswith("```"):
                in_codeblock = not in_codeblock
                _sink().append(raw_line)
                continue

            if in_codeblock:
                # Code block lines are kept verbatim (not stripped).
                _sink().append(raw_line)
                continue

            line = raw_line.strip()
            if line in self._keywords_map:
                _commit()
                keyword = self._keywords_map[line]
                continue

            if keyword is None:
                lines.append(line)
                continue

            param = self._match_param(line)
            if param is not None:
                current_lines.append(param)
                open_param = param
            elif continued_param is not None:
                # Continuation of a multi-line parameter description.
                continued_param["desc"] += "\n" + line
                open_param = continued_param
            else:
                current_lines.append(line)

        _commit()
        node.docstring.content = json.dumps({
            "text": "\n".join(lines),
            "sections": sections,
        }, indent=None)
const parameters = (docstring - .split('Args:')[1] ?? '').split('Returns:')[0] // Get the part between Args: and Returns: - .split(/(^|\n)\s*([\w]+)\s*\(.*?\)\s*:\s*/) // Magic regex which splits the arguments into an array, and removes the argument types - .filter(x => x.length > 1) // Remove empty strings - .reduce((acc, curr, idx, arr) => { // Collect the argument names and types into an object - if(idx % 2 === 0){ - return {...acc, [curr]: arr[idx+1]} // If the index is even, the current string is an argument name, and the next string is its type - } - return acc; - }, {}); - - const returns = (docstring - .split('Returns:')[1] ?? '').split('Raises:')[0] // Get the part between Returns: and Raises: - .split(':')[1]?.trim() || undefined; // Split the return value into its type and description, return description - - - return { parameters, returns }; -} - // Objects with decorators named 'ignore_docs' or with empty docstrings will be ignored function isHidden(member) { return member.decorations?.some(d => d.name === 'ignore_docs') @@ -211,6 +190,24 @@ function convertObject(obj, parent, module) { member.name = 'Actor'; } + let docstring = { text: member.docstring?.content ?? '' }; + try { + docstring = JSON.parse(docstring.text); + + docstring.args = docstring.sections.find((section) => Object.keys(section)[0] === 'Arguments')['Arguments'] ?? []; + + docstring.args = docstring.args.reduce((acc, arg) => { + acc[arg.param] = arg.desc; + return acc; + }, {}); + + docstring.returns = docstring.sections.find((section) => Object.keys(section)[0] === 'Returns')['Returns'] ?? []; + + docstring.returns = docstring.returns.join('\n'); + } catch { + // Do nothing + } + // Create the Typedoc member object let typedocMember = { id: oid++, @@ -222,7 +219,7 @@ function convertObject(obj, parent, module) { comment: member.docstring ? 
{ summary: [{ kind: 'text', - text: member.docstring?.content, + text: docstring.text, }], } : undefined, type: typedocType, @@ -241,8 +238,6 @@ function convertObject(obj, parent, module) { } if(typedocMember.kindString === 'Method') { - const { parameters, returns } = extractArgsAndReturns(member.docstring?.content ?? ''); - typedocMember.signatures = [{ id: oid++, name: member.name, @@ -250,14 +245,13 @@ function convertObject(obj, parent, module) { kind: 4096, kindString: 'Call signature', flags: {}, - comment: member.docstring ? { + comment: docstring.text ? { summary: [{ kind: 'text', - text: member.docstring?.content - .replace(/\**(Args|Arguments|Returns)[\s\S]+/, ''), + text: docstring?.text, }], - blockTags: returns ? [ - { tag: '@returns', content: [{ kind: 'text', text: returns }] }, + blockTags: docstring?.returns ? [ + { tag: '@returns', content: [{ kind: 'text', text: docstring.returns }] }, ] : undefined, } : undefined, type: inferTypedocType(member.return_type), @@ -271,10 +265,10 @@ function convertObject(obj, parent, module) { 'keyword-only': arg.type === 'KEYWORD_ONLY' ? 'true' : undefined, }, type: inferTypedocType(arg.datatype), - comment: parameters[arg.name] ? { + comment: docstring.args?.[arg.name] ? 
{ summary: [{ kind: 'text', - text: parameters[arg.name] + text: docstring.args[arg.name] }] } : undefined, defaultValue: arg.default_value, @@ -330,15 +324,14 @@ function main() { // Load the docspec dump files of this module and of apify-shared const thisPackageDocspecDump = fs.readFileSync('docspec-dump.jsonl', 'utf8'); - const thisPackageModules = thisPackageDocspecDump.split('\n').filter((line) => line !== ''); + const thisPackageModules = JSON.parse(thisPackageDocspecDump) const apifySharedDocspecDump = fs.readFileSync('apify-shared-docspec-dump.jsonl', 'utf8'); const apifySharedModules = apifySharedDocspecDump.split('\n').filter((line) => line !== ''); // Convert all the modules, store them in the root object - for (const module of [...thisPackageModules, ...apifySharedModules]) { - const parsedModule = JSON.parse(module); - convertObject(parsedModule, typedocApiReference, parsedModule); + for (const module of thisPackageModules) { + convertObject(module, typedocApiReference, module); }; // Recursively fix references (collect names->ids of all the named entities and then inject those in the reference objects)