Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion website/build_api_reference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ sed_no_backup() {
}

# Create docspec dump of this package's source code through pydoc-markdown
pydoc-markdown --quiet --dump > docspec-dump.jsonl
python ./pydoc-markdown/generate_ast.py > docspec-dump.jsonl
sed_no_backup "s#${PWD}/..#REPO_ROOT_PLACEHOLDER#g" docspec-dump.jsonl

# Create docspec dump from the right version of the apify-shared package
Expand Down
3 changes: 3 additions & 0 deletions website/docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ module.exports = {
rehypePlugins: [externalLinkProcessor],
editUrl: 'https://github.com/apify/apify-sdk-python/edit/master/website/',
},
theme: {
customCss: require.resolve('./src/css/custom.css'),
},
}),
],
]),
Expand Down
Empty file.
46 changes: 46 additions & 0 deletions website/pydoc-markdown/generate_ast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
Replaces the default pydoc-markdown shell script with a custom Python script calling the pydoc-markdown API directly.

This script generates an AST from the Python source code in the `src` directory and prints it as a JSON object.
"""

from pydoc_markdown.interfaces import Context
from pydoc_markdown.contrib.loaders.python import PythonLoader
from pydoc_markdown.contrib.processors.filter import FilterProcessor
from pydoc_markdown.contrib.processors.crossref import CrossrefProcessor
from pydoc_markdown.contrib.renderers.markdown import MarkdownReferenceResolver
from google_docstring_processor import ApifyGoogleProcessor
from docspec import dump_module

import json
import os

def main() -> None:
    """Load the package sources, run the docstring processors and print the docspec dump.

    The Python modules found under ``../../src`` (relative to this file) are loaded with
    pydoc-markdown's ``PythonLoader``, passed through the filter, Google-docstring and
    crossref processors (in that order), and finally printed to stdout as a single
    JSON array containing one docspec module dump per loaded module.
    """
    project_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../src')

    context = Context(directory='.')
    loader = PythonLoader(search_path=[project_path])

    # Named `*_processor` so the builtin `filter` is not shadowed.
    filter_processor = FilterProcessor(
        documented_only=False,
        skip_empty_modules=False,
    )
    google_processor = ApifyGoogleProcessor()
    crossref_processor = CrossrefProcessor()

    # The list order is the order in which the processors are applied to the modules.
    processors = [filter_processor, google_processor, crossref_processor]

    # Every plugin is initialised with the shared context before any work is done.
    loader.init(context)
    for processor in processors:
        processor.init(context)

    modules = list(loader.load())

    # No reference resolver is supplied to the processors.
    for processor in processors:
        processor.process(modules, None)

    print(json.dumps([dump_module(module) for module in modules], indent=4))


if __name__ == '__main__':
    main()
183 changes: 183 additions & 0 deletions website/pydoc-markdown/google_docstring_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# -*- coding: utf8 -*-
# Copyright (c) 2019 Niklas Rosenstein
# !!! Modified 2024 Jindřich Bär
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import dataclasses
import re
import typing as t

import docspec

from pydoc_markdown.contrib.processors.sphinx import generate_sections_markdown
from pydoc_markdown.interfaces import Processor, Resolver

import json


@dataclasses.dataclass
class ApifyGoogleProcessor(Processor):
    """
    Preprocessor for Google-style and PEP 257 docstrings that emits structured JSON.

    Instead of rendering Markdown, this processor replaces the content of every
    docstring it visits with a JSON document of the following shape:

    ```
    {
        "text": "<everything before the first section header>",
        "sections": [
            {"Arguments": [{"param": "...", "type": "...", "desc": "..."}, ...]},
            {"Returns": ["free-form line", ...]}
        ]
    }
    ```

    Parsing rules:

    * A line that (after stripping) equals one of the keys of `_keywords_map`
      starts a new section named by the mapped value.
    * Inside a section, a line matching one of the `_param_res` patterns becomes
      a dict holding the pattern's named groups (`param`, `desc` and, for the
      patterns that capture it, `type`).
    * Non-matching lines that follow a parameter entry are appended to that
      entry's `desc`, separated by a newline. Other non-matching lines are
      stored in the section as plain strings.
    * Lines inside a fenced code block (delimited by lines starting with three
      backticks) are kept verbatim, including their indentation, and are never
      interpreted as section headers or parameters.

    References:

    * https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
    * https://www.python.org/dev/peps/pep-0257/

    @doc:fmt:google
    """

    # Patterns recognising a single parameter line; tried in order, first match wins.
    _param_res = [
        re.compile(r"^(?P<param>\S+):\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\((?P<type>[^)]+)\):\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+--\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\{\[(?P<type>\S+)\]\}\s+--\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\{(?P<type>\S+)\}\s+--\s+(?P<desc>.+)$"),
    ]

    # Section header (exact stripped line) -> normalised section name used in the output.
    _keywords_map = {
        "Args:": "Arguments",
        "Arguments:": "Arguments",
        "Attributes:": "Attributes",
        "Example:": "Example",
        "Examples:": "Examples",
        "Keyword Args:": "Arguments",
        "Keyword Arguments:": "Arguments",
        "Methods:": "Methods",
        "Note:": "Notes",
        "Notes:": "Notes",
        "Other Parameters:": "Arguments",
        "Parameters:": "Arguments",
        "Return:": "Returns",
        "Returns:": "Returns",
        "Raises:": "Raises",
        "References:": "References",
        "See Also:": "See Also",
        "Todo:": "Todo",
        "Warning:": "Warnings",
        "Warnings:": "Warnings",
        "Warns:": "Warns",
        "Yield:": "Yields",
        "Yields:": "Yields",
    }

    def check_docstring_format(self, docstring: str) -> bool:
        """Return True if any known section header occurs anywhere in `docstring`."""
        return any(section_name in docstring for section_name in self._keywords_map)

    def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None:
        """Rewrite the docstring of every API object in `modules` in place.

        `resolver` is accepted for interface compatibility and is not used.
        """
        docspec.visit(modules, self._process)

    def _match_param(self, line: str) -> t.Optional[t.Dict[str, str]]:
        """Return the named groups of the first parameter pattern matching `line`, or None."""
        for param_re in self._param_res:
            param_match = param_re.match(line)
            if param_match:
                return param_match.groupdict()
        return None

    def _process(self, node: docspec.ApiObject) -> None:
        """Replace `node.docstring.content` with its JSON representation.

        Nodes without a docstring are left untouched.
        """
        if not node.docstring:
            return

        text_lines: t.List[str] = []
        sections: t.List[t.Dict[str, t.List[t.Any]]] = []
        # Lines collected since the last section header (or since the start).
        current_lines: t.List[t.Any] = []
        in_codeblock = False
        keyword: t.Optional[str] = None
        # The parameter entry that following free-text lines should extend, if any.
        last_param: t.Optional[t.Dict[str, str]] = None

        def _commit() -> None:
            # Flush the collected lines into the current section, or into the
            # leading text when no section header has been seen yet.
            if keyword:
                sections.append({keyword: list(current_lines)})
            else:
                text_lines.extend(current_lines)
            current_lines.clear()

        for raw_line in node.docstring.content.split("\n"):
            if raw_line.lstrip().startswith("```"):
                in_codeblock = not in_codeblock
                current_lines.append(raw_line)
                # Text after a code fence no longer continues a parameter description.
                last_param = None
                continue

            if in_codeblock:
                # Code is kept verbatim, indentation included.
                current_lines.append(raw_line)
                continue

            line = raw_line.strip()
            if line in self._keywords_map:
                _commit()
                keyword = self._keywords_map[line]
                last_param = None
                continue

            if keyword is None:
                # Leading text goes through the same buffer as code-fence lines so
                # that fenced code in the summary keeps its original position.
                current_lines.append(line)
                continue

            param = self._match_param(line)
            if param is not None:
                current_lines.append(param)
                last_param = param
            elif last_param is not None:
                # Continuation of a multi-line parameter description.
                last_param["desc"] += "\n" + line
            else:
                # Free-form section content. This also covers a docstring that opens
                # with a section header followed by plain text, where there is no
                # parameter entry to extend.
                current_lines.append(line)

        _commit()
        node.docstring.content = json.dumps({
            "text": "\n".join(text_lines),
            "sections": sections,
        }, indent=None)


12 changes: 12 additions & 0 deletions website/src/css/custom.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/* Vertical spacing between list items inside a `.tsd-parameters` container. */
/* NOTE(review): presumably the parameter list of the generated API reference - confirm. */
.tsd-parameters li {
margin-bottom: 16px;
}

/* Title of the parameters block: fixed font size and bottom margin. */
/* `!important` forces the margin over any competing rule. */
.tsd-parameters-title {
font-size: 16px;
margin-bottom: 16px !important;
}

/* Title of the returns block: same font size as the parameters title. */
.tsd-returns-title {
font-size: 16px;
}
63 changes: 28 additions & 35 deletions website/transformDocs.js
Original file line number Diff line number Diff line change
Expand Up @@ -134,27 +134,6 @@ function sortChildren(typedocMember) {
typedocMember.groups.sort((a, b) => groupSort(a.title, b.title));
}

/**
 * Parses the argument descriptions and the return value description of a method from its docstring.
 *
 * @param {string} docstring Raw docstring text, expected to use `Args:` / `Returns:` / `Raises:` section headers.
 * @returns {{parameters: Object, returns: (string|undefined)}} `parameters` maps each argument name to its
 *   description text; `returns` is the trimmed return description, or `undefined` when none is found.
 */
function extractArgsAndReturns(docstring) {
const parameters = (docstring
.split('Args:')[1] ?? '').split('Returns:')[0] // Get the part between Args: and Returns: (empty string when there is no Args: section)
.split(/(^|\n)\s*([\w]+)\s*\(.*?\)\s*:\s*/) // Split on `name (type):` headers; the captured groups (line start and name) are kept in the result, the type is dropped
.filter(x => x.length > 1) // Drop pieces of length 0 or 1 (empty strings and captured newlines; NOTE(review): this also drops one-character argument names)
.reduce((acc, curr, idx, arr) => { // Collect the argument names and descriptions into an object
if(idx % 2 === 0){
return {...acc, [curr]: arr[idx+1]} // If the index is even, the current string is an argument name, and the next string is its description
}
return acc;
}, {});

const returns = (docstring
.split('Returns:')[1] ?? '').split('Raises:')[0] // Get the part between Returns: and Raises:
.split(':')[1]?.trim() || undefined; // Take the text after the first ':' (up to the next ':'), trimmed; undefined when it is missing or empty


return { parameters, returns };
}

// Objects with decorators named 'ignore_docs' or with empty docstrings will be ignored
function isHidden(member) {
return member.decorations?.some(d => d.name === 'ignore_docs')
Expand Down Expand Up @@ -211,6 +190,24 @@ function convertObject(obj, parent, module) {
member.name = 'Actor';
}

let docstring = { text: member.docstring?.content ?? '' };
try {
docstring = JSON.parse(docstring.text);

docstring.args = docstring.sections.find((section) => Object.keys(section)[0] === 'Arguments')['Arguments'] ?? [];

docstring.args = docstring.args.reduce((acc, arg) => {
acc[arg.param] = arg.desc;
return acc;
}, {});

docstring.returns = docstring.sections.find((section) => Object.keys(section)[0] === 'Returns')['Returns'] ?? [];

docstring.returns = docstring.returns.join('\n');
} catch {
// Do nothing
}

// Create the Typedoc member object
let typedocMember = {
id: oid++,
Expand All @@ -222,7 +219,7 @@ function convertObject(obj, parent, module) {
comment: member.docstring ? {
summary: [{
kind: 'text',
text: member.docstring?.content,
text: docstring.text,
}],
} : undefined,
type: typedocType,
Expand All @@ -241,23 +238,20 @@ function convertObject(obj, parent, module) {
}

if(typedocMember.kindString === 'Method') {
const { parameters, returns } = extractArgsAndReturns(member.docstring?.content ?? '');

typedocMember.signatures = [{
id: oid++,
name: member.name,
modifiers: member.modifiers ?? [],
kind: 4096,
kindString: 'Call signature',
flags: {},
comment: member.docstring ? {
comment: docstring.text ? {
summary: [{
kind: 'text',
text: member.docstring?.content
.replace(/\**(Args|Arguments|Returns)[\s\S]+/, ''),
text: docstring?.text,
}],
blockTags: returns ? [
{ tag: '@returns', content: [{ kind: 'text', text: returns }] },
blockTags: docstring?.returns ? [
{ tag: '@returns', content: [{ kind: 'text', text: docstring.returns }] },
] : undefined,
} : undefined,
type: inferTypedocType(member.return_type),
Expand All @@ -271,10 +265,10 @@ function convertObject(obj, parent, module) {
'keyword-only': arg.type === 'KEYWORD_ONLY' ? 'true' : undefined,
},
type: inferTypedocType(arg.datatype),
comment: parameters[arg.name] ? {
comment: docstring.args?.[arg.name] ? {
summary: [{
kind: 'text',
text: parameters[arg.name]
text: docstring.args[arg.name]
}]
} : undefined,
defaultValue: arg.default_value,
Expand Down Expand Up @@ -330,15 +324,14 @@ function main() {

// Load the docspec dump files of this module and of apify-shared
const thisPackageDocspecDump = fs.readFileSync('docspec-dump.jsonl', 'utf8');
const thisPackageModules = thisPackageDocspecDump.split('\n').filter((line) => line !== '');
const thisPackageModules = JSON.parse(thisPackageDocspecDump)

const apifySharedDocspecDump = fs.readFileSync('apify-shared-docspec-dump.jsonl', 'utf8');
const apifySharedModules = apifySharedDocspecDump.split('\n').filter((line) => line !== '');

// Convert all the modules, store them in the root object
for (const module of [...thisPackageModules, ...apifySharedModules]) {
const parsedModule = JSON.parse(module);
convertObject(parsedModule, typedocApiReference, parsedModule);
for (const module of thisPackageModules) {
convertObject(module, typedocApiReference, module);
};

// Recursively fix references (collect names->ids of all the named entities and then inject those in the reference objects)
Expand Down