In [None]:
# | default_exp docstring_generator

# Docstring Generator

In [None]:
# | export

from typing import *
import ast
from pathlib import Path

import nbformat

In [None]:
from tempfile import TemporaryDirectory

## Create test notebook programmatically using nbformat

In [None]:
# Source text for each cell of the throw-away notebook assembled by
# create_test_notebook(). Every value is used verbatim as a cell body.
# NOTE(review): the visible checks only print these cells or parse them with
# ast; nothing shown here executes the fixture code.
default_export = "# | default_export test"
# Export cell that only carries imports; the contextmanager import is
# deliberately left commented out.
export_libs = """
# | export

from typing import *
import os
from pathlib import Path
# from contextlib import contextmanager
"""
# Non-exported cell with test-only imports.
test_libs = """
import shutil
from tempfile import TemporaryDirectory
"""
# Body of the first markdown cell.
header = "# Markdown title"
# Exported class: __init__ already has a docstring, while the class itself,
# fuel_up and the async drive method do not.
code_cell_1 = """
# | export

# Vehicle class
class Vehicle:
    # Constructor function
    def __init__(self, brand, model, type):
        \"""Constructor function
        
        Args:
            brand: Vehicle's brand
            model: Vehicle's model
            type: Vehicle's type
        \"""
        self.brand = brand
        self.model = model
        self.type = type
        self.gas_tank_size = 14
        self.fuel_level = 0
    
    # fuel_up function
    def fuel_up(self):
        # comment goes here
        self.fuel_level = self.gas_tank_size
        print('Gas tank is now full.')

    # drive function
    async def drive(self):
        print(f'The {self.model} is now driving.')
"""
# Plain usage cell: statements only, no class or function definitions.
code_cell_2 = """
v = Vehicle("honda", "city", "hatchback")
v.drive()
"""
# Body of the second markdown cell.
text_cell_1 = """
This is a sample markdown cell. The texts goes here
"""
# Top-level sync and async functions; only function_with_docstring is
# already documented.
code_cell_3 = """
def say_hello(name: str) -> str:
    return f"Hello, {name}"
    
async def function_with_docstring(name: str) -> str:
    \""" This function already has docstring\"""
    return f"I won't say hello, {name}"
    
async def dont_say_hello(name: str) -> str:
    return f"I won't say hello, {name}"
    
"""

# A function that defines another function inside its body.
code_cell_4 = """
def outer_func():
    def inner_func():
        print("Hello, World!")
    inner_func()
"""

# A decorated function. `contextmanager` is not imported by any fixture cell
# (the import in export_libs is commented out).
code_cell_5 = """
@contextmanager
def my_context_mgr():
    try:
        yield
    finally:
        pass
"""


def create_test_notebook(nb_path: Path):
    """Write the fixture notebook used by the tests to ``nb_path``.

    The notebook is a v4 notebook whose cells are built, in order, from the
    module-level fixture strings (export/import cells, markdown cells and the
    sample code cells).

    Args:
        nb_path: Destination path of the ``.ipynb`` file
    """
    code = nbformat.v4.new_code_cell
    markdown = nbformat.v4.new_markdown_cell

    # (cell factory, cell source) pairs, listed in notebook order.
    layout = (
        (code, default_export),
        (code, export_libs),
        (code, test_libs),
        (markdown, header),
        (code, code_cell_1),
        (code, code_cell_2),
        (markdown, text_cell_1),
        (code, code_cell_3),
        (code, code_cell_4),
        (code, code_cell_5),
    )

    notebook = nbformat.v4.new_notebook()
    notebook["cells"] = [make_cell(text) for make_cell, text in layout]
    nbformat.write(notebook, str(nb_path))

In [None]:
# Round-trip check: write the fixture notebook into a temporary directory,
# read it back, then print every cell's source.
with TemporaryDirectory() as d:
    nbs_path = Path(d).joinpath("nbs")
    nbs_path.mkdir(parents=True)
    nb_path = nbs_path.joinpath("test.ipynb")

    create_test_notebook(nb_path)
    assert nb_path.exists()

    # Load the notebook while the temporary directory still exists.
    with nb_path.open("r") as f:
        nb = nbformat.read(f, as_version=4)

# `nb` is already in memory, so it can be inspected after the directory is gone.
for cell in nb.cells:
    print(cell["source"])

# | default_export test

# | export

from typing import *
import os
from pathlib import Path
# from contextlib import contextmanager


import shutil
from tempfile import TemporaryDirectory

# Markdown title

# | export

# Vehicle class
class Vehicle:
    # Constructor function
    def __init__(self, brand, model, type):
        """Constructor function
        
        Args:
            brand: Vehicle's brand
            model: Vehicle's model
            type: Vehicle's type
        """
        self.brand = brand
        self.model = model
        self.type = type
        self.gas_tank_size = 14
        self.fuel_level = 0
    
    # fuel_up function
    def fuel_up(self):
        # comment goes here
        self.fuel_level = self.gas_tank_size
        print('Gas tank is now full.')

    # drive function
    async def drive(self):
        print(f'The {self.model} is now driving.')


v = Vehicle("honda", "city", "hatchback")
v.drive()


This is a sample markdown cell. The texts goes her

## Docstring generator functions

In [None]:
# | export


def _generate_docstring_using_codex(code: str) -> str:
    return """Sample docstring

    Args:
        s: sample args

    Returns:
        sample return
"""

In [None]:
# | export


def _inject_docstring_to_source(
    source: str, docstring: str, lineno: int, node_col_offset: int
) -> str:
    """Inject a docstring into the source code at a specified line number.

    Args:
        source: the source code
        docstring: the docstring to be added
        lineno: the line number at which the docstring will be inserted
        node_col_offset: the number of spaces to indent the docstring

    Returns:
        The updated source code with the docstring injected
    """
    lines = source.split("\n")
    indented_docstring = "\n".join(
        [
            line
            if i == 0 or i == len(docstring.split("\n")) - 1
            else f"{' ' * node_col_offset}{line}"
            for i, line in enumerate(docstring.split("\n"))
        ]
    )
    indent = node_col_offset + 4
    lines.insert(lineno, f'{" " * indent}"""{indented_docstring}{" " * indent}"""')
    return "\n".join(lines)

In [None]:
# Input: an undocumented function. The string starts with a newline, so the
# `def` sits on line 2 of the source.
source = """
async def drive(self):
    print(f'The {self.model} is now driving.')
"""

# Docstring body as it would be generated: no surrounding quotes, ends with a newline.
docstring = """Sample docstring

    Args:
        s: sample args

    Returns:
        sample return
"""

# The docstring should appear right after the `def` line, wrapped in triple
# quotes that are indented by four spaces.
expected = """
async def drive(self):
    \"""Sample docstring

    Args:
        s: sample args

    Returns:
        sample return
    \"""
    print(f'The {self.model} is now driving.')
"""

# 1-based line of the `def`; used as a list index it points just past that line.
lineno = 2
# The function is defined at column 0.
node_col_offset = 0
actual = _inject_docstring_to_source(source, docstring, lineno, node_col_offset)
print(actual)

assert actual == expected


async def drive(self):
    """Sample docstring

    Args:
        s: sample args

    Returns:
        sample return
    """
    print(f'The {self.model} is now driving.')



In [None]:
# | export


def _add_docstring(
    source: str,
    node: Union[ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef],
    line_offset: int,
) -> Tuple[str, int]:
    """Generate a docstring for ``node`` and splice it into ``source``.

    Args:
        source: the source code from the notebook cell, possibly already
            containing docstrings injected for earlier nodes
        node: the AST node representing a class definition, function definition,
            or async function definition
        line_offset: the number of lines already inserted above this node, used
            to translate the node's original line number into the current source

    Returns:
        A tuple containing the updated source code and the line offset increased
        by the number of lines in the generated docstring
    """
    generated = _generate_docstring_using_codex(ast.unparse(node))

    # The docstring goes directly after the line reported by node.lineno.
    # NOTE(review): for a header spanning several lines this position would be
    # inside the header rather than after it - confirm whether that case matters.
    insert_at = line_offset + node.lineno
    updated_source = _inject_docstring_to_source(
        source, generated, insert_at, node.col_offset
    )

    # Number of newline-separated pieces in the generated text.
    added_lines = generated.count("\n") + 1
    return updated_source, line_offset + added_lines

In [None]:
# Input: an undocumented class with a class attribute, a regular method and
# an async method.
source = """
class test:
    CONST_VAL = 1
    def __init__(self, a):
        self.a = a
        
    async def drive(self):
        print(f'The {self.model} is now driving.')
"""

# Expected result: the class and both methods each receive the sample
# docstring. Inside the methods, the docstring's blank lines carry four spaces
# because every continuation line is shifted by the method's column offset.
expected = """
class test:
    \"""Sample docstring

    Args:
        s: sample args

    Returns:
        sample return
    \"""
    CONST_VAL = 1
    def __init__(self, a):
        \"""Sample docstring
    
        Args:
            s: sample args
    
        Returns:
            sample return
        \"""
        self.a = a
        
    async def drive(self):
        \"""Sample docstring
    
        Args:
            s: sample args
    
        Returns:
            sample return
        \"""
        print(f'The {self.model} is now driving.')
"""

# The tree is parsed once from the original source; line_offset accumulates the
# lines inserted so far so that the original line numbers stay usable.
tree = ast.parse(source)
line_offset = 0
for node in tree.body:
    # Document the class itself first ...
    source, line_offset = _add_docstring(source, node, line_offset)

    # ... then each function defined directly in its body, in source order.
    for f in node.body:
        if not isinstance(f, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        source, line_offset = _add_docstring(source, f, line_offset)

print(source)
assert source == expected


class test:
    """Sample docstring

    Args:
        s: sample args

    Returns:
        sample return
    """
    CONST_VAL = 1
    def __init__(self, a):
        """Sample docstring
    
        Args:
            s: sample args
    
        Returns:
            sample return
        """
        self.a = a
        
    async def drive(self):
        """Sample docstring
    
        Args:
            s: sample args
    
        Returns:
            sample return
        """
        print(f'The {self.model} is now driving.')



In [None]:
# | export


def _check_and_add_docstrings_to_source(source: str) -> str:
    """Check for missing docstrings in the source code and add them if necessary.

    Only top-level classes and functions, plus the functions defined directly
    in the body of a top-level class, are inspected. Methods are checked
    whether or not the enclosing class already has a docstring.

    Args:
        source: the source code from the notebook cell

    Returns:
        The updated source code with added docstrings

    Raises:
        SyntaxError: if ``source`` cannot be parsed by ``ast.parse``
    """
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)

    tree = ast.parse(source)
    # Lines inserted so far. Nodes are visited in source order, so this single
    # counter maps the original line numbers onto the growing source.
    line_offset = 0

    for node in tree.body:
        if not isinstance(node, (ast.ClassDef, *function_types)):
            continue

        if ast.get_docstring(node) is None:
            # A class or a function without docstring
            source, line_offset = _add_docstring(source, node, line_offset)

        if not isinstance(node, ast.ClassDef):
            continue

        # A class: its methods are examined even when the class itself is
        # already documented, otherwise their missing docstrings go unnoticed.
        # TODO: make this recursive; nested functions and nested classes are
        # not visited.
        for member in node.body:
            if not isinstance(member, function_types):
                continue
            if ast.get_docstring(member) is not None:
                continue

            # A function inside the class for which there is no docstring
            source, line_offset = _add_docstring(source, member, line_offset)

    return source

In [None]:
# | export


def add_docstring_to_notebook(nb_path: Union[str, Path], version: int = 4) -> None:
    """Add docstrings to the source

    This function reads through a Jupyter notebook cell by cell and
    adds docstrings for classes and methods that do not have them.
    The notebook file is overwritten in place.

    Code cells whose source cannot be parsed as Python are left unchanged
    and reported with a warning instead of aborting the whole notebook.

    Args:
        nb_path: The notebook file path
        version: The version of the Jupyter notebook format
    """
    import warnings

    # Hand nbformat a plain string path.
    # NOTE(review): older nbformat releases appear to accept only str paths or
    # file objects, not pathlib.Path - the str form is the safe common ground.
    nb_file = str(Path(nb_path))
    nb = nbformat.read(nb_file, as_version=version)

    for index, cell in enumerate(nb.cells):
        if cell.cell_type != "code":
            continue
        try:
            cell["source"] = _check_and_add_docstrings_to_source(cell["source"])
        except SyntaxError as e:
            # ast.parse rejects anything that is not plain Python, for
            # example IPython magics or shell escapes; keep such cells as-is.
            warnings.warn(
                f"Skipping code cell {index}: source is not valid Python ({e.msg})"
            )

    nbformat.write(nb, nb_file)

In [None]:
# End-to-end check: build the fixture notebook, run the docstring pass over
# it, then confirm every top-level class/function in a code cell is documented.
with TemporaryDirectory() as d:
    nbs_path = Path(d).joinpath("nbs")
    nbs_path.mkdir(parents=True)
    nb_path = nbs_path.joinpath("test.ipynb")

    create_test_notebook(nb_path)
    assert nb_path.exists()

    add_docstring_to_notebook(nb_path)

    # Load the rewritten notebook while the temporary directory still exists.
    with nb_path.open("r") as f:
        nb = nbformat.read(f, as_version=4)

for cell in nb.cells:
    if cell.cell_type != "code":
        continue
    tree = ast.parse(cell["source"])
    for node in tree.body:
        # Only top-level definitions are verified here.
        if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
            assert ast.get_docstring(node)
            print(ast.unparse(node))

class Vehicle:
    """Sample docstring

    Args:
        s: sample args

    Returns:
        sample return
    """

    def __init__(self, brand, model, type):
        """Constructor function
        
        Args:
            brand: Vehicle's brand
            model: Vehicle's model
            type: Vehicle's type
        """
        self.brand = brand
        self.model = model
        self.type = type
        self.gas_tank_size = 14
        self.fuel_level = 0

    def fuel_up(self):
        """Sample docstring
    
        Args:
            s: sample args
    
        Returns:
            sample return
        """
        self.fuel_level = self.gas_tank_size
        print('Gas tank is now full.')

    async def drive(self):
        """Sample docstring
    
        Args:
            s: sample args
    
        Returns:
            sample return
        """
        print(f'The {self.model} is now driving.')
def say_hello(name: str) -> str:
    """Sample docstring

    Args:
      