In [None]:
# | default_exp docstring_generator

# Docstring Generator

In [None]:
# | export

import ast
import functools
import os
import random
import re
import textwrap
import time
from pathlib import Path
from typing import *

import nbformat
import openai
import typer
from mypy_extensions import NamedArg

In [None]:
import shutil
from tempfile import TemporaryDirectory
from contextlib import contextmanager
import unittest.mock

import pytest

In [None]:
# | export


def _visit_functions(
    tree: ast.AST,
    *,
    source: str,
    start_lineno: int = 1,
    end_lineno: int = -1,
    callback: Callable[
        [ast.AST, str, int, int, NamedArg(List[Tuple[int, int, int, int]], "retval")],
        Any,
    ],
    **kwargs: Any
) -> None:
    """Walk the abstract syntax tree and call the callback function for every node found

    Args:
        tree: The python AST
        source: The source code
        start_lineno: The start line number
        end_lineno: The end line number
        callback: The callback function
        kwargs: The keyword arguments

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
    if end_lineno == -1:
        end_lineno = len(source.split("\n"))

    if isinstance(tree, (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef)):
        if ast.get_docstring(tree) is None:
            callback(tree, source, start_lineno, end_lineno, **kwargs)

    if hasattr(tree, "body"):
        for i, n in enumerate(tree.body):
            node_start_lineno = tree.body[i].lineno
            node_end_lineno = (
                tree.body[i + 1].lineno - 1 if i < (len(tree.body) - 1) else end_lineno
            )
            _visit_functions(
                n,
                source=source,
                start_lineno=node_start_lineno,
                end_lineno=node_end_lineno,
                callback=callback,
                **kwargs
            )

In [None]:
source = """
class Test:
    CONST_VAL = 1
    
    def __init__(self, a):
        self.a = a
        
    async def drive(self):
    
    
        print(f'The {self.model} is now driving.')
        
    def func_with_docstring():
        \""" Sample docstring\"""
        pass
        
def _check_and_add_docstrings_to_source(
    source: str, include_auto_gen_txt: bool, **kwargs
) -> str:
    source = _remove_auto_generated_docstring(source)    
    tree = ast.parse(source)
    line_offset = 0

    for node in tree.body:
        if not isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        
        if ast.get_docstring(node) is not None:
            continue

        source, line_offset = _add_docstring(
            source, node, line_offset, include_auto_gen_txt, **kwargs
        )
        if not isinstance(node, ast.ClassDef):
            continue
        # Is a class and we need to check the functions inside
        # 29 - 36 make it as a recursive function
        for f in node.body:
            if not isinstance(f, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            
            if ast.get_docstring(f) is not None:
                continue

            # should be a function inside the class for which there is no docstring
            source, line_offset = _add_docstring(
                source, f, line_offset, include_auto_gen_txt, **kwargs
            )

    return source
    
def test_callback(tree, source, start_lineno, end_lineno):
    \"""Prints the source code of the given node

    Args:
        tree: The AST tree
        source: The source code
        start_lineno: The start line number
        end_lineno: The end line number
    \"""

    pass
"""
# Record the callback invocations with a mock instead of running a real callback.
test_callback = unittest.mock.MagicMock()
tree = ast.parse(source)

_visit_functions(tree, source=source, callback=test_callback)

# Every entry of mock_calls is a (name, args, kwargs) triple, so x[1] is the
# positional-argument tuple (tree, source, start_lineno, end_lineno).
actual = [
    (start_lineno, end_lineno)
    for tree, source, start_lineno, end_lineno in [
        x[1] for x in test_callback.mock_calls
    ]
]
# (start, end) line ranges of the definitions in `source` that have no docstring.
expected = [(2, 16), (5, 7), (8, 12), (17, 51)]

print(actual)
assert actual == expected, actual

[(2, 16), (5, 7), (8, 12), (17, 51)]


In [None]:
source_with_dosctring = '''
class Test:
    """ Sample docstring"""
    CONST_VAL = 1
    
    def __init__(self, a):
        """ Sample docstring"""
        self.a = a
        
    async def drive(self):
        """ Sample docstring"""
        print(f'The {self.model} is now driving.')
        
    def func_with_docstring():
        """ Sample docstring"""
        pass
'''
# Every class and function in source_with_dosctring already has a docstring,
# so the callback must never be invoked.
test_callback = unittest.mock.MagicMock()
tree = ast.parse(source_with_dosctring)

_visit_functions(tree, source=source_with_dosctring, callback=test_callback)

# x[1] is the positional-argument tuple of each recorded call.
actual = [
    (start_lineno, end_lineno)
    for tree, source, start_lineno, end_lineno in [
        x[1] for x in test_callback.mock_calls
    ]
]
expected = []

print(actual)
assert actual == expected, actual

[]


In [None]:
def test_callback(tree, source, start_lineno, end_lineno):
    """Print the source lines that belong to a visited node.

    Args:
        tree: The visited AST node (not used)
        source: The complete source code
        start_lineno: The first line of the node (1-based, inclusive)
        end_lineno: The last line of the node (inclusive)

    """
    snippet = "\n".join(source.split("\n")[start_lineno - 1 : end_lineno])
    print("*" * 120)
    print(snippet)


# Print the source lines attributed to each class/function in `source` that
# has no docstring.
tree = ast.parse(source)

_visit_functions(tree, source=source, callback=test_callback)

************************************************************************************************************************
class Test:
    CONST_VAL = 1
    
    def __init__(self, a):
        self.a = a
        
    async def drive(self):
    
    
        print(f'The {self.model} is now driving.')
        
    def func_with_docstring():
        """ Sample docstring"""
        pass
        
************************************************************************************************************************
    def __init__(self, a):
        self.a = a
        
************************************************************************************************************************
    async def drive(self):
    
    
        print(f'The {self.model} is now driving.')
        
************************************************************************************************************************
def _check_and_add_docstrings_to_source(
    source: str, include_auto_gen_txt: bool, **kwar

In [None]:
# | export


def _get_classes_and_functions(source: str) -> List[Tuple[int, int, int, int]]:
    """Locate the classes and functions in the source code that have no docstring.

    Args:
        source: The source code of the file.

    Returns:
        A list of tuples of the form (start_lineno, docstring_lineno, end_lineno, col_offset),
        one per class/function reported by ``_visit_functions``. ``docstring_lineno`` is the
        line just before the first statement of the body and ``col_offset`` is the column
        offset of that first statement.
    """

    def callback(node, _source, start_lineno, end_lineno, *, retval):
        """Append the location tuple of an undocumented node to ``retval``.

        Args:
            node: The class or function node
            _source: The source code (not used)
            start_lineno: The starting line number of the node
            end_lineno: The ending line number of the node

        Keyword Args:
            retval: The list collecting the results
        """
        first_statement = node.body[0]
        # NOTE(review): when the body starts on the same line as the header this
        # yields the line *before* the definition - confirm callers can cope.
        location = (
            start_lineno,
            first_statement.lineno - 1,
            end_lineno,
            first_statement.col_offset,
        )
        retval.append(location)

    collected: List[Tuple[int, int, int, int]] = []
    _visit_functions(
        ast.parse(source),
        source=source,
        callback=callback,
        retval=collected,
    )
    return collected

In [None]:
# Each tuple is (start_lineno, docstring_lineno, end_lineno, col_offset) for
# one undocumented class/function found in `source`.
actual = _get_classes_and_functions(source)
expected = [(2, 2, 16, 4), (5, 5, 7, 8), (8, 10, 12, 8), (17, 19, 51, 4)]
print(actual)
assert actual == expected

[(2, 2, 16, 4), (5, 5, 7, 8), (8, 10, 12, 8), (17, 19, 51, 4)]


In [None]:
# | export


def _get_code_from_source(source: str, start_line_no: int, end_line_no: int) -> str:
    """Get code from source

    Args:
        source: The source code of the file.
        start_line_no: Start line number
        end_line_no: End line number

    Returns:
        The extracted code

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
    source_lines = source.split("\n")
    extracted_lines = source_lines[start_line_no - 1 : end_line_no]
    return "\n".join(extracted_lines)

In [None]:
# Extract the code of every undocumented class/function; only the start and
# end line numbers of each tuple are needed here.
linenos = _get_classes_and_functions(source)
print(linenos)
actual = [
    _get_code_from_source(source, start_line_no, end_line_no)
    for start_line_no, docstring_line_no, end_line_no, node_offset in linenos
]
expected = [
    'class Test:\n    CONST_VAL = 1\n    \n    def __init__(self, a):\n        self.a = a\n        \n    async def drive(self):\n    \n    \n        print(f\'The {self.model} is now driving.\')\n        \n    def func_with_docstring():\n        """ Sample docstring"""\n        pass\n        ',
    "    def __init__(self, a):\n        self.a = a\n        ",
    "    async def drive(self):\n    \n    \n        print(f'The {self.model} is now driving.')\n        ",
    "def _check_and_add_docstrings_to_source(\n    source: str, include_auto_gen_txt: bool, **kwargs\n) -> str:\n    source = _remove_auto_generated_docstring(source)    \n    tree = ast.parse(source)\n    line_offset = 0\n\n    for node in tree.body:\n        if not isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):\n            continue\n        \n        if ast.get_docstring(node) is not None:\n            continue\n\n        source, line_offset = _add_docstring(\n            source, node, line_offset, include_auto_gen_txt, **kwargs\n        )\n        if not isinstance(node, ast.ClassDef):\n            continue\n        # Is a class and we need to check the functions inside\n        # 29 - 36 make it as a recursive function\n        for f in node.body:\n            if not isinstance(f, (ast.FunctionDef, ast.AsyncFunctionDef)):\n                continue\n            \n            if ast.get_docstring(f) is not None:\n                continue\n\n            # should be a function inside the class for which there is no docstring\n            source, line_offset = _add_docstring(\n                source, f, line_offset, include_auto_gen_txt, **kwargs\n            )\n\n    return source\n    ",
]
# Show each extracted snippet, separated by a line of asterisks, before comparing.
for f in actual:
    print("*" * 100)
    print(f)

assert actual == expected

[(2, 2, 16, 4), (5, 5, 7, 8), (8, 10, 12, 8), (17, 19, 51, 4)]
****************************************************************************************************
class Test:
    CONST_VAL = 1
    
    def __init__(self, a):
        self.a = a
        
    async def drive(self):
    
    
        print(f'The {self.model} is now driving.')
        
    def func_with_docstring():
        """ Sample docstring"""
        pass
        
****************************************************************************************************
    def __init__(self, a):
        self.a = a
        
****************************************************************************************************
    async def drive(self):
    
    
        print(f'The {self.model} is now driving.')
        
****************************************************************************************************
def _check_and_add_docstrings_to_source(
    source: str, include_auto_gen_txt: bool, **kwargs
) -> str:
    

In [None]:
# | export

# Reference: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_handle_rate_limits.ipynb


def _retry_with_exponential_backoff(
    initial_delay: float = 1,
    exponential_base: float = 2,
    jitter: bool = True,
    max_retries: int = 10,
    max_wait: float = 60,
    errors: tuple = (
        openai.error.RateLimitError,
        openai.error.ServiceUnavailableError,
        openai.error.APIError,
    ),
) -> Callable:
    """Create a decorator that retries a function with exponential backoff.

    Args:
        initial_delay: The starting delay in seconds. It is multiplied by the backoff
            factor before every sleep, including the first one.
        exponential_base: The factor the delay is multiplied by after each failure
        jitter: If truthy, the factor is additionally scaled by a random value in [1, 2)
        max_retries: The number of retries allowed before giving up
        max_wait: The upper bound in seconds for a single sleep
        errors: The exception types that trigger a retry; any other exception
            propagates immediately

    Returns:
        A decorator. The decorated function raises ``Exception`` (chained to the last
        error) once a call has failed more than ``max_retries`` times.
    """

    def decorator(func):
        """Wrap ``func`` so that it is retried according to the enclosing settings."""

        # Keep the name, docstring and signature metadata of the wrapped function.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            num_retries = 0
            delay = initial_delay

            while True:
                try:
                    return func(*args, **kwargs)

                except errors as e:
                    num_retries += 1
                    if num_retries > max_retries:
                        raise Exception(
                            f"Maximum number of retries ({max_retries}) exceeded."
                        ) from e
                    delay = min(
                        delay
                        * exponential_base
                        * (1 + jitter * random.random()),  # nosec
                        max_wait,
                    )
                    typer.secho(
                        f"Note: OpenAI's API rate limit reached. Command will automatically retry in {int(delay)} seconds. For more information visit: https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits",
                        fg=typer.colors.BLUE,
                    )
                    time.sleep(delay)

        return wrapper

    return decorator


@_retry_with_exponential_backoff()
def _completions_with_backoff(*args, **kwargs):
    """Call ``openai.Completion.create`` with the given arguments.

    The call is wrapped by ``_retry_with_exponential_backoff`` using its default settings.
    """
    return openai.Completion.create(*args, **kwargs)

In [None]:
# | export

# Few-shot prompt: two worked "code -> docstring" examples followed by the
# {code} placeholder, which is filled in with str.format. The prompt ends with
# an opening triple quote so that the completion is the docstring body.
DEFAULT_PROMPT_TEMPLATE = '''
# Python 3.7

#docstring_gen: Original code:

def add_strings(s1: Optional[str] = None, s2: Optional[str] = None) -> str:
    if s1 is None or s2 is None:
        raise ValueError("Both s1 and s2 must be provided and must be of type string")
    return s1 + s2
    
#docstring_gen: A comprehensive PEP 257 Google style doctring, including a brief one-line summary of the function.

"""Add two strings

Args:
    s1: First string
    s2: Second string
    
Returns:
    The added string
    
Raises:
    ValueError: If s1 or s2 is None
"""

#docstring_gen: Original code:

class Person:
    def __init__(self, name, surname, age):
        self.name = name
        self.surname = surname
        self.age = age

    def info(self, additional=""):
        print('My name is :' + self.name + additional)
        
#docstring_gen: A comprehensive PEP 257 Google style doctring, including a brief one-line summary of the function.


"""A class to represent a person.

Attributes:
    name : first name of the person
    surname : family name of the person
    age : age of the person
"""

#docstring_gen: Original code:

{code}

#docstring_gen: A comprehensive PEP 257 Google style doctring, including a brief one-line summary of the function.

"""
'''

In [None]:
# NOTE: this cell calls openai.Completion.create through
# _completions_with_backoff without the mock_openai_create patch.
code = """
def drive(name: str) -> None:
    print(f'The {name} is now driving.')
"""

prompt = DEFAULT_PROMPT_TEMPLATE.format(code=code)

response = _completions_with_backoff(
    prompt=prompt,
    model="code-davinci-002",
    temperature=0.2,
    max_tokens=250,
    top_p=1.0,
    frequency_penalty=0.0,
    presence_penalty=0.0,
    # The prompt ends with an opening triple quote; stop at the closing one.
    stop=['"""'],
    n=1,
)

print(response)

{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "Drive the car.\n\nArgs:\n    name: The name of the car.\n\nReturns:\n    None\n"
    }
  ],
  "created": 1674465717,
  "id": "cmpl-6bnJV2H2r0wo77OFvCIafAMrBWeTL",
  "model": "code-davinci-002",
  "object": "text_completion",
  "usage": {
    "completion_tokens": 27,
    "prompt_tokens": 397,
    "total_tokens": 424
  }
}


In [None]:
@contextmanager
def mock_openai_create():
    """Patch ``openai.Completion`` so that ``create`` returns a canned response.

    While the context is active, ``openai.Completion.create(...)`` returns an object
    whose ``choices`` list holds a single choice with a fixed ``text`` docstring.
    """
    # Use MagicMock *instances*: assigning attributes on the MagicMock class
    # itself would leak `text`/`choices` into every mock in the process.
    mock_choice = unittest.mock.MagicMock()
    mock_choice.text = """Drive a car

    Args:
        name: The name of the car
    """
    mock_response = unittest.mock.MagicMock()
    mock_response.choices = [mock_choice]

    with unittest.mock.patch("openai.Completion") as MockClass:
        MockClass.create.return_value = mock_response
        yield

In [None]:
# Inside the context, openai.Completion is patched, so no request is sent.
with mock_openai_create():
    response = openai.Completion.create()
    print(response.choices[0].text)

Drive a car

    Args:
        name: The name of the car
    


In [None]:
# A function that never raises is called once and its result is returned.
@_retry_with_exponential_backoff()
def mock_func():
    return "Success"


assert mock_func() == "Success"

# Test max retries exceeded
# With max_retries=1 the first failure triggers one sleep-and-retry; the
# second failure raises the "Maximum number of retries" exception.
@_retry_with_exponential_backoff(max_retries=1)
def mock_func_error():
    raise openai.error.RateLimitError


with pytest.raises(Exception) as e:
    mock_func_error()

print(e.value)
assert str(e.value) == "Maximum number of retries (1) exceeded."

[34mNote: OpenAI's API rate limit reached. Command will automatically retry in 2 seconds. For more information visit: https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits[0m
Maximum number of retries (1) exceeded.


In [None]:
# | export


def _get_best_docstring(docstrings: List[str]) -> Optional[str]:
    """Get the best docstring from a list of docstrings.

    Args:
        docstrings: List of docstrings

    Returns:
        The best docstring

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
    docstrings = [d for d in docstrings if "Args:" in d or "Attributes:" in d]
    docstrings = [d for d in docstrings if "~~~~" not in d]
    return docstrings[0] if len(docstrings) > 0 else None

In [None]:
docstrings = [
    "    _check_and_add_docstrings_to_source(\n    source: str, include_auto_gen_txt: bool, **kwargs\n) -> str:\n    source = _remove_auto_generated_docstring(source)    \n    tree = ast.parse(source)\n    line_offset = 0\n\n    for node in tree.body:\n        if not isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):\n            continue\n        \n        if ast.get_docstring(node) is not None:\n            continue\n\n        source, line_offset = _add_docstring(\n            source, node, line_offset, include_auto_gen_txt, **kwargs\n        )\n        if not isinstance(node, ast.ClassDef):\n            continue\n        ",
    '    _check_and_add_docstrings_to_source\n    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n    This function checks if the source code has docstrings for all the functions and classes.\n    If not, it adds a docstring to the function/class.\n\n    Args:\n        source (str): The source code to be checked for docstrings.\n        include_auto_gen_txt (bool): If True, the docstring will include the text "Auto-generated by nbdev".\n        **kwargs: Additional keyword arguments.\n\n    Returns:\n        str: The source code with docstrings added.\n',
    "    This function checks if the source code has docstrings for all the functions and classes.\n    If not, it adds the docstrings.\n    It also removes the auto generated docstring.\n    \n    Args:\n        source: The source code as a string.\n        include_auto_gen_txt: Whether to include the auto generated text in the docstring.\n        **kwargs: Other keyword arguments.\n    \n    Returns:\n        The source code with docstrings added.\n    \n    Raises:\n        ValueError: If the source code is not a string.\n",
]

# Only the third candidate has an "Args:" section and no "~~~~" underline.
actual = _get_best_docstring(docstrings)
expected = docstrings[2]

print(actual)
assert actual == expected

    This function checks if the source code has docstrings for all the functions and classes.
    If not, it adds the docstrings.
    It also removes the auto generated docstring.
    
    Args:
        source: The source code as a string.
        include_auto_gen_txt: Whether to include the auto generated text in the docstring.
        **kwargs: Other keyword arguments.
    
    Returns:
        The source code with docstrings added.
    
    Raises:
        ValueError: If the source code is not a string.



In [None]:
docstrings = [
    "    _check_and_add_docstrings_to_source(\n    source: str, include_auto_gen_txt: bool, **kwargs\n) -> str:\n    source = _remove_auto_generated_docstring(source)    \n    tree = ast.parse(source)\n    line_offset = 0\n\n    for node in tree.body:\n        if not isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):\n            continue\n        \n        if ast.get_docstring(node) is not None:\n            continue\n\n        source, line_offset = _add_docstring(\n            source, node, line_offset, include_auto_gen_txt, **kwargs\n        )\n        if not isinstance(node, ast.ClassDef):\n            continue\n        ",
    '    _check_and_add_docstrings_to_source\n    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n    This function checks if the source code has docstrings for all the functions and classes.\n    If not, it adds a docstring to the function/class.\n\n    Args:\n        source (str): The source code to be checked for docstrings.\n        include_auto_gen_txt (bool): If True, the docstring will include the text "Auto-generated by nbdev".\n        **kwargs: Additional keyword arguments.\n\n    Returns:\n        str: The source code with docstrings added.\n',
    "    This function checks if the source code ~~~~~~~~~~~~~~~~ has docstrings for all the functions and classes.\n    If not, it adds the docstrings.\n    It also removes the auto generated docstring.\n    \n    Args:\n        source: The source code as a string.\n        include_auto_gen_txt: Whether to include the auto generated text in the docstring.\n        **kwargs: Other keyword arguments.\n    \n    Returns:\n        The source code with docstrings added.\n    \n    Raises:\n        ValueError: If the source code is not a string.\n",
]

# Here the third candidate also contains "~~~~", so nothing is acceptable.
actual = _get_best_docstring(docstrings)
expected = None

print(actual)
assert actual == expected

None


In [None]:
# | export

# Fallback text returned by _generate_docstring_using_codex when none of the
# generated candidates is accepted.
DOCSTRING_ERR = """!!! note
    
    Failed to generate docs
"""


def _generate_docstring_using_codex(
    code: str, prompt_template: str, **kwargs: Union[int, float, str, List[str]]
) -> str:
    """Generate a docstring for a piece of code using codex.

    Args:
        code: The code for which to generate a docstring.
        prompt_template: The prompt template; its ``{code}`` placeholder is filled with ``code``.
        **kwargs: The keyword arguments forwarded to the completion request.

    Returns:
        The best generated docstring, or ``DOCSTRING_ERR`` if no candidate is acceptable.
    """
    completion = _completions_with_backoff(
        prompt=prompt_template.format(code=code), **kwargs
    )
    candidates = [choice.text for choice in completion.choices]

    chosen = _get_best_docstring(candidates)
    if chosen is None:
        return DOCSTRING_ERR
    return chosen

In [None]:
# Generate a docstring for every undocumented class/function in `source`.
linenos = _get_classes_and_functions(source)
classes_and_functions = [
    _get_code_from_source(source, start_line_no, end_line_no)
    for start_line_no, docstring_line_no, end_line_no, node_offset in linenos
]
# openai.Completion is patched here, so every call yields the same canned text.
with mock_openai_create():
    for code in classes_and_functions:
        docstring = _generate_docstring_using_codex(
            code,
            DEFAULT_PROMPT_TEMPLATE,
            model="code-davinci-002",
            temperature=0.2,
            max_tokens=250,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0,
            stop=['"""'],
            n=3,
        )

        print(docstring)

Drive a car

    Args:
        name: The name of the car
    
Drive a car

    Args:
        name: The name of the car
    
Drive a car

    Args:
        name: The name of the car
    
Drive a car

    Args:
        name: The name of the car
    


In [None]:
# | export

# Pieces of the notice appended to generated docstrings: a blank line, a
# "!!! note" header, and the indented body followed by the project URL.
AUTO_GEN_PERFIX = "\n!!! note\n\n"
AUTO_GEN_BODY = "The above docstring is autogenerated by docstring-gen library"
AUTO_GEN_SUFFIX = "(https://github.com/airtai/docstring-gen)"

AUTO_GEN_TXT = "".join(
    [AUTO_GEN_PERFIX, " " * 4, AUTO_GEN_BODY, " ", AUTO_GEN_SUFFIX]
)

In [None]:
# | export


def _add_auto_gen_txt(docstring: str) -> str:
    """Append the "autogenerated by docstring-gen" notice to a docstring.

    Args:
        docstring: The docstring to which the notice will be added

    Returns:
        The docstring followed by ``AUTO_GEN_TXT`` and a trailing newline
    """
    return "".join((docstring, AUTO_GEN_TXT, "\n"))

In [None]:
# The notice is expected to be appended verbatim, followed by a newline.
docstring = """Sample docstring

Args:
    s: sample args

Returns:
    sample return
"""

expected = docstring + AUTO_GEN_TXT + "\n"

actual = _add_auto_gen_txt(docstring)
print(actual)

assert actual == expected

Sample docstring

Args:
    s: sample args

Returns:
    sample return

!!! note

    The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)



In [None]:
# | export


def _fix_docstring_indent(
    docstring: str, col_offset: int, *, include_auto_gen_txt: bool
) -> str:
    """Fix the indentation of a docstring.

    Args:
        docstring: The docstring to fix.
        col_offset: The column offset to use.
        include_auto_gen_txt: If True, include the auto-generated text in the docstring.

    Returns:
        The fixed docstring.

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
    lines = docstring.split("\n")
    rest = textwrap.dedent("\n".join(lines[1:]))
    retval = lines[0] + "\n" + rest

    if include_auto_gen_txt:
        retval = _add_auto_gen_txt(retval)

    retval = '"""' + retval + '"""'
    retval = textwrap.indent(retval, prefix=" " * col_offset)
    return retval

In [None]:
# Input whose sections are indented by four spaces, as in the mocked completion.
docstring = """Sample docstring

    Args:
        s: sample args

    Returns:
        sample return
"""

# Expected result at column 0 without the auto-generated notice.
expected_false = '''"""Sample docstring

Args:
    s: sample args

Returns:
    sample return
"""'''
# Expected result at column 0 with the auto-generated notice appended.
expected_true = (
    '''"""Sample docstring

Args:
    s: sample args

Returns:
    sample return
'''
    + AUTO_GEN_TXT
    + '\n"""'
)

# Check both variants at several indentation levels.
for col_offset in [0, 4, 8]:
    for include_auto_gen_txt, expected in zip(
        [False, True], [expected_false, expected_true]
    ):
        expected = textwrap.indent(expected, prefix=" " * col_offset)
        actual = _fix_docstring_indent(
            docstring, col_offset, include_auto_gen_txt=include_auto_gen_txt
        )
        print(actual)
        assert actual == expected

"""Sample docstring

Args:
    s: sample args

Returns:
    sample return
"""
"""Sample docstring

Args:
    s: sample args

Returns:
    sample return

!!! note

    The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
"""
    """Sample docstring

    Args:
        s: sample args

    Returns:
        sample return
    """
    """Sample docstring

    Args:
        s: sample args

    Returns:
        sample return

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
        """Sample docstring

        Args:
            s: sample args

        Returns:
            sample return
        """
        """Sample docstring

        Args:
            s: sample args

        Returns:
            sample return

        !!! note

            The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
        """


In [None]:
# Collect the code of every undocumented class/function in `source`.
linenos = _get_classes_and_functions(source)
classes_and_functions = [
    _get_code_from_source(source, start_line_no, end_line_no)
    for start_line_no, docstring_line_no, end_line_no, node_offset in linenos
]
print(len(classes_and_functions))

4


In [None]:
line_offset = 0
# Generate one docstring per undocumented class/function using the patched API.
with mock_openai_create():
    docstrings = [
        _generate_docstring_using_codex(
            i,
            DEFAULT_PROMPT_TEMPLATE,
            model="code-davinci-002",
            temperature=0.2,
            max_tokens=250,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0,
            stop=['"""'],
            n=3,
        )
        for i in classes_and_functions
    ]

    print(docstrings)

source_lines = source.split("\n")
# The last element of every tuple is the column at which the docstring goes.
offsets = [node_offset for _, _, _, node_offset in linenos]

# Pass the flag explicitly instead of relying on a loop variable left behind
# by an earlier cell.
indented_docstrings = [
    _fix_docstring_indent(docstring, offset, include_auto_gen_txt=True)
    for docstring, offset in zip(docstrings, offsets)
]
for i in indented_docstrings:
    print("*" * 100)
    print(i)

['Drive a car\n\n    Args:\n        name: The name of the car\n    ', 'Drive a car\n\n    Args:\n        name: The name of the car\n    ', 'Drive a car\n\n    Args:\n        name: The name of the car\n    ', 'Drive a car\n\n    Args:\n        name: The name of the car\n    ']
****************************************************************************************************
    """Drive a car

    Args:
        name: The name of the car

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
****************************************************************************************************
        """Drive a car

        Args:
            name: The name of the car

        !!! note

            The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
        """
***************************************************************************************************

In [None]:
# | export


def _inject_docstring_to_source(
    source: str,
    indented_docstrings: List[str],
    linenos: List[Tuple[int, int, int, int]],
) -> str:
    """Injects docstrings into source code

    Args:
        source: The source code
        indented_docstrings: The docstrings to be injected
        linenos: The line numbers where the docstrings should be injected

    Returns:
        The source code with the injected docstrings

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
    source_lines = source.split("\n")
    line_offset = 0
    for docstring, (_, docstring_line_no, _, _) in zip(indented_docstrings, linenos):
        docstring_lines = docstring.split("\n")
        source_lines = (
            source_lines[: docstring_line_no + line_offset]
            + docstring_lines
            + source_lines[docstring_line_no + line_offset :]
        )
        line_offset += len(docstring_lines)

    return "\n".join(source_lines)

In [None]:
actual = _inject_docstring_to_source(source, indented_docstrings, linenos)
print(actual)

# The docstring as it should appear for a class-level (4-space) insertion,
# without the surrounding triple quotes.
docstring = """Drive a car

    Args:
        name: The name of the car

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
"""

assert docstring in actual


class Test:
    """Drive a car

    Args:
        name: The name of the car

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
    CONST_VAL = 1
    
    def __init__(self, a):
        """Drive a car

        Args:
            name: The name of the car

        !!! note

            The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
        """
        self.a = a
        
    async def drive(self):
    
    
        """Drive a car

        Args:
            name: The name of the car

        !!! note

            The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
        """
        print(f'The {self.model} is now driving.')
        
    def func_with_docstring():
        """ Sample docstring"""
        pass
        
def _check_and_add_docstrings_to_source(
    source: str, include_auto_gen_txt: bo

In [None]:
# | export


def _remove_auto_generated_docstring(source: str) -> str:
    """Strip docstrings that contain the auto-generated marker text.

    Every triple-double-quoted string that contains ``AUTO_GEN_BODY`` is
    replaced by a placeholder, and then every line that consists solely of that
    placeholder (ignoring surrounding whitespace) is dropped.

    Args:
        source: The source code

    Returns:
        The source code without the auto generated docstrings
    """
    placeholder = "{DOCSTRING_PLACEHOLDER}"
    # NOTE(review): AUTO_GEN_BODY is interpolated into the pattern unescaped, so
    # it is interpreted as a regular expression - confirm that is intended.
    pattern = f'"""((?!""").)*?({AUTO_GEN_BODY}).*?"""'
    substituted = re.sub(pattern, placeholder, source, flags=re.DOTALL)
    kept_lines = [
        line for line in substituted.split("\n") if line.strip() != placeholder
    ]
    return "\n".join(kept_lines)

In [None]:
# Fixture: `decorator1` has a docstring without the marker text, while the
# docstring of `decorator2` has AUTO_GEN_TXT spliced in before its closing
# quotes.
_source = (
    '''
def decorator1(func):
    """Decorator function that takes a function as an argument and returns a function."""
    pass
    
def decorator2(func):
    """Sample docstring

    Args:
        arg 1: arg 1 description
        arg 2: arg 2 description

    '''
    + AUTO_GEN_TXT
    + '''
    """
    pass
'''
)

# Only the docstring of `decorator2` is expected to be removed.
expected = '''
def decorator1(func):
    """Decorator function that takes a function as an argument and returns a function."""
    pass
    
def decorator2(func):
    pass
'''

actual = _remove_auto_generated_docstring(_source)
print(actual)

assert actual == expected


def decorator1(func):
    """Decorator function that takes a function as an argument and returns a function."""
    pass
    
def decorator2(func):
    pass



In [None]:
# | export


def _check_and_add_docstrings_to_source(
    source: str,
    include_auto_gen_txt: bool,
    recreate_auto_gen_docs: bool,
    **kwargs: Union[int, float, str, List[str]]
) -> str:
    """Generate and insert docstrings for the definitions that need one.

    Args:
        source: Source code
        include_auto_gen_txt: Forwarded to ``_fix_docstring_indent`` for every
            generated docstring.
        recreate_auto_gen_docs: If set to True, the autogenerated docstrings
            from previous runs are removed first, so that they are generated
            again.
        kwargs: Keyword arguments forwarded to
            ``_generate_docstring_using_codex``.

    Returns:
        The source code with the generated docstrings inserted, or the source
        unchanged (apart from the optional removal step) when
        ``_get_classes_and_functions`` reports nothing.
    """
    if recreate_auto_gen_docs:
        source = _remove_auto_generated_docstring(source)

    linenos = _get_classes_and_functions(source)
    if len(linenos) == 0:
        return source

    # Stage 1: cut out the code of every reported definition.
    snippets = [
        _get_code_from_source(source, first_line, last_line)
        for first_line, _, last_line, _ in linenos
    ]

    # Stage 2: request one docstring per snippet.
    raw_docstrings = [
        _generate_docstring_using_codex(snippet, DEFAULT_PROMPT_TEMPLATE, **kwargs)
        for snippet in snippets
    ]

    # Stage 3: indent each docstring using the offset stored with its entry.
    indented_docstrings = [
        _fix_docstring_indent(
            raw, node_offset, include_auto_gen_txt=include_auto_gen_txt
        )
        for raw, (_, _, _, node_offset) in zip(raw_docstrings, linenos)
    ]

    return _inject_docstring_to_source(source, indented_docstrings, linenos)

In [None]:
# Fixture: `Test` and `__init__` have no docstring, while `test_callback`
# carries a docstring that ends with AUTO_GEN_TXT (each of its lines indented
# by four spaces).
nl = "\n"
_source = (
    """
class Test:
    CONST_VAL = 1
    
    def __init__(self, a):
        self.a = a
        
def test_callback(tree, source, start_lineno, end_lineno):
    \"""Check and add docstrings to source

    Args:
        source: Source code
        include_auto_gen_txt: Include auto gen text
        recreate_auto_gen_docs: If set to True, the autogenerated docstrings from the previous runs will be replaced with the new one.
        kwargs: Keyword arguments

    Returns:
        The source code with docstrings

    """
    + f'{nl.join([textwrap.indent(l, " "*4) for l in AUTO_GEN_TXT.split(nl)])}'
    + """
    \"""

    pass

"""
)
# `mock_openai_create` is defined elsewhere in the notebook; presumably it
# replaces the OpenAI call with a canned response.
with mock_openai_create():
    updated_source = _check_and_add_docstrings_to_source(
        _source,
        include_auto_gen_txt=True,
        recreate_auto_gen_docs=True,
        model="code-davinci-002",
        temperature=0,
        max_tokens=250,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["#", '"""'],
    )

# The result must still be valid Python.
ast.parse(updated_source)

# With recreate_auto_gen_docs=True the previous docstring must be gone.
assert "Check and add docstrings to source" not in updated_source
print(updated_source)


class Test:
    """Drive a car

    Args:
        name: The name of the car

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
    CONST_VAL = 1
    
    def __init__(self, a):
        """Drive a car

        Args:
            name: The name of the car

        !!! note

            The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
        """
        self.a = a
        
def test_callback(tree, source, start_lineno, end_lineno):

    """Drive a car

    Args:
        name: The name of the car

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
    pass




In [None]:
# Same `_source` fixture as in the previous cell, but this time the existing
# docstrings are kept.
with mock_openai_create():
    updated_source = _check_and_add_docstrings_to_source(
        _source,
        include_auto_gen_txt=True,
        recreate_auto_gen_docs=False,
        model="code-davinci-002",
        temperature=0,
        max_tokens=250,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["#", '"""'],
    )

# The result must still be valid Python.
ast.parse(updated_source)

# With recreate_auto_gen_docs=False the previous docstring must survive.
assert "Check and add docstrings to source" in updated_source
print(updated_source)


class Test:
    """Drive a car

    Args:
        name: The name of the car

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
    CONST_VAL = 1
    
    def __init__(self, a):
        """Drive a car

        Args:
            name: The name of the car

        !!! note

            The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
        """
        self.a = a
        
def test_callback(tree, source, start_lineno, end_lineno):
    """Check and add docstrings to source

    Args:
        source: Source code
        include_auto_gen_txt: Include auto gen text
        recreate_auto_gen_docs: If set to True, the autogenerated docstrings from the previous runs will be replaced with the new one.
        kwargs: Keyword arguments

    Returns:
        The source code with docstrings

    
    !!! note

        The above docstring is autogenerated by docs

In [None]:
# | export


def _get_files(p: Path) -> List[Path]:
    """Get Jupyter notebooks and Python files path in the directory.

    Args:
        p: Path to the directory

    Returns:
        A list of paths to the files in the directory

    Raises:
        ValueError: If the directory does not contain any Python files or notebooks

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
    exts = [".ipynb", ".py"]
    files = [
        f
        for f in p.rglob("*")
        if f.suffix in exts
        and not any(p.startswith(".") for p in f.parts)
        and not f.name.startswith("_")
    ]

    if len(files) == 0:
        raise ValueError(
            f"The directory {p.resolve()} does not contain any Python files or notebooks"
        )

    return files

In [None]:
# Build a temporary tree containing files that must be found and files that
# must be skipped, then check the result of `_get_files`.
with TemporaryDirectory() as d:
    nbs_path = Path(d) / "nbs"
    nbs_path.mkdir(parents=True)

    hidden_dir = nbs_path / ".hidden"
    hidden_dir.mkdir(parents=True)

    # Wrong extension: must be skipped.
    shutil.copyfile(Path("..") / "settings.ini", nbs_path / "settings.ini")
    # Name starts with an underscore: must be skipped.
    shutil.copyfile(
        Path("..") / "fixtures" / "Test_Data.ipynb", nbs_path / "_test.ipynb"
    )
    # The two notebooks that are expected in the result.
    shutil.copyfile(
        Path("..") / "fixtures" / "Test_Data.ipynb", nbs_path / "test.ipynb"
    )
    shutil.copyfile(
        Path("..") / "fixtures" / "Test_Data.ipynb", nbs_path / "test_1.ipynb"
    )

    # Notebooks inside a dot-directory: must be skipped.
    shutil.copyfile(
        Path("..") / "fixtures" / "Test_Data.ipynb", hidden_dir / "test.ipynb"
    )
    shutil.copyfile(
        Path("..") / "fixtures" / "Test_Data.ipynb", hidden_dir / "test_1.ipynb"
    )

    for f in nbs_path.rglob("*"):
        print(f)

    files = _get_files(nbs_path)

    assert len(files) == 2
    print(f"\n\n{files}")
    # NOTE(review): this compares against one specific order, while
    # `_get_files` returns the files in `rglob` order - confirm the order is
    # stable on every platform.
    assert files == [nbs_path / "test_1.ipynb", nbs_path / "test.ipynb"]

/tmp/tmpihbxxbc8/nbs/_test.ipynb
/tmp/tmpihbxxbc8/nbs/test_1.ipynb
/tmp/tmpihbxxbc8/nbs/settings.ini
/tmp/tmpihbxxbc8/nbs/.hidden
/tmp/tmpihbxxbc8/nbs/test.ipynb
/tmp/tmpihbxxbc8/nbs/.hidden/test_1.ipynb
/tmp/tmpihbxxbc8/nbs/.hidden/test.ipynb


[PosixPath('/tmp/tmpihbxxbc8/nbs/test_1.ipynb'), PosixPath('/tmp/tmpihbxxbc8/nbs/test.ipynb')]


In [None]:
# An empty directory contains no notebooks or Python files, so `_get_files`
# must raise ValueError.
with pytest.raises(ValueError) as e:

    with TemporaryDirectory() as d:
        nbs_path = Path(d) / "nbs"
        nbs_path.mkdir(parents=True)

        _get_files(nbs_path)

print(e.value)

The directory /tmp/tmpt9qyc1e_/nbs does not contain any Python files or notebooks


In [None]:
# | export


def _add_docstring_to_nb(
    file: Path,
    include_auto_gen_txt: bool,
    recreate_auto_gen_docs: bool,
    version: int = 4,
    **kwargs: Union[int, float, str, List[str]],
) -> None:
    """Add docstrings to the code cells of a Jupyter notebook.

    Every code cell is passed through ``_check_and_add_docstrings_to_source``.
    A cell that raises ``SyntaxError`` is reported with a warning and left
    unchanged. The notebook is always written back to ``file``.

    Args:
        file: Path to the notebook file.
        include_auto_gen_txt: Forwarded to ``_check_and_add_docstrings_to_source``.
        recreate_auto_gen_docs: Forwarded to ``_check_and_add_docstrings_to_source``;
            when True the success message is printed even if no cell changed.
        version: The notebook format version passed to ``nbformat.read``.
        **kwargs: Additional arguments forwarded to
            ``_check_and_add_docstrings_to_source``.

    Returns:
        None
    """
    changed = False
    notebook = nbformat.read(file, as_version=version)

    for cell in notebook.cells:
        if cell.cell_type != "code":
            continue

        before = cell["source"]
        try:
            after = _check_and_add_docstrings_to_source(
                before, include_auto_gen_txt, recreate_auto_gen_docs, **kwargs
            )
        except SyntaxError as e:
            typer.secho(
                f"WARNING: Unable to parse the below cell contents in {file} due to: {e}. Skipping the cell for docstring generation.",
                fg=typer.colors.YELLOW,
            )
            typer.echo(before)
            cell["source"] = before
            continue

        # Remember whether at least one cell differs from its original text.
        changed = changed or before != after
        cell["source"] = after

    nbformat.write(notebook, file)

    if changed or recreate_auto_gen_docs:
        typer.secho(f"Successfully added docstrings to {file}", fg=typer.colors.CYAN)


def _add_docstring_to_py(
    file: Path,
    include_auto_gen_txt: bool,
    recreate_auto_gen_docs: bool,
    **kwargs: Union[int, float, str, List[str]],
) -> None:
    """Add docstrings to a python file.

    The file is read, passed through ``_check_and_add_docstrings_to_source``
    and written back only when its content actually changed. A ``SyntaxError``
    raised while processing is reported as a warning and the file is left
    untouched.

    Args:
        file: The path to the python file.
        include_auto_gen_txt: Forwarded to ``_check_and_add_docstrings_to_source``.
        recreate_auto_gen_docs: Forwarded to ``_check_and_add_docstrings_to_source``;
            when True the success message is printed even if the file did not
            change.
        kwargs: Additional keyword arguments forwarded to
            ``_check_and_add_docstrings_to_source``.

    Returns:
        None
    """
    # Python source defaults to UTF-8, so do not depend on the locale encoding.
    original_src = file.read_text(encoding="utf-8")

    try:
        updated_src = _check_and_add_docstrings_to_source(
            original_src, include_auto_gen_txt, recreate_auto_gen_docs, **kwargs
        )
    except SyntaxError as e:
        typer.secho(
            f"WARNING: Unable to parse the {file} due to: {e}. Skipping the file for docstring generation.",
            fg=typer.colors.YELLOW,
        )
        return

    file_modified = original_src != updated_src
    if file_modified:
        # Avoid rewriting (and re-timestamping) files that did not change.
        file.write_text(updated_src, encoding="utf-8")

    if file_modified or recreate_auto_gen_docs:
        typer.secho(f"Successfully added docstrings to {file}", fg=typer.colors.CYAN)


def add_docstring_to_source(
    path: Union[str, Path],
    include_auto_gen_txt: bool = True,
    recreate_auto_gen_docs: bool = False,
    model: str = "code-davinci-002",
    temperature: float = 0.2,
    max_tokens: int = 250,
    top_p: float = 1.0,
    n: int = 3,
) -> None:
    """Add docstrings to a notebook, a Python file, or every such file in a directory.

    Files with the ``.ipynb`` suffix are handled by ``_add_docstring_to_nb``;
    every other file is handled by ``_add_docstring_to_py``. For a directory,
    the files to process are collected with ``_get_files``.

    Args:
        path: The path to the Jupyter notebook or Python file, or a directory containing these files.
        include_auto_gen_txt: If set to True, a note indicating that the docstring was autogenerated by docstring-gen library will be added to the end.
        recreate_auto_gen_docs: If set to True, the autogenerated docstrings from the previous runs will be replaced with the new one.
        model: The name of the Codex model that will be used to generate docstrings.
        temperature: Setting the temperature close to zero produces better results, whereas higher temperatures produce more complex, and sometimes irrelevant docstrings.
        max_tokens: The maximum number of tokens to be used when generating a docstring for a function or class. Please note that a higher number will deplete your token quota faster.
        top_p: You can also specify a top-P value from 0-1 to achieve similar results to changing the temperature. According to the Open AI documentation, it is generally recommended to change either this or the temperature but not both.
        n: The number of docstrings to be generated for each function or class, with the best one being added to the source code. Please note that a higher number will deplete your token quota faster.

    Returns:
        None
    """
    path = Path(path)
    targets = [path] if not path.is_dir() else _get_files(path)

    # Generation settings shared by every file; the penalties and the stop
    # sequences are fixed and not exposed as parameters.
    completion_kwargs: Dict[str, Any] = dict(
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["#", '"""'],
        n=n,
    )

    for file in targets:
        handler = (
            _add_docstring_to_nb if file.suffix == ".ipynb" else _add_docstring_to_py
        )
        handler(
            file=file,
            include_auto_gen_txt=include_auto_gen_txt,
            recreate_auto_gen_docs=recreate_auto_gen_docs,
            **completion_kwargs,
        )

In [None]:
# End-to-end check: run `add_docstring_to_source` on a directory that holds
# notebooks, Python files and an unrelated settings file.
with TemporaryDirectory() as d:
    nbs_path = Path(d) / "nbs"
    nbs_path.mkdir(parents=True)

    shutil.copyfile(
        Path("..") / "fixtures" / "Test_Data.ipynb", nbs_path / "test.ipynb"
    )
    shutil.copyfile(
        Path("..") / "fixtures" / "Test_Data.ipynb", nbs_path / "_test.ipynb"
    )

    shutil.copyfile(Path("..") / "fixtures" / "test_data.py", nbs_path / "test_data.py")
    # Judging by its name, this fixture is presumably not valid Python and is
    # meant to exercise the warning path.
    shutil.copyfile(Path("..") / "fixtures" / "invalid_test_data.py", nbs_path / "invalid_test_data.py")
    shutil.copyfile(Path("..") / "settings.ini", nbs_path / "settings.ini")

    # `mock_openai_create` is defined elsewhere in the notebook; presumably it
    # replaces the OpenAI call with a canned response.
    with mock_openai_create():
        add_docstring_to_source(nbs_path, recreate_auto_gen_docs=True)

    # Read the processed notebook back before the temporary directory is removed.
    with (nbs_path / "test.ipynb").open("r") as f:
        nb = nbformat.read(f, as_version=4)

for cell in nb.cells:
    if cell.cell_type == "code":
        print(cell["source"])

[36mSuccessfully added docstrings to /tmp/tmpx16s8xna/nbs/test_data.py[0m
with TemporaryDirectory() as d:
    !ls -la {d}
[36mSuccessfully added docstrings to /tmp/tmpx16s8xna/nbs/test.ipynb[0m
# | export

from typing import *
import os
from pathlib import Path
# from contextlib import contextmanager

from contextlib import contextmanager

import shutil
from tempfile import TemporaryDirectory

_source = '''
def decorator1(func):
    """Decorator function that takes a function as an argument and returns a function."""
    pass
    
def decorator2(func):
    pass
'''

with TemporaryDirectory() as d:
    !ls -la {d}
# | export

# Vehicle class
class Vehicle:
    # Constructor function
    """Drive a car

    Args:
        name: The name of the car

    !!! note

        The above docstring is autogenerated by docstring-gen library (https://github.com/airtai/docstring-gen)
    """
    def __init__(self, brand, model, type):
        """Constructor function
        
        Args:
       