# Auto-Fix

> Automatically add placeholder documentation to non-compliant functions

In [None]:
#| default_exp autofix

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import ast
from typing import List, Dict, Any, Optional, NamedTuple
import re
from pathlib import Path
from execnb.nbio import read_nb, write_nb
from fastcore.foundation import L
from fastcore.basics import ifnone, patch, compose
from cjm_nbdev_docments.core import DocmentsCheckResult, check_definition
from cjm_nbdev_docments.scanner import scan_notebook, get_export_cells

In [None]:
#| export
@patch
def needs_fixing(
    self: DocmentsCheckResult
) -> bool:  # True if non-compliant, or any parameter lacks documentation or a type hint
    "Check if this definition needs any fixing"
    # Wrap in bool(): the bare `or` chain would otherwise hand back the
    # `missing_params` / `params_missing_type_hints` collection itself,
    # contradicting the declared return type.
    return bool(
        not self.is_compliant
        or self.missing_params
        or self.params_missing_type_hints
    )

In [None]:
#| export
@patch
def get_param_name(
    self: DocmentsCheckResult,
    param_str: str  # Raw parameter text, e.g. "x: int = 0"
) -> str:  # Text before the first ':' and the first '=', whitespace-stripped
    "Extract parameter name from a parameter string"
    # Cut the annotation first, then any default value that remains.
    before_annotation, _, _ = param_str.partition(':')
    before_default, _, _ = before_annotation.partition('=')
    return before_default.strip()

In [None]:
#| export
@patch
def needs_param_fix(
    self: DocmentsCheckResult,
    param_name: str  # Bare parameter name to look up
) -> bool:  # True if listed in missing_params or params_missing_type_hints; always False for 'self'
    "Check if a parameter needs documentation or type hint fixes"
    # `self` is never reported, whatever the check result lists.
    if param_name == 'self':
        return False
    return (
        param_name in self.missing_params
        or param_name in self.params_missing_type_hints
    )

In [None]:
#| export
def _code_outside_strings_and_comments(
    line: str  # One physical source line
) -> str:  # The line with string-literal contents and any trailing comment removed
    "Drop string literal contents and the trailing `#` comment from a source line"
    kept = []
    quote = None  # Quote character of the literal currently open, if any
    escaped = False
    for ch in line:
        if quote is not None:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == quote:
                quote = None
        elif ch in ('"', "'"):
            quote = ch
        elif ch == '#':
            break  # Everything after an unquoted '#' is a comment
        else:
            kept.append(ch)
    return ''.join(kept)


def find_signature_boundaries(
    lines: List[str]  # Source code lines
) -> tuple[int, int]:  # (def_line_idx, sig_end_idx) or (-1, -1) if not found
    "Find the start and end lines of a function signature"
    def_line_idx = None
    paren_count = 0

    for i, line in enumerate(lines):
        if def_line_idx is None:
            if not line.strip().startswith(('def ', 'async def ')):
                continue
            def_line_idx = i

        # Only real code may influence the bracket balance: a comment such as
        # "# (see below" or a default like sep="(" must not keep the
        # signature "open". String state is tracked per line only.
        code = _code_outside_strings_and_comments(line)
        paren_count += code.count('(') - code.count(')')

        # Balanced parentheses plus a colon in the code part means the
        # signature ends on this line.
        if paren_count == 0 and ':' in code:
            return def_line_idx, i

    return -1, -1

In [None]:
#| export
def split_parameters(
    params_str: str  # Parameter string from function signature
) -> List[str]:  # Individual parameter strings ([] for blank input, otherwise a fastcore `L`)
    "Split a parameter string into individual parameters, handling nested types"
    if not params_str.strip():
        return []

    # One nesting counter per bracket family; a comma only separates
    # parameters when every counter is back at zero.
    depth = {'(': 0, '[': 0, '{': 0}
    closer_to_opener = {')': '(', ']': '[', '}': '{'}

    params = []
    current = []  # Characters of the parameter being collected
    quote = None  # Quote character of the string literal currently open, if any
    escaped = False

    for char in params_str:
        if quote is not None:
            # Inside a string default (e.g. sep=','): commas and brackets are
            # literal text and must not affect splitting.
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == quote:
                quote = None
        elif char in ('"', "'"):
            quote = char
        elif char in depth:
            depth[char] += 1
        elif char in closer_to_opener:
            depth[closer_to_opener[char]] -= 1
        elif char == ',' and not any(depth.values()):
            params.append(''.join(current).strip())
            current = []
            continue
        current.append(char)

    tail = ''.join(current).strip()
    if tail:
        params.append(tail)

    # Return as L for easier manipulation; filter() drops empty entries
    return L(params).filter()

In [None]:
#| export
def parse_single_line_signature(
    sig_line: str  # Single-line function signature
) -> Optional[dict]:  # Parsed components of the signature, or None if the line is not a one-line def
    "Parse a single-line function signature into its components"
    func_match = re.match(r'^(\s*)(def|async def)\s+(\w+)\s*\((.*?)\)(\s*(?:->\s*[^:]+)?)\s*:\s*(.*)$', sig_line)
    if func_match is None:
        return None

    # Groups: leading indent, def keyword, name, raw parameter text,
    # return annotation (with its leading whitespace, '' when absent),
    # and whatever follows the closing colon.
    indent, def_keyword, func_name, params_str, return_type, trailing = func_match.groups()
    return {
        'indent': indent,
        'def_keyword': def_keyword,
        'func_name': func_name,
        'params_str': params_str,
        'return_type': return_type,
        # Raw text after the colon: a '#' comment, or inline code for one-liners
        'existing_comment': trailing.strip()
    }

In [None]:
#| export
def generate_param_todo_comment(
    param_name: str,  # Parameter name
    result: DocmentsCheckResult,  # Check result with type hint and doc info
    existing_comment: str = ""  # Existing comment text (without #)
) -> str:  # TODO comment to add
    "Generate appropriate TODO comment for a parameter based on what's missing"
    typed = result.params_with_type_hints.get(param_name, False)
    documented = result.params_documented.get(param_name, False)

    if typed and documented:
        # Nothing is missing; callers normally do not ask in this state
        return existing_comment or "TODO: Verify documentation"
    if typed:
        # Annotated but undocumented
        return "TODO: Add description"
    if not documented:
        # Neither annotated nor documented
        return "TODO: Add type hint and description"

    # Documented but not annotated: keep the description, append a reminder
    if not existing_comment:
        return "TODO: Add type hint"
    reminder_present = any(
        marker in existing_comment
        for marker in ("TODO: Add type hint", "TODO:Add type hint")
    )
    if reminder_present:
        return existing_comment  # Avoid stacking a second reminder
    return f"{existing_comment} - TODO: Add type hint"

In [None]:
#| export
def generate_return_todo_comment(
    result: DocmentsCheckResult,  # Check result with type hint and doc info
    existing_comment: str = ""  # Existing comment text (without #)
) -> str:  # TODO comment to add
    "Generate appropriate TODO comment for return value based on what's missing"
    typed = result.return_has_type_hint
    documented = result.return_documented

    if typed and documented:
        # Nothing is missing; callers normally do not ask in this state
        return existing_comment or "TODO: Verify description"
    if typed:
        # Annotated but undocumented
        return "TODO: Add return description"
    if not documented:
        # Neither annotated nor documented
        return "TODO: Add type hint and return description"

    # Documented but not annotated: keep the description, append a reminder
    if not existing_comment:
        return "TODO: Add type hint"
    reminder_present = any(
        marker in existing_comment
        for marker in ("TODO: Add type hint", "TODO:Add type hint")
    )
    if reminder_present:
        return existing_comment  # Avoid stacking a second reminder
    return f"{existing_comment} - TODO: Add type hint"

In [None]:
#| export
def build_fixed_single_line_function(
    parsed: dict,  # Parsed signature components
    params: List[str],  # Individual parameter strings
    result: DocmentsCheckResult  # Check result with missing params info
) -> List[str]:  # Lines of fixed function signature (plus the inline body line, if the def had one)
    "Build a fixed single-line function with documentation comments"
    indent = parsed['indent']

    # Start the function definition
    fixed_lines = [f"{indent}{parsed['def_keyword']} {parsed['func_name']}("]

    # One parameter per line; every parameter except the last keeps its comma
    last_idx = len(params) - 1
    for i, param in enumerate(params):
        param_name = result.get_param_name(param)
        param_line = f"{indent}    {param}" + (',' if i < last_idx else '')
        if result.needs_param_fix(param_name):
            param_line += f"  # {generate_param_todo_comment(param_name, result)}"
        fixed_lines.append(param_line)

    return_type = parsed['return_type'] or ''
    trailing = parsed['existing_comment'] or ''

    # Text after the colon that does not start with '#' is not a comment but
    # the body of a one-liner (`def f(x): return x`). Keep it as code on its
    # own line instead of turning it into a comment.
    inline_body = ''
    if trailing and not trailing.startswith('#'):
        inline_body, trailing = trailing, ''

    closer = f"{indent}){return_type}:"
    if return_type:
        return_needs_fix = ('return' in result.missing_params
                            or 'return' in result.params_missing_type_hints)
    else:
        # Without an annotation only a missing type hint triggers a return TODO
        return_needs_fix = 'return' in result.params_missing_type_hints

    if return_needs_fix:
        comment_text = trailing[1:].strip()  # '' when there is no comment
        todo_comment = generate_return_todo_comment(result, comment_text)
        # Spacing mirrors the historical output: two spaces only when an
        # annotated return had no comment before, otherwise one.
        gap = '  ' if (return_type and not trailing) else ' '
        fixed_lines.append(f"{closer}{gap}# {todo_comment}")
    elif trailing:
        fixed_lines.append(f"{closer} {trailing}")
    else:
        fixed_lines.append(closer)

    if inline_body:
        fixed_lines.append(f"{indent}    {inline_body}")

    return fixed_lines

In [None]:
#| export
def fix_multi_line_signature(
    lines: List[str],  # All source lines
    def_line_idx: int,  # Start of function definition
    sig_end_idx: int,  # End of function signature
    result: DocmentsCheckResult  # Check result with missing params info
) -> List[str]:  # Fixed lines for the signature portion
    "Fix a multi-line function signature by adding parameter comments"
    # indent, name, optional annotation, optional ',' or ')', spacing, optional comment
    param_pattern = re.compile(r'^(\s*)(\w+)(\s*(?::\s*[^,\)#]+)?)\s*([,\)]?)(\s*)(?:#\s*(.*))?$')
    # ") -> annotation" followed by ':' and whatever trails it
    return_pattern = re.compile(r'^(\s*\)\s*->\s*[^:#]+)\s*:\s*(.*)$')

    def is_flagged(name):
        # Reported as lacking documentation and/or a type hint
        return name in result.missing_params or name in result.params_missing_type_hints

    def rewrite_param_line(line, match):
        # Rebuild a parameter line with its TODO comment, or keep it untouched
        indent, name, annotation, punct, gap, comment = (g or '' for g in match.groups())
        if name == 'self' or not is_flagged(name):
            return line
        todo = generate_param_todo_comment(name, result, comment)
        if todo == comment:
            return line  # Comment would not change; keep the original formatting
        return f"{indent}{name}{annotation}{punct}{gap}  # {todo}"

    def rewrite_other_line(line):
        # Only a ") -> type:" line may change, and only when the return is flagged
        match = return_pattern.match(line)
        if not (match and is_flagged('return')):
            return line
        head = match.group(1)
        rest = match.group(2).strip()
        if not rest:
            return f"{head}:  # {generate_return_todo_comment(result)}"
        text = rest[1:].strip() if rest.startswith('#') else rest
        todo = generate_return_todo_comment(result, text)
        return line if todo == text else f"{head}: # {todo}"

    output = []
    for idx in range(def_line_idx, sig_end_idx + 1):
        line = lines[idx]
        # Parameter lines lie strictly between the def line and the closing line
        match = param_pattern.match(line) if def_line_idx < idx < sig_end_idx else None
        output.append(rewrite_param_line(line, match) if match else rewrite_other_line(line))
    return output

In [None]:
#| export
def fix_class_definition(
    result: DocmentsCheckResult  # Check result with non-compliant class
) -> str:  # Fixed source code with class docstring
    "Fix a class definition by adding a docstring if missing"
    lines = result.source.split('\n')

    # Locate the first `class` line; decorators may come before it
    class_line_idx = next(
        (i for i, line in enumerate(lines) if line.strip().startswith('class ')),
        -1,
    )
    if class_line_idx == -1 or result.has_docstring:
        return result.source

    def leading_whitespace(text):
        return text[:len(text) - len(text.lstrip())]

    # Default: one level deeper than the class statement itself, so nested
    # classes get a correctly indented docstring too.
    class_indent = leading_whitespace(lines[class_line_idx])
    indent = class_indent + '    '

    # Prefer the indentation of the first non-blank body line. A blank line
    # right after the header carries no indentation information and must not
    # push the docstring to column 0.
    for body_line in lines[class_line_idx + 1:]:
        if body_line.strip():
            body_indent = leading_whitespace(body_line)
            if len(body_indent) > len(class_indent):
                indent = body_indent
            break

    docstring_line = f'{indent}"TODO: Add class description"'
    fixed_lines = (
        lines[:class_line_idx + 1]
        + [docstring_line]
        + lines[class_line_idx + 1:]
    )
    return '\n'.join(fixed_lines)

In [None]:
#| export
def _without_trailing_comment(
    line: str  # One physical source line
) -> str:  # The line up to (not including) an unquoted '#'
    "Strip a trailing `#` comment, ignoring `#` characters inside string literals"
    quote = None  # Quote character of the literal currently open, if any
    escaped = False
    for pos, ch in enumerate(line):
        if quote is not None:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == quote:
                quote = None
        elif ch in ('"', "'"):
            quote = ch
        elif ch == '#':
            return line[:pos]
    return line


def insert_function_docstring(
    lines: List[str],  # Fixed function lines
    def_line_idx: int,  # Index of function definition line
    indent: str  # Base indentation for the function
) -> List[str]:  # Lines with docstring inserted
    "Insert a TODO docstring after the function signature"
    # The signature ends on the first line whose code part ends with ':'.
    # The comment is stripped first because rewritten signatures usually end
    # in "):  # TODO: ..."; looking at the raw line would miss that and put
    # the docstring inside the parameter list.
    sig_end_idx = def_line_idx
    for i in range(def_line_idx, len(lines)):
        if _without_trailing_comment(lines[i]).rstrip().endswith(':'):
            sig_end_idx = i
            break

    docstring_line = f'{indent}    "TODO: Add function description"'
    return lines[:sig_end_idx + 1] + [docstring_line] + lines[sig_end_idx + 1:]

In [None]:
#| export
def fix_single_line_function(
    lines: List[str],  # All source lines
    def_line_idx: int,  # Index of function definition line
    result: DocmentsCheckResult  # Check result with missing params info
) -> List[str]:  # Fixed lines for the function
    "Fix a single-line function signature by converting to multi-line with parameter comments"
    signature = parse_single_line_signature(lines[def_line_idx])
    if not signature:
        # Not a recognisable one-line def: hand the input back untouched
        return lines

    # Expand the one-liner into one parameter per line with TODO comments
    rebuilt_signature = build_fixed_single_line_function(
        signature,
        split_parameters(signature['params_str']),
        result,
    )

    # Everything before the def line, then the rebuilt signature
    output = [lines[i] for i in range(def_line_idx)]
    output.extend(rebuilt_signature)

    # A placeholder docstring goes in before the remaining lines are appended
    if not result.has_docstring:
        output = insert_function_docstring(output, def_line_idx, signature['indent'])

    # Everything after the original def line
    output.extend(lines[i] for i in range(def_line_idx + 1, len(lines)))
    return output

In [None]:
#| export
def fix_multi_line_function(
    lines: List[str],  # All source lines
    def_line_idx: int,  # Start of function definition
    sig_end_idx: int,  # End of function signature
    result: DocmentsCheckResult  # Check result with missing params info
) -> List[str]:  # Fixed lines for the function
    "Fix a multi-line function signature by adding parameter comments"
    # Lines that precede the def, followed by the patched signature lines
    output = [lines[i] for i in range(def_line_idx)]
    output.extend(fix_multi_line_signature(lines, def_line_idx, sig_end_idx, result))

    if not result.has_docstring:
        # Placeholder docstring, one level deeper than the def line
        leading = re.match(r'^(\s*)', lines[def_line_idx])
        def_indent = leading.group(1) if leading else ''
        output.append(f'{def_indent + "    "}"TODO: Add function description"')

    # Function body and anything that follows it
    output.extend(lines[i] for i in range(sig_end_idx + 1, len(lines)))
    return output

In [None]:
#| export
def generate_fixed_source(
    result: DocmentsCheckResult  # Check result with non-compliant function
) -> str:  # Fixed source code with placeholder documentation
    "Generate fixed source code for a non-compliant function or class"
    # Classes (including dataclasses) only ever receive a docstring
    if result.type == 'ClassDef':
        return fix_class_definition(result)

    # Nothing reported for this definition: return it as is
    if not result.needs_fixing():
        return result.source

    source_lines = result.source.split('\n')
    start_idx, end_idx = find_signature_boundaries(source_lines)
    if start_idx == -1:
        # No signature found; leave the source alone
        return result.source

    # A one-line signature is expanded only when parameters need attention;
    # every other case goes through the multi-line path.
    if start_idx != end_idx or not (result.missing_params or result.params_missing_type_hints):
        patched = fix_multi_line_function(source_lines, start_idx, end_idx, result)
    else:
        patched = fix_single_line_function(source_lines, start_idx, result)

    return '\n'.join(patched)

In [None]:
#| export
def fix_notebook(
    nb_path: Path,  # Path to notebook to fix
    dry_run: bool = False  # If True, show changes without saving
) -> Dict[str, Any]:  # Summary of changes made
    "Fix non-compliant functions in a notebook by adding placeholder documentation"
    nb_path = Path(nb_path)  # Tolerate plain string paths; `.name` is used below
    nb = read_nb(nb_path)
    definitions = scan_notebook(nb_path)

    changes = {
        'notebook': nb_path.name,
        'definitions_fixed': [],
        'cells_modified': []
    }
    # Definitions that need fixing but could not be rewritten automatically
    unresolved = []

    def plural(count):
        return "definition" if count == 1 else "definitions"

    for defn in definitions:
        result = check_definition(defn)

        # Same criterion generate_fixed_source applies
        if not result.needs_fixing():
            continue

        fixed_source = generate_fixed_source(result)
        applied = False

        # Only touch the notebook when the generated source really differs
        if fixed_source != result.source:
            cell_id = defn['cell_id']
            for cell in nb.cells:
                if cell.get('id') != cell_id or result.source not in cell.source:
                    continue

                if dry_run:
                    print(f"\nWould fix {result.name}:")
                    print("-" * 40)
                    print(fixed_source)
                    print("-" * 40)
                else:
                    cell.source = cell.source.replace(result.source, fixed_source)

                changes['definitions_fixed'].append(result.name)
                if cell_id not in changes['cells_modified']:
                    changes['cells_modified'].append(cell_id)
                applied = True
                break  # The owning cell has been handled

        if not applied:
            unresolved.append(result.name)

    fixed_count = len(changes['definitions_fixed'])
    if fixed_count and not dry_run:
        # Persist the modified notebook
        write_nb(nb, nb_path)
        print(f"✅ Fixed {fixed_count} {plural(fixed_count)} in {nb_path.name}")
        for defn_name in changes['definitions_fixed']:
            print(f"   - {defn_name}")
    elif fixed_count:
        print(f"\n🔍 Dry run: Would fix {fixed_count} {plural(fixed_count)}")
    elif not unresolved:
        print(f"✅ All definitions in {nb_path.name} are already compliant")

    # Do not claim compliance for definitions the auto-fixer could not handle
    if unresolved:
        print(f"⚠️ {len(unresolved)} {plural(len(unresolved))} in {nb_path.name} could not be fixed automatically")
        for defn_name in unresolved:
            print(f"   - {defn_name}")

    return changes

In [None]:
#| export
class DocstringInfo(NamedTuple):
    """Information extracted from a docstring: description, parameter notes, return note and detected style"""
    description: str  # Main function description text
    params: Dict[str, str]  # Parameter name -> description
    returns: Optional[str]  # Return description, or None when the docstring has none
    docstring_type: str  # Style tag set by the parser: 'google', 'numpy', 'sphinx' or 'unknown'

In [None]:
#| export
def detect_docstring_style(
    docstring: str  # Docstring text to analyze
) -> str:  # Detected style: 'google', 'numpy', 'sphinx', or 'unknown'
    "Detect the style of a docstring"
    if not docstring:
        return 'unknown'

    text = docstring.strip()

    # Checked in this order; the first pattern that matches wins.
    style_patterns = (
        # Google: a section header such as "Args:" ending its line
        ('google', r'(Args?|Arguments?|Parameters?|Params?|Returns?|Return|Yields?|Yield|Raises?|Raise|Note|Notes|Example|Examples):\s*$'),
        # NumPy: a section title underlined with at least three dashes
        ('numpy', r'(Parameters?|Returns?|Yields?|Raises?|See Also|Notes?|References?|Examples?)\s*\n\s*-{3,}'),
        # Sphinx: field markers such as ":param x:" or ":returns:"
        ('sphinx', r':(param|type|returns?|rtype|raises?|note|example)(\s+\w+)?:'),
    )
    # docments keeps its documentation inline in the signature, so it is not
    # detected here; anything unrecognised is reported as 'unknown'.
    return next(
        (style for style, pattern in style_patterns
         if re.search(pattern, text, re.MULTILINE)),
        'unknown',
    )

In [None]:
#| export
def parse_google_docstring(
    docstring: str  # Google-style docstring text
) -> DocstringInfo:  # Parsed docstring information
    "Parse a Google-style docstring"
    # Remove surrounding triple quotes if the raw literal was passed in
    triple_quotes = ('"""', "'''")
    text = docstring.strip()
    if text.startswith(triple_quotes):
        text = text[3:]
    if text.endswith(triple_quotes):
        text = text[:-3]

    # Section header patterns, tested in this order against each stripped line
    headers = (
        ('params', r'^(Args?|Arguments?|Parameters?|Params?):\s*$'),
        ('returns', r'^(Returns?|Return):\s*$'),
        ('other', r'^(Yields?|Yield|Raises?|Raise|Note|Notes|Example|Examples):\s*$'),
    )

    params = {}
    return_parts = []
    description_parts = []
    section = None  # None until the first header is seen
    last_param = None  # Most recent parameter, target of continuation lines

    for raw_line in text.split('\n'):
        entry = raw_line.strip()

        header = next((tag for tag, pattern in headers if re.match(pattern, entry)), None)
        if header is not None:
            section = header
            continue

        if section == 'params':
            # "param_name (type): description", the "(type)" part being optional
            found = re.match(r'^(\w+)\s*(?:\([^)]+\))?\s*:\s*(.+)$', entry)
            if found:
                last_param = found.group(1)
                params[last_param] = found.group(2)
            elif last_param and entry:
                # Continuation of the previous parameter's description
                params[last_param] += ' ' + entry
        elif not entry:
            continue
        elif section == 'returns':
            return_parts.append(entry)
        elif section is None:
            # Text before any section header is the main description
            description_parts.append(entry)

    returns = ' '.join(return_parts) if return_parts else None
    return DocstringInfo(' '.join(description_parts), params, returns, 'google')

In [None]:
#| export
def parse_numpy_docstring(
    docstring: str  # NumPy-style docstring text
) -> DocstringInfo:  # Parsed docstring information
    "Parse a NumPy-style docstring"
    # Remove surrounding triple quotes if the raw literal was passed in
    triple_quotes = ('"""', "'''")
    text = docstring.strip()
    if text.startswith(triple_quotes):
        text = text[3:]
    if text.endswith(triple_quotes):
        text = text[:-3]

    underline = re.compile(r'^-{3,}$')
    # Section titles, only honoured when the following line is an underline
    titles = (
        ('params', r'^(Parameters?|Params?)$'),
        ('returns', r'^(Returns?|Return)$'),
        ('other', r'^(Yields?|Raises?|See Also|Notes?|References?|Examples?)$'),
    )

    params = {}
    return_parts = []
    description_parts = []
    section = None  # None until the first recognised title
    last_param = None  # Parameter currently collecting description lines

    entries = [raw_line.strip() for raw_line in text.split('\n')]
    for pos, entry in enumerate(entries):
        if pos + 1 < len(entries) and underline.match(entries[pos + 1]):
            title = next((tag for tag, pattern in titles if re.match(pattern, entry)), None)
            if title is not None:
                section = title
                continue

        # The dashed underline itself carries no content
        if underline.match(entry):
            continue

        if section == 'params':
            # "param_name : type" opens a parameter; its description follows
            found = re.match(r'^(\w+)\s*:\s*(.+)$', entry)
            if found:
                last_param = found.group(1)
                params[last_param] = ''
            elif last_param and entry:
                so_far = params[last_param]
                params[last_param] = f"{so_far} {entry}" if so_far else entry
        elif not entry:
            continue
        elif section == 'returns':
            return_parts.append(entry)
        elif section is None:
            # Text before any section title is the main description
            description_parts.append(entry)

    returns = ' '.join(return_parts) if return_parts else None
    return DocstringInfo(' '.join(description_parts), params, returns, 'numpy')

In [None]:
#| export
def parse_sphinx_docstring(
    docstring: str  # Sphinx-style docstring text
) -> DocstringInfo:  # Parsed docstring information
    "Parse a Sphinx-style docstring"
    params = {}
    returns = None
    description_lines = []

    for line in docstring.split('\n'):
        line = line.strip()
        if not line:
            continue

        # ":param name: description" or the typed form
        # ":param type name: description". The optional type part may not
        # contain ':' so the first colon after the name still ends the field.
        param_match = re.match(r'^:param\s+(?:[^:]+?\s+)?(\w+)\s*:\s*(.+)$', line)
        if param_match:
            params[param_match.group(1)] = param_match.group(2)
            continue

        # ":returns: description" or ":return: description"
        return_match = re.match(r'^:returns?\s*:\s*(.+)$', line)
        if return_match:
            returns = return_match.group(1)
            continue

        # Skip other sphinx directives (":type x:", ":rtype:", ":raises X:", ...)
        if re.match(r'^:\w+(\s+\w+)?:', line):
            continue

        # Anything else is part of the main description
        description_lines.append(line)

    description = ' '.join(description_lines)
    return DocstringInfo(description, params, returns, 'sphinx')

In [None]:
#| export
def extract_docstring_info(
    source: str,  # Function source code
    name: str  # Function name
) -> Optional[DocstringInfo]:  # Extracted docstring information or None
    "Extract docstring information from function source code"
    # Best-effort helper: any failure while parsing the source or the docstring
    # yields None so callers can fall back to the plain placeholder fix.
    try:
        tree = ast.parse(source)
        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            if node.name != name or not node.body:
                continue

            # ast.get_docstring returns the raw text only when the first
            # statement is a string constant. It replaces the deprecated
            # ast.Str / Constant.s access, which raises AttributeError on
            # interpreters that no longer provide them.
            docstring = ast.get_docstring(node, clean=False)
            if docstring is None:
                # The first function with this name has no docstring
                return None

            # Detect and parse the docstring style
            style = detect_docstring_style(docstring)
            if style == 'google':
                return parse_google_docstring(docstring)
            if style == 'numpy':
                return parse_numpy_docstring(docstring)
            if style == 'sphinx':
                return parse_sphinx_docstring(docstring)
            # Unknown style, return basic info
            return DocstringInfo(docstring.strip(), {}, None, 'unknown')
    except Exception:
        return None

    return None

In [None]:
#| export
def convert_to_docments_format(
    source: str,  # Original function source code
    docstring_info: DocstringInfo,  # Extracted docstring information
    result: DocmentsCheckResult  # Check result with missing params info
) -> str:  # Converted source code in docments format
    "Rewrite function source so its documentation lives in docments comments"
    src_lines = source.split('\n')

    # Locate the `def` line and the line that closes the signature
    start, end = find_signature_boundaries(src_lines)
    if start == -1:
        # No signature found: hand the source back untouched
        return source

    # Everything above the `def` line is carried over verbatim
    out = [src_lines[i] for i in range(start)]

    if start == end:
        # One-line signature: expand it to one parameter per line
        out += convert_single_line_to_docments(src_lines[start], docstring_info, result)
    else:
        # Signature already spans several lines: annotate it in place
        out += convert_multiline_to_docments(src_lines[start:end + 1], docstring_info, result)

    # Swap the structured docstring for its plain description
    body_start = end + 1
    if body_start < len(src_lines):
        out += replace_docstring_in_body(
            src_lines[body_start:], docstring_info.description, src_lines[start]
        )

    return '\n'.join(out)

In [None]:
#| export
def convert_single_line_to_docments(
    sig_line: str,  # Single-line function signature
    docstring_info: DocstringInfo,  # Extracted docstring information
    result: DocmentsCheckResult  # Check result with missing params info
) -> List[str]:  # Multi-line signature with docments comments
    "Expand a one-line signature into one parameter per line with docments comments"
    parsed = parse_single_line_signature(sig_line)
    if not parsed:
        # Signature could not be parsed: leave the line as it was
        return [sig_line]

    params = split_parameters(parsed['params_str'])
    indent = parsed['indent']

    # Opening line: `def name(`
    out = [f"{indent}{parsed['def_keyword']} {parsed['func_name']}("]

    last_pos = len(params) - 1
    for pos, param in enumerate(params):
        name = result.get_param_name(param)
        doc = docstring_info.params.get(name, '')

        if doc:
            # Documentation recovered from the original docstring
            comment = f"  # {doc}"
        elif name in result.missing_params:
            # Nothing to recover: leave a TODO placeholder
            comment = f"  # {generate_param_todo_comment(name, result)}"
        else:
            comment = ""

        # Every parameter except the last is followed by a comma
        separator = "," if pos < last_pos else ""
        out.append(f"{indent}    {param}{separator}{comment}")

    # Closing line, with the return annotation and its comment when present
    return_type = parsed['return_type']
    if not return_type:
        closing = f"{indent}):"
    elif docstring_info.returns:
        closing = f"{indent}){return_type}:  # {docstring_info.returns}"
    elif 'return' in result.missing_params:
        closing = f"{indent}){return_type}:  # {generate_return_todo_comment(result)}"
    else:
        closing = f"{indent}){return_type}:"
    out.append(closing)

    return out

In [None]:
#| export
def convert_multiline_to_docments(
    sig_lines: List[str],  # Multi-line function signature
    docstring_info: DocstringInfo,  # Extracted docstring information
    result: DocmentsCheckResult  # Check result with missing params info
) -> List[str]:  # Multi-line signature with docments comments
    "Annotate an already multi-line signature with docments comments"
    # indent, name, optional annotation/default, optional `,`/`)`, spacing, optional comment
    param_pattern = re.compile(r'^(\s*)(\w+)(\s*(?::\s*[^,\)#]+)?)\s*([,\)]?)(\s*)(?:#\s*(.*))?$')
    # `) -> type` up to the colon, then whatever trails the colon
    return_pattern = re.compile(r'^(\s*\)\s*->\s*[^:#]+)\s*:\s*(.*)$')

    last_idx = len(sig_lines) - 1
    out = []

    for idx, line in enumerate(sig_lines):
        # The first and last signature lines are never treated as parameters
        param = param_pattern.match(line) if 0 < idx < last_idx else None

        if param:
            lead, name, annotation, punct, gap, old_comment = (
                part or '' for part in param.groups()
            )
            prefix = f"{lead}{name}{annotation}{punct}{gap}"
            doc = docstring_info.params.get(name, '')

            if doc:
                # Documentation recovered from the original docstring
                out.append(f"{prefix}  # {doc}")
            elif name in result.missing_params:
                # Nothing to recover: leave a TODO placeholder
                todo = generate_param_todo_comment(name, result, old_comment)
                out.append(f"{prefix}  # {todo}")
            else:
                out.append(line)
            continue

        closing = return_pattern.match(line)
        if closing is None:
            out.append(line)
            continue

        before_colon = closing.group(1)
        if docstring_info.returns:
            out.append(f"{before_colon}:  # {docstring_info.returns}")
        elif 'return' in result.missing_params:
            # Pass any existing comment text (without its `#`) to the TODO generator
            trailing = closing.group(2).strip()
            comment_text = trailing[1:].strip() if trailing.startswith('#') else trailing
            todo = generate_return_todo_comment(result, comment_text)
            out.append(f"{before_colon}:  # {todo}")
        else:
            out.append(line)

    return out

In [None]:
#| export
def replace_docstring_in_body(
    body_lines: List[str],  # Function body lines
    description: str,  # New description to use
    def_line: str  # Function definition line for indentation
) -> List[str]:  # Modified body lines
    "Replace the docstring in function body with a simple description"
    # The new docstring sits one level (4 spaces) deeper than the `def` line
    base_indent = re.match(r'^(\s*)', def_line).group(1)
    # Escape backslashes and double quotes so the emitted literal is valid
    # Python and evaluates back to the same text.
    escaped = description.replace('\\', '\\\\').replace('"', '\\"')
    new_docstring = f'{base_indent}    "{escaped}"'

    # A docstring can only be the first statement, so only blank lines and
    # comments may precede it. String literals further down are ordinary code.
    first_code = len(body_lines)
    for idx, line in enumerate(body_lines):
        text = line.strip()
        if text and not text.startswith('#'):
            first_code = idx
            break

    delimiter = None
    unprefixed = ''
    if first_code < len(body_lines):
        text = body_lines[first_code].strip()
        # Allow a raw/unicode prefix such as r"""...""" in front of the quotes
        unprefixed = text[1:] if text[:1] in ('r', 'R', 'u', 'U') else text
        # Triple quotes are tested first so '"""' is not mistaken for '"'
        for quote in ('"""', "'''", '"', "'"):
            if unprefixed.startswith(quote):
                delimiter = quote
                break

    if delimiter is None:
        # No existing docstring: put the description first and keep the body
        return [new_docstring] + list(body_lines)

    # Find the line on which the existing docstring ends
    last_doc_line = first_code
    if len(delimiter) == 3 and delimiter not in unprefixed[len(delimiter):]:
        # Not closed on the opening line: scan for the matching delimiter. An
        # unterminated docstring consumes the rest of the body.
        last_doc_line = len(body_lines) - 1
        for idx in range(first_code + 1, len(body_lines)):
            if delimiter in body_lines[idx]:
                last_doc_line = idx
                break

    return (
        list(body_lines[:first_code])
        + [new_docstring]
        + list(body_lines[last_doc_line + 1:])
    )

In [None]:
#| export
def generate_fixed_source_with_conversion(
    result: DocmentsCheckResult  # Check result with non-compliant function
) -> str:  # Fixed source code with converted documentation
    "Fix a definition, reusing documentation from its existing docstring when possible"
    info = extract_docstring_info(result.source, result.name)

    # Conversion only makes sense for a recognised structured style that
    # actually yielded parameter or return documentation.
    convertible = (
        info
        and info.docstring_type in ['google', 'numpy', 'sphinx']
        and (info.params or info.returns)
    )

    if convertible:
        try:
            return convert_to_docments_format(result.source, info, result)
        except Exception:
            # Conversion is best effort; fall through to the placeholder fix
            pass

    # Plain fix that only adds placeholder comments
    return generate_fixed_source(result)

In [None]:
#| export
def fix_notebook_with_conversion(
    nb_path: Path,  # Path to notebook to fix
    dry_run: bool = False,  # If True, show changes without saving
    convert_docstrings: bool = True  # If True, convert existing docstrings to docments format
) -> Dict[str, Any]:  # Summary of changes made
    "Fix non-compliant functions in a notebook, optionally converting docstrings to docments format"
    nb = read_nb(nb_path)
    definitions = scan_notebook(nb_path)

    changes = {
        'notebook': nb_path.name,
        'definitions_fixed': [],
        'definitions_converted': [],
        'cells_modified': []
    }

    for defn in definitions:
        result = check_definition(defn)

        # Non-compliant, missing docs or missing type hints (shared helper
        # patched onto DocmentsCheckResult earlier in this module)
        if not result.needs_fixing():
            continue

        is_conversion = False
        if convert_docstrings:
            fixed_source = generate_fixed_source_with_conversion(result)

            # Count it as a conversion only under the same condition
            # generate_fixed_source_with_conversion uses to attempt one; a
            # structured docstring with nothing to extract gets the plain fix.
            info = extract_docstring_info(result.source, result.name)
            is_conversion = bool(
                info
                and info.docstring_type in ('google', 'numpy', 'sphinx')
                and (info.params or info.returns)
            )
        else:
            fixed_source = generate_fixed_source(result)

        # Only proceed if the source actually changed
        if fixed_source == result.source:
            continue

        cell_id = defn['cell_id']
        for cell in nb.cells:
            if cell.get('id') != cell_id:
                continue
            # The definition must appear verbatim in the cell to be replaced
            if result.source not in cell.source:
                continue

            if not dry_run:
                cell.source = cell.source.replace(result.source, fixed_source)

            changes['definitions_fixed'].append(result.name)
            if is_conversion:
                changes['definitions_converted'].append(result.name)

            if cell_id not in changes['cells_modified']:
                changes['cells_modified'].append(cell_id)

            if dry_run:
                action = "convert and fix" if is_conversion else "fix"
                print(f"\nWould {action} {result.name}:")
                print("-" * 40)
                print(fixed_source)
                print("-" * 40)

    fixed_names = changes['definitions_fixed']
    converted_names = changes['definitions_converted']

    if not fixed_names:
        print(f"✅ All definitions in {nb_path.name} are already compliant")
        return changes

    fixed_count = len(fixed_names)
    converted_count = len(converted_names)
    converted_note = (
        f" ({converted_count} converted from other docstring styles)"
        if converted_count > 0 else ""
    )

    if dry_run:
        print(f"\n🔍 Dry run: Would fix {fixed_count} definitions{converted_note}")
        return changes

    # Save the notebook, then report what was changed
    write_nb(nb, nb_path)
    print(f"✅ Fixed {fixed_count} definitions in {nb_path.name}{converted_note}")
    for defn_name in fixed_names:
        action = "converted & fixed" if defn_name in converted_names else "fixed"
        print(f"   - {defn_name} ({action})")

    return changes

In [None]:
# Test docstring style detection
# One sample per supported style plus an unstructured one; each sample keeps
# its surrounding triple quotes as part of the text handed to the detector.
test_docstrings = [
    # Google style
    '''"""Calculate the sum of two numbers.
    
    Args:
        x (int): The first number to add
        y (int): The second number to add
        
    Returns:
        int: The sum of x and y
    """''',
    
    # NumPy style
    '''"""Calculate the sum of two numbers.
    
    Parameters
    ----------
    x : int
        The first number to add
    y : int  
        The second number to add
        
    Returns
    -------
    int
        The sum of x and y
    """''',
    
    # Sphinx style
    '''"""Calculate the sum of two numbers.
    
    :param x: The first number to add
    :param y: The second number to add
    :returns: The sum of x and y
    """''',
    
    # Unknown style
    '''"""Just a simple description without structured parameters."""'''
]

# Print the detected style of every sample, numbered from 1
for i, docstring in enumerate(test_docstrings):
    style = detect_docstring_style(docstring)
    print(f"Docstring {i+1}: {style}")

print("\n" + "="*50)

# Test parsing Google style
google_docstring = test_docstrings[0]
google_info = parse_google_docstring(google_docstring)
print("Google style parsing:")
print(f"Description: {google_info.description}")
print(f"Parameters: {google_info.params}")
print(f"Returns: {google_info.returns}")

print("\n" + "="*50)

# Test parsing NumPy style
numpy_docstring = test_docstrings[1]
numpy_info = parse_numpy_docstring(numpy_docstring)
print("NumPy style parsing:")
print(f"Description: {numpy_info.description}")
print(f"Parameters: {numpy_info.params}")
print(f"Returns: {numpy_info.returns}")

Docstring 1: google
Docstring 2: numpy
Docstring 3: sphinx
Docstring 4: unknown

Google style parsing:
Description: Calculate the sum of two numbers.
Parameters: {'x': 'The first number to add', 'y': 'The second number to add'}
Returns: int: The sum of x and y

NumPy style parsing:
Description: Calculate the sum of two numbers.
Parameters: {'x': 'The first number to add', 'y': 'The second number to add'}
Returns: int The sum of x and y


In [None]:
# Test full conversion on a Google-style function
google_function_source = '''def calculate_sum(x: int, y: int) -> int:
    """Calculate the sum of two numbers.
    
    Args:
        x (int): The first number to add
        y (int): The second number to add
        
    Returns:
        int: The sum of x and y
    """
    return x + y'''

# Create a test definition
# Hand-built definition dict passed to check_definition in place of a scanned one
test_def_google = {
    'name': 'calculate_sum',
    'type': 'FunctionDef',
    'source': google_function_source,
    'notebook': 'test.ipynb',
    'args': [
        {'name': 'x', 'annotation': 'int'},
        {'name': 'y', 'annotation': 'int'}
    ],
    'returns': 'int'
}

# Check it
result_google = check_definition(test_def_google)
print("Original Google-style function:")
print(google_function_source)
print(f"\nCompliant: {result_google.is_compliant}")
print(f"Missing: {result_google.missing_params}")

# Convert it
if not result_google.is_compliant:
    converted = generate_fixed_source_with_conversion(result_google)
    print("\nConverted to docments style:")
    print(converted)
    
    # Verify the converted version is compliant
    # Same definition dict, with the converted source substituted
    test_def_converted = {
        'name': 'calculate_sum',
        'type': 'FunctionDef',
        'source': converted,
        'notebook': 'test.ipynb',
        'args': [
            {'name': 'x', 'annotation': 'int'},
            {'name': 'y', 'annotation': 'int'}
        ],
        'returns': 'int'
    }
    
    result_converted = check_definition(test_def_converted)
    print(f"\nAfter conversion - Compliant: {result_converted.is_compliant}")
    print(f"After conversion - Missing: {result_converted.missing_params}")

Original Google-style function:
def calculate_sum(x: int, y: int) -> int:
    """Calculate the sum of two numbers.

    Args:
        x (int): The first number to add
        y (int): The second number to add

    Returns:
        int: The sum of x and y
    """
    return x + y

Compliant: False
Missing: ['x', 'y', 'return']

Converted to docments style:
def calculate_sum(
    x: int,  # The first number to add
    y: int  # The second number to add
) -> int:  # int: The sum of x and y
    "Calculate the sum of two numbers."
    return x + y

After conversion - Compliant: True
After conversion - Missing: []


In [None]:
# Test with debug output to see what's happening
google_function_source = '''def calculate_sum(x: int, y: int) -> int:
    """Calculate the sum of two numbers.
    
    Args:
        x (int): The first number to add
        y (int): The second number to add
        
    Returns:
        int: The sum of x and y
    """
    return x + y'''

# Hand-built definition dict passed to check_definition
test_def_google = {
    'name': 'calculate_sum',
    'type': 'FunctionDef',
    'source': google_function_source,
    'notebook': 'test.ipynb',
    'args': [
        {'name': 'x', 'annotation': 'int'},
        {'name': 'y', 'annotation': 'int'}
    ],
    'returns': 'int'
}

result_google = check_definition(test_def_google)
print("Testing conversion with debug output:")

# First test the docstring extraction
docstring_info = extract_docstring_info(result_google.source, result_google.name)
print(f"Extracted docstring_info: {docstring_info}")
print(f"Docstring type: {docstring_info.docstring_type if docstring_info else 'None'}")
print(f"Params: {docstring_info.params if docstring_info else 'None'}")

# Now test the conversion
converted = generate_fixed_source_with_conversion(result_google)
# A real newline here; the doubled backslash used before printed a literal "\n"
print("\nConverted result:")
print(converted)

Testing conversion with debug output:
Extracted docstring_info: DocstringInfo(description='Calculate the sum of two numbers.', params={'x': 'The first number to add', 'y': 'The second number to add'}, returns='int: The sum of x and y', docstring_type='google')
Docstring type: google
Params: {'x': 'The first number to add', 'y': 'The second number to add'}
\nConverted result:
def calculate_sum(
    x: int,  # The first number to add
    y: int  # The second number to add
) -> int:  # int: The sum of x and y
    "Calculate the sum of two numbers."
    return x + y


In [None]:
# Debug the docstring extraction step by step
print("Debug AST parsing:")

try:
    tree = ast.parse(google_function_source)
    print("AST parsing successful")
    
    for node in ast.walk(tree):
        print(f"Found node: {type(node).__name__}")
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            print(f"  Function name: {node.name}")
            if node.name == 'calculate_sum' and node.body:
                print(f"  Function body has {len(node.body)} statements")
                first_stmt = node.body[0]
                print(f"  First statement type: {type(first_stmt).__name__}")
                
                if isinstance(first_stmt, ast.Expr):
                    print(f"  Expression value type: {type(first_stmt.value).__name__}")
                    # String literals are ast.Constant nodes holding a str in
                    # .value; the deprecated ast.Str / .s aliases are avoided.
                    if (isinstance(first_stmt.value, ast.Constant) and
                        isinstance(first_stmt.value.value, str)):
                        # Extract docstring text
                        docstring = first_stmt.value.value
                        print(f"  Found docstring (value): '{docstring[:50]}...'")
                    else:
                        print(f"  Expression value is not a string: {first_stmt.value}")
                else:
                    print(f"  First statement is not an expression")
            # Stop at the first function definition encountered
            break
except Exception as e:
    print(f"Error in AST parsing: {e}")

print("\n" + "="*50)

# Test the detect_docstring_style function directly
docstring_text = '''"""Calculate the sum of two numbers.

Args:
    x (int): The first number to add
    y (int): The second number to add
    
Returns:
    int: The sum of x and y
"""'''

print("Testing docstring style detection:")
style = detect_docstring_style(docstring_text)
print(f"Detected style: {style}")

print("\nTesting parsing:")
if style == 'google':
    parsed = parse_google_docstring(docstring_text)
    print(f"Parsed info: {parsed}")

Debug AST parsing:
AST parsing successful
Found node: Module
Found node: FunctionDef
  Function name: calculate_sum
  Function body has 2 statements
  First statement type: Expr
  Expression value type: Constant
  Found docstring (s): 'Calculate the sum of two numbers.

    Args:
     ...'

Testing docstring style detection:
Detected style: google

Testing parsing:
Parsed info: DocstringInfo(description='Calculate the sum of two numbers.', params={'x': 'The first number to add', 'y': 'The second number to add'}, returns='int: The sum of x and y', docstring_type='google')


In [None]:
# Test the extraction with debug output
# Relies on result_google created by an earlier cell
print("Testing extract_docstring_info with debug:")
docstring_info = extract_docstring_info(result_google.source, result_google.name)
print(f"Result: {docstring_info}")

# Also test it step by step to see where it fails
# A real newline here; the doubled backslash used before printed a literal "\n"
print("\nTesting step by step:")
print(f"Source: {repr(result_google.source[:100])}")
print(f"Name: {result_google.name}")

# Test if DocstringInfo is available
try:
    test_info = DocstringInfo("test", {}, None, "test")
    print(f"DocstringInfo works: {test_info}")
except Exception as e:
    print(f"DocstringInfo error: {e}")

Testing extract_docstring_info with debug:
Result: DocstringInfo(description='Calculate the sum of two numbers.', params={'x': 'The first number to add', 'y': 'The second number to add'}, returns='int: The sum of x and y', docstring_type='google')
\nTesting step by step:
Source: 'def calculate_sum(x: int, y: int) -> int:\n    """Calculate the sum of two numbers.\n\n    Args:\n      '
Name: calculate_sum
DocstringInfo works: DocstringInfo(description='test', params={}, returns=None, docstring_type='test')


In [None]:
# Comprehensive test of all docstring formats and conversion
# Each sample function is checked, converted, and checked again.
print("🧪 COMPREHENSIVE DOCSTRING CONVERSION TEST")
print("=" * 60)

# Test functions with different docstring formats
# Each entry supplies the fields copied into the definition dict built in the loop below
test_functions = [
    # Google style
    {
        'name': 'google_example',
        'source': '''def google_example(name: str, age: int, active: bool = True) -> str:
    """Generate a user profile string.
    
    Args:
        name (str): The user's full name
        age (int): The user's age in years
        active (bool): Whether the user is currently active
        
    Returns:
        str: A formatted profile string
    """
    return f"{name} ({age}) - {'Active' if active else 'Inactive'}"''',
        'args': [
            {'name': 'name', 'annotation': 'str'},
            {'name': 'age', 'annotation': 'int'},
            {'name': 'active', 'annotation': 'bool'}
        ],
        'returns': 'str'
    },
    
    # NumPy style
    {
        'name': 'numpy_example',
        'source': '''def numpy_example(data: list, threshold: float = 0.5) -> dict:
    """Process data based on threshold.
    
    Parameters
    ----------
    data : list
        Input data to process
    threshold : float
        Minimum threshold value
        
    Returns
    -------
    dict
        Processing results with statistics
    """
    return {'processed': len(data), 'threshold': threshold}''',
        'args': [
            {'name': 'data', 'annotation': 'list'},
            {'name': 'threshold', 'annotation': 'float'}
        ],
        'returns': 'dict'
    },
    
    # Sphinx style
    {
        'name': 'sphinx_example',
        'source': '''def sphinx_example(filename: str, encoding: str = 'utf-8') -> bool:
    """Read and validate a file.
    
    :param filename: Path to the file to read
    :param encoding: Text encoding to use
    :returns: True if file is valid, False otherwise
    """
    return True''',
        'args': [
            {'name': 'filename', 'annotation': 'str'},
            {'name': 'encoding', 'annotation': 'str'}
        ],
        'returns': 'bool'
    }
]

for i, func_info in enumerate(test_functions, 1):
    # Heading derived from the function name, e.g. "Google Example"
    print(f"\n📝 Test {i}: {func_info['name'].replace('_', ' ').title()}")
    print("-" * 40)
    
    # Create test definition
    test_def = {
        'name': func_info['name'],
        'type': 'FunctionDef',
        'source': func_info['source'],
        'notebook': 'test.ipynb',
        'args': func_info['args'],
        'returns': func_info['returns']
    }
    
    # Check compliance
    result = check_definition(test_def)
    print(f"Original compliance: {'✅ Compliant' if result.is_compliant else '❌ Non-compliant'}")
    if not result.is_compliant:
        print(f"Missing: {result.missing_params}")
    
    # Extract docstring info
    docstring_info = extract_docstring_info(result.source, result.name)
    if docstring_info:
        print(f"Docstring type: {docstring_info.docstring_type}")
        print(f"Parameters found: {list(docstring_info.params.keys())}")
        print(f"Return info: {'Yes' if docstring_info.returns else 'No'}")
    
    # Convert
    converted = generate_fixed_source_with_conversion(result)
    
    # Verify converted version
    # Shallow copy is enough: only the top-level 'source' entry is replaced
    test_def_converted = test_def.copy()
    test_def_converted['source'] = converted
    result_converted = check_definition(test_def_converted)
    
    print(f"After conversion: {'✅ Compliant' if result_converted.is_compliant else '❌ Non-compliant'}")
    
    print("\n🔄 Converted function:")
    print(converted)

print(f"\n🎉 All tests completed!")
print("=" * 60)

🧪 COMPREHENSIVE DOCSTRING CONVERSION TEST

📝 Test 1: Google Example
----------------------------------------
Original compliance: ❌ Non-compliant
Missing: ['name', 'age', 'active', 'return']
Docstring type: google
Parameters found: ['name', 'age', 'active']
Return info: Yes
After conversion: ✅ Compliant

🔄 Converted function:
def google_example(
    name: str,  # The user's full name
    age: int,  # The user's age in years
    active: bool = True  # Whether the user is currently active
) -> str:  # str: A formatted profile string
    "Generate a user profile string."
    return f"{name} ({age}) - {'Active' if active else 'Inactive'}"

📝 Test 2: Numpy Example
----------------------------------------
Original compliance: ❌ Non-compliant
Missing: ['data', 'threshold', 'return']
Docstring type: numpy
Parameters found: ['data', 'threshold']
Return info: Yes
After conversion: ✅ Compliant

🔄 Converted function:
def numpy_example(
    data: list,  # Input data to process
    threshold: float 

In [None]:
# Test fixing a non-compliant function
# (check_definition is already imported at the top of the notebook; this
# re-import is redundant here)
from cjm_nbdev_docments.core import check_definition

# Create a test function
# No type hints, no docstring, no docments comments
test_source = '''def bad_function(x, y, z=10):
    result = x + y + z
    return result'''

# Create a mock definition
test_def = {
    'name': 'bad_function',
    'type': 'FunctionDef',
    'source': test_source,
    'notebook': 'test.ipynb',
    'args': [
        {'name': 'x', 'annotation': None},
        {'name': 'y', 'annotation': None},
        {'name': 'z', 'annotation': None}
    ],
    'returns': None
}

# Check it
result = check_definition(test_def)
print("Original function:")
print(result.source)
print(f"\nCompliant: {result.is_compliant}")
print(f"Missing: {result.missing_params}")

# Fix it
# Plain placeholder fix; no docstring conversion is attempted here
fixed = generate_fixed_source(result)
print("\nFixed function:")
print(fixed)

Original function:
def bad_function(x, y, z=10):
    result = x + y + z
    return result

Compliant: False
Missing: ['x', 'y', 'z']

Fixed function:
def bad_function(
    "TODO: Add function description"
    x,  # TODO: Add type hint and description
    y,  # TODO: Add type hint and description
    z=10  # TODO: Add type hint and description
): # TODO: Add type hint
    result = x + y + z
    return result


In [None]:
# Exercise generate_fixed_source on a function that already has type hints
test_source2 = '\n'.join([
    'def typed_function(name: str, age: int) -> str:',
    '    return f"{name} is {age} years old"',
])

# Mock definition dict: both parameters and the return are annotated
test_def2 = dict(
    name='typed_function',
    type='FunctionDef',
    source=test_source2,
    notebook='test.ipynb',
    args=[
        {'name': arg_name, 'annotation': arg_type}
        for arg_name, arg_type in (('name', 'str'), ('age', 'int'))
    ],
    returns='str',
)

result2 = check_definition(test_def2)
print("Original function:", result2.source, sep="\n")
print(f"\nCompliant: {result2.is_compliant}")
print(f"Missing: {result2.missing_params}")

fixed2 = generate_fixed_source(result2)
print("\nFixed function:", fixed2, sep="\n")

Original function:
def typed_function(name: str, age: int) -> str:
    return f"{name} is {age} years old"

Compliant: False
Missing: ['name', 'age', 'return']

Fixed function:
def typed_function(
    "TODO: Add function description"
    name: str,  # TODO: Add description
    age: int  # TODO: Add description
) -> str:  # TODO: Add return description
    return f"{name} is {age} years old"


In [None]:
#| hide
# Run nbdev's export step for this project
import nbdev
nbdev.nbdev_export()

In [None]:
# Show which lines of a single-line-signature function the parameter regex matches
test_source = '''def bad_function(x, y, z=10):
    result = x + y + z
    return result'''

# Pattern expects one parameter per line, terminated by ',' or ')'
param_pattern = re.compile(r'^(\s*)(\w+)(\s*(?::\s*[^,\)#]+)?(?:\s*=\s*[^,\)#]+)?)\s*([,\)])\s*$')

lines = test_source.split('\n')
for i, line in enumerate(lines):
    print(f"Line {i}: {repr(line)}")
    param_match = param_pattern.match(line)
    if param_match is not None:
        print(f"  -> Matched param: {param_match.groups()}")

Line 0: 'def bad_function(x, y, z=10):'
Line 1: '    result = x + y + z'
Line 2: '    return result'


In [None]:
# Test the problematic case: multi-line signature with docments comments but no docstring
# NOTE: '\\n' inside the triple-quoted literal is intentional - it keeps a literal
# backslash-n in the embedded source text instead of a real line break.
problematic_source = '''def get_export_cells(
    nb_path: Path  # Path to the notebook file
) -> List[Dict[str, Any]]:  # List of cells with export directives
    nb = read_nb(nb_path)
    export_cells = []
    
    for cell in nb.cells:
        if cell.cell_type == 'code' and cell.source:
            lines = cell.source.split('\\n')
            for line in lines:
                if line.strip().startswith('#| export'):
                    export_cells.append({
                        'cell_id': cell.get('id', None),
                        'source': cell.source,
                        'idx': cell.idx_ if hasattr(cell, 'idx_') else None
                    })
                    break
    
    return export_cells'''

# Create test definition (missing docstring only)
test_def = {
    'name': 'get_export_cells',
    'type': 'FunctionDef',
    'source': problematic_source,
    'notebook': 'test.ipynb',
    'args': [{'name': 'nb_path', 'annotation': 'Path'}],
    'returns': 'List[Dict[str, Any]]'
}

result = check_definition(test_def)
print("Original function:")
print(result.source)
# Outside the source literal a single backslash is needed: "\\n" would print
# the two characters backslash + n instead of a blank line.
print(f"\nCompliant: {result.is_compliant}")
print(f"Missing: {result.missing_params}")

if not result.is_compliant:
    fixed = generate_fixed_source(result)
    print("\nFixed function:")
    print(fixed)

Original function:
def get_export_cells(
    nb_path: Path  # Path to the notebook file
) -> List[Dict[str, Any]]:  # List of cells with export directives
    nb = read_nb(nb_path)
    export_cells = []

    for cell in nb.cells:
        if cell.cell_type == 'code' and cell.source:
            lines = cell.source.split('\n')
            for line in lines:
                if line.strip().startswith('#| export'):
                    export_cells.append({
                        'cell_id': cell.get('id', None),
                        'source': cell.source,
                        'idx': cell.idx_ if hasattr(cell, 'idx_') else None
                    })
                    break

    return export_cells
\nCompliant: False
Missing: []
\nFixed function:
def get_export_cells(
    nb_path: Path  # Path to the notebook file
) -> List[Dict[str, Any]]:  # List of cells with export directives
    "TODO: Add function description"
    nb = read_nb(nb_path)
    export_cells = []

    for cell in nb

In [None]:
# Debug the signature detection
# Demonstrates the naive approach: treat the first line ending in ':' (at or after
# the def line) as the end of the signature.
problematic_source = '''def get_export_cells(
    nb_path: Path  # Path to the notebook file
) -> List[Dict[str, Any]]:  # List of cells with export directives
    nb = read_nb(nb_path)
    export_cells = []

    for cell in nb.cells:
        if cell.cell_type == 'code' and cell.source:
            lines = cell.source.split('\\n')
            for line in lines:
                if line.strip().startswith('#| export'):
                    export_cells.append({
                        'cell_id': cell.get('id', None),
                        'source': cell.source,
                        'idx': cell.idx_ if hasattr(cell, 'idx_') else None
                    })
                    break

    return export_cells'''

lines = problematic_source.split('\n')
print("Lines with indices:")
for i, line in enumerate(lines):
    print(f"{i}: {repr(line)}")
    if line.rstrip().endswith(':'):
        print(f"    ^ Line {i} ends with ':'")

# Find signature end
# The signature's closing line carries a trailing comment, so it does not end
# in ':' and this scan stops on the first body line that does.
def_line_idx = None
sig_end_idx = None
for i, line in enumerate(lines):
    if line.strip().startswith(('def ', 'async def ')):
        def_line_idx = i
        print(f"Function starts at line {i}")
    if def_line_idx is not None and line.rstrip().endswith(':'):
        sig_end_idx = i
        print(f"Signature ends at line {i}")
        break

# Single backslash here: "\\n" printed a literal backslash-n instead of a blank line
print(f"\nFunction definition: line {def_line_idx}")
print(f"Signature end: line {sig_end_idx}")
print(f"Multi-line signature: {def_line_idx != sig_end_idx}")

Lines with indices:
0: 'def get_export_cells('
1: '    nb_path: Path  # Path to the notebook file'
2: ') -> List[Dict[str, Any]]:  # List of cells with export directives'
3: '    nb = read_nb(nb_path)'
4: '    export_cells = []'
5: ''
6: '    for cell in nb.cells:'
    ^ Line 6 ends with ':'
7: "        if cell.cell_type == 'code' and cell.source:"
    ^ Line 7 ends with ':'
8: "            lines = cell.source.split('\\n')"
9: '            for line in lines:'
    ^ Line 9 ends with ':'
10: "                if line.strip().startswith('#| export'):"
    ^ Line 10 ends with ':'
11: '                    export_cells.append({'
12: "                        'cell_id': cell.get('id', None),"
13: "                        'source': cell.source,"
14: "                        'idx': cell.idx_ if hasattr(cell, 'idx_') else None"
15: '                    })'
16: '                    break'
17: ''
18: '    return export_cells'
Function starts at line 0
Signature ends at line 6
\nFunction definition: 

In [None]:
# Test the corrected signature detection
# Walk the lines from the def line, tracking parenthesis depth; the signature
# ends on the first line where depth is back to 0 and the code part ends in ':'.
lines = problematic_source.split('\n')

def_line_idx = None
sig_end_idx = None
paren_count = 0
in_signature = False

for i, line in enumerate(lines):
    if line.strip().startswith(('def ', 'async def ')):
        def_line_idx = i
        in_signature = True
        print(f"Function starts at line {i}")
        
    if in_signature:
        # Drop a trailing `# ...` comment before inspecting the line: the closing
        # signature line looks like `) -> T:  # return doc`, so checking the raw
        # line for a final ':' misses it and the scan runs on into the body.
        # NOTE: naive split - a '#' inside a string default would be cut too.
        code_part = line.split('#', 1)[0].rstrip()
        ends_with_colon = code_part.endswith(':')

        # Count parentheses (code only) to find where signature ends
        old_count = paren_count
        paren_count += code_part.count('(') - code_part.count(')')
        print(f"Line {i}: paren_count {old_count} -> {paren_count}, ends with ':': {ends_with_colon}")
        
        # If we're back to balanced parens and the code ends with colon, signature is done
        if paren_count == 0 and ends_with_colon:
            sig_end_idx = i
            print(f"Signature ends at line {i}")
            break

print(f"\nCorrected detection:")
print(f"Function definition: line {def_line_idx}")
print(f"Signature end: line {sig_end_idx}")

# Test the fix
result = check_definition(test_def)
if not result.is_compliant:
    fixed = generate_fixed_source(result)
    print("\nFixed function:")
    print(fixed)

Function starts at line 0
Line 0: paren_count 0 -> 1, ends with ':': False
Line 1: paren_count 1 -> 1, ends with ':': False
Line 2: paren_count 1 -> 0, ends with ':': False
Line 3: paren_count 0 -> 0, ends with ':': False
Line 4: paren_count 0 -> 0, ends with ':': False
Line 5: paren_count 0 -> 0, ends with ':': False
Line 6: paren_count 0 -> 0, ends with ':': True
Signature ends at line 6

Corrected detection:
Function definition: line 0
Signature end: line 6

Fixed function:
def get_export_cells(
    nb_path: Path  # Path to the notebook file
) -> List[Dict[str, Any]]:  # List of cells with export directives
    "TODO: Add function description"
    nb = read_nb(nb_path)
    export_cells = []

    for cell in nb.cells:
        if cell.cell_type == 'code' and cell.source:
            lines = cell.source.split('\n')
            for line in lines:
                if line.strip().startswith('#| export'):
                    export_cells.append({
                        'cell_id': cell.get

In [None]:
# Debug scenario 1 - should need fixing but claims to be compliant
# The parameter line ends in two trailing spaces and has no docments comment
scenario1_source = '\n'.join([
    'def get_export_cells(',
    '    nb_path: Path  ',
    ') -> List[Dict[str, Any]]:  # List of cells with export directives',
    '    "Extract all code cells from a notebook that have export directives"',
    '    nb = read_nb(nb_path)',
    '    return []',
])

test_def1 = dict(
    name='get_export_cells',
    type='FunctionDef',
    source=scenario1_source,
    notebook='test.ipynb',
    args=[{'name': 'nb_path', 'annotation': 'Path'}],
    returns='List[Dict[str, Any]]',
)

result1 = check_definition(test_def1)
print("Scenario 1 Debug:")
print(f"Compliant: {result1.is_compliant}")
print(f"Missing params: {result1.missing_params}")
print(f"Params documented: {result1.params_documented}")

# Show what the autofix generates; repr() makes whitespace differences visible
if not result1.is_compliant:
    fixed1 = generate_fixed_source(result1)
    print(f"Original == Fixed: {scenario1_source == fixed1}")
    print("\nOriginal:", repr(scenario1_source), sep="\n")
    print("\nFixed:", repr(fixed1), sep="\n")

Scenario 1 Debug:
Compliant: False
Missing params: ['nb_path']
Params documented: {'nb_path': False}
Original == Fixed: False

Original:
'def get_export_cells(\n    nb_path: Path  \n) -> List[Dict[str, Any]]:  # List of cells with export directives\n    "Extract all code cells from a notebook that have export directives"\n    nb = read_nb(nb_path)\n    return []'

Fixed:
'def get_export_cells(\n    nb_path: Path    # TODO: Add description\n) -> List[Dict[str, Any]]:  # List of cells with export directives\n    "Extract all code cells from a notebook that have export directives"\n    nb = read_nb(nb_path)\n    return []'


In [None]:
# Debug scenario 4 - only partially fixes
# Two parameters, neither documented; the first line ends in ",  " (two trailing spaces)
scenario4_source = '\n'.join([
    'def get_export_cells(',
    '    nb_path: Path,  ',
    '    fake_test_path: Path',
    ') -> List[Dict[str, Any]]:  # List of cells with export directives',
    '    "Extract all code cells from a notebook that have export directives"',
    '    nb = read_nb(nb_path)',
    '    return []',
])

test_def4 = dict(
    name='get_export_cells',
    type='FunctionDef',
    source=scenario4_source,
    notebook='test.ipynb',
    args=[
        {'name': arg_name, 'annotation': 'Path'}
        for arg_name in ('nb_path', 'fake_test_path')
    ],
    returns='List[Dict[str, Any]]',
)

result4 = check_definition(test_def4)
print("Scenario 4 Debug:")
print(f"Compliant: {result4.is_compliant}")
print(f"Missing params: {result4.missing_params}")
print(f"Params documented: {result4.params_documented}")

if not result4.is_compliant:
    fixed4 = generate_fixed_source(result4)
    print("\nFixed version:", fixed4, sep="\n")
    
    # Re-check the fixed source: same definition, only 'source' swapped
    # (args entries are copied so the two definitions share no mutable state)
    test_def4_fixed = dict(
        test_def4,
        source=fixed4,
        args=[dict(arg) for arg in test_def4['args']],
    )
    
    result4_after_fix = check_definition(test_def4_fixed)
    print(f"\nAfter fix - Compliant: {result4_after_fix.is_compliant}")
    print(f"After fix - Missing: {result4_after_fix.missing_params}")

Scenario 4 Debug:
Compliant: False
Missing params: ['nb_path', 'fake_test_path']
Params documented: {'nb_path': False, 'fake_test_path': False}

Fixed version:
def get_export_cells(
    nb_path: Path,    # TODO: Add description
    fake_test_path: Path  # TODO: Add description
) -> List[Dict[str, Any]]:  # List of cells with export directives
    "Extract all code cells from a notebook that have export directives"
    nb = read_nb(nb_path)
    return []

After fix - Compliant: True
After fix - Missing: []


In [None]:
# Debug the parameter matching regex against every line of scenario 4
scenario4_lines = scenario4_source.split('\n')

# One parameter per line, required to end in ',' or ')'
param_pattern = re.compile(r'^(\s*)(\w+)(\s*(?::\s*[^,\)#]+)?(?:\s*=\s*[^,\)#]+)?)\s*([,\)])\s*$')

print("Lines in scenario 4:")
for i, line in enumerate(scenario4_lines):
    print(f"{i}: {repr(line)}")
    param_match = param_pattern.match(line)
    if param_match is None:
        print(f"   -> NO MATCH")
    else:
        print(f"   -> MATCHED: param={param_match.group(2)}")
        
print("\n" + "=" * 50)

# Simpler check: plain substring search for the known parameter names
print("Testing simpler parameter detection:")
for i, line in enumerate(scenario4_lines):
    has_nb_path = 'nb_path' in line
    has_fake_test_path = 'fake_test_path' in line
    if has_nb_path:
        print(f"Line {i} contains 'nb_path': {repr(line)}")
    if has_fake_test_path:
        print(f"Line {i} contains 'fake_test_path': {repr(line)}")

Lines in scenario 4:
0: 'def get_export_cells('
   -> NO MATCH
1: '    nb_path: Path,  '
   -> MATCHED: param=nb_path
2: '    fake_test_path: Path'
   -> NO MATCH
3: ') -> List[Dict[str, Any]]:  # List of cells with export directives'
   -> NO MATCH
4: '    "Extract all code cells from a notebook that have export directives"'
   -> NO MATCH
5: '    nb = read_nb(nb_path)'
   -> MATCHED: param=nb
6: '    return []'
   -> NO MATCH

Testing simpler parameter detection:
Line 1 contains 'nb_path': '    nb_path: Path,  '
Line 2 contains 'fake_test_path': '    fake_test_path: Path'
Line 5 contains 'nb_path': '    nb = read_nb(nb_path)'


In [None]:
# Check the scenario 1 output
from pathlib import Path

print("Scenario 1 results:", "(Run this after the previous debug cell)", sep="\n")

# Placeholder for probing fix_notebook's change detection:
# nothing beyond these prints is executed in this cell
print("\nTesting what causes false positive 'fixed' reports...")

Scenario 1 results:
(Run this after the previous debug cell)

Testing what causes false positive 'fixed' reports...


In [None]:
# Test the improved version on scenario 4
result4 = check_definition(test_def4)
print("Scenario 4 with improved fix:")
print(f"Missing params: {result4.missing_params}")

fixed4_new = generate_fixed_source(result4)
print("\nImproved fixed version:", fixed4_new, sep="\n")

# Check if it's now fully compliant: same definition with only 'source' swapped
# (args entries are copied so the two definitions share no mutable state)
test_def4_new_fixed = dict(
    test_def4,
    source=fixed4_new,
    args=[dict(arg) for arg in test_def4['args']],
)

result4_new_after_fix = check_definition(test_def4_new_fixed)
print(f"\nAfter improved fix - Compliant: {result4_new_after_fix.is_compliant}")
print(f"After improved fix - Missing: {result4_new_after_fix.missing_params}")

Scenario 4 with improved fix:
Missing params: ['nb_path', 'fake_test_path']

Improved fixed version:
def get_export_cells(
    nb_path: Path,    # TODO: Add description
    fake_test_path: Path  # TODO: Add description
) -> List[Dict[str, Any]]:  # List of cells with export directives
    "Extract all code cells from a notebook that have export directives"
    nb = read_nb(nb_path)
    return []

After improved fix - Compliant: True
After improved fix - Missing: []


In [None]:
# Test scenario 1 - single param, no comma
# The parameter line ends in two trailing spaces and has no docments comment
scenario1_source = '\n'.join([
    'def get_export_cells(',
    '    nb_path: Path  ',
    ') -> List[Dict[str, Any]]:  # List of cells with export directives',
    '    "Extract all code cells from a notebook that have export directives"',
    '    nb = read_nb(nb_path)',
    '    return []',
])

test_def1 = dict(
    name='get_export_cells',
    type='FunctionDef',
    source=scenario1_source,
    notebook='test.ipynb',
    args=[{'name': 'nb_path', 'annotation': 'Path'}],
    returns='List[Dict[str, Any]]',
)

result1 = check_definition(test_def1)
print("Scenario 1 Debug:")
print(f"Compliant: {result1.is_compliant}")
print(f"Missing params: {result1.missing_params}")
print(f"Params documented: {result1.params_documented}")

if result1.is_compliant:
    print("Function is already compliant - should not be 'fixed'")
else:
    fixed1 = generate_fixed_source(result1)
    print("\nFixed version:", fixed1, sep="\n")
    
    # A non-compliant function must come back modified
    print(f"\nOriginal == Fixed: {scenario1_source == fixed1}")
    if scenario1_source == fixed1:
        print("ERROR: No changes made but function was not compliant!")
    else:
        print("Changes detected correctly")

Scenario 1 Debug:
Compliant: False
Missing params: ['nb_path']
Params documented: {'nb_path': False}

Fixed version:
def get_export_cells(
    nb_path: Path    # TODO: Add description
) -> List[Dict[str, Any]]:  # List of cells with export directives
    "Extract all code cells from a notebook that have export directives"
    nb = read_nb(nb_path)
    return []

Original == Fixed: False
Changes detected correctly


In [None]:
# Check if the issue is in fix_notebook's change detection
# Helper that prints the comparisons assumed to drive the "fixed" report

def debug_fix_notebook_logic(definition, old_source, new_source):
    """Print containment/equality checks between old and new source.

    Intended to mirror fix_notebook's change detection (not verified here).
    """
    print(f"Function: {definition['name']}")
    is_contained = old_source in new_source
    print(f"Old source in new source: {is_contained}")
    is_equal = old_source == new_source
    print(f"Sources equal: {is_equal}")
    # "Fixed" here means: the old text survives inside the new text, but the two differ
    print(f"Would report as fixed: {is_contained and not is_equal}")
    
# Run the change-detection helper against scenario 1
print("Testing change detection logic:")

# Scenario 1 - might be wrongly detected as changed
result1 = check_definition(test_def1)
if result1.is_compliant:
    print("Scenario 1: Already compliant, no fix needed")
else:
    fixed1 = generate_fixed_source(result1)
    debug_fix_notebook_logic(test_def1, result1.source, fixed1)

Testing change detection logic:
Function: get_export_cells
Old source in new source: False
Sources equal: False
Would report as fixed: False


In [None]:
# Test the EXACT scenarios from your original issue

# Your scenario 1 - fully documented; this should be compliant, no fix needed
your_scenario1 = '\n'.join([
    'def get_export_cells(',
    '    nb_path: Path  # Path to the notebook file',
    ') -> List[Dict[str, Any]]:  # List of cells with export directives',
    '    "Extract all code cells from a notebook that have export directives"',
    '    nb = read_nb(nb_path)',
    '    export_cells = []',
    '    return export_cells',
])

test_your_s1 = dict(
    name='get_export_cells',
    type='FunctionDef',
    source=your_scenario1,
    notebook='test.ipynb',
    args=[{'name': 'nb_path', 'annotation': 'Path'}],
    returns='List[Dict[str, Any]]',
)

result_s1 = check_definition(test_your_s1)
print("YOUR Scenario 1 (should be compliant):")
print(f"Compliant: {result_s1.is_compliant}")
print(f"Missing: {result_s1.missing_params}")

if result_s1.is_compliant:
    print("✅ Correctly identified as compliant")
else:
    print("❌ ERROR: This should be compliant but isn't!")
    fixed_s1 = generate_fixed_source(result_s1)
    print("Would be 'fixed' to:", fixed_s1, sep="\n")

print("\n" + "=" * 60)

# Your scenario 2 - missing one param doc, should be fixed
# (the undocumented parameter line ends in a single trailing space)
your_scenario2 = '\n'.join([
    'def get_export_cells(',
    '    nb_path: Path,  # Path to the notebook file',
    '    fake_test_path: Path ',
    ') -> List[Dict[str, Any]]:  # List of cells with export directives',
    '    "Extract all code cells from a notebook that have export directives"',
    '    nb = read_nb(nb_path)',
    '    return []',
])

test_your_s2 = dict(
    name='get_export_cells',
    type='FunctionDef',
    source=your_scenario2,
    notebook='test.ipynb',
    args=[
        {'name': arg_name, 'annotation': 'Path'}
        for arg_name in ('nb_path', 'fake_test_path')
    ],
    returns='List[Dict[str, Any]]',
)

result_s2 = check_definition(test_your_s2)
print("YOUR Scenario 2 (should need fixing):")
print(f"Compliant: {result_s2.is_compliant}")
print(f"Missing: {result_s2.missing_params}")

if result_s2.is_compliant:
    print("❌ ERROR: This should need fixing but was marked compliant!")
else:
    print("✅ Correctly identified as non-compliant")
    fixed_s2 = generate_fixed_source(result_s2)
    print("Fixed to:", fixed_s2, sep="\n")

YOUR Scenario 1 (should be compliant):
Compliant: True
Missing: []
✅ Correctly identified as compliant

YOUR Scenario 2 (should need fixing):
Compliant: False
Missing: ['fake_test_path']
✅ Correctly identified as non-compliant
Fixed to:
def get_export_cells(
    nb_path: Path,  # Path to the notebook file
    fake_test_path: Path   # TODO: Add description
) -> List[Dict[str, Any]]:  # List of cells with export directives
    "Extract all code cells from a notebook that have export directives"
    nb = read_nb(nb_path)
    return []


In [None]:
# Test the edge case with existing return comment
# Single-line signature: return is documented inline, the parameter is not
edge_case_source = '\n'.join([
    'def get_export_cells(nb_path: Path) -> List[Dict[str, Any]]:  # List of cells with export directives',
    '    "Extract all code cells from a notebook that have export directives"',
    '    nb = read_nb(nb_path)',
    '    return []',
])

test_edge_case = dict(
    name='get_export_cells',
    type='FunctionDef',
    source=edge_case_source,
    notebook='test.ipynb',
    args=[{'name': 'nb_path', 'annotation': 'Path'}],
    returns='List[Dict[str, Any]]',
)

result_edge = check_definition(test_edge_case)
print("Edge case - single line with existing return comment:")
print(f"Compliant: {result_edge.is_compliant}")
print(f"Missing: {result_edge.missing_params}")

if result_edge.is_compliant:
    print("Already compliant")
else:
    fixed_edge = generate_fixed_source(result_edge)
    print("\nFixed version (should preserve existing comment with TODO):", fixed_edge, sep="\n")
    
    # Verify the fix worked: same definition with only 'source' swapped
    # (args entries are copied so the two definitions share no mutable state)
    test_edge_fixed = dict(
        test_edge_case,
        source=fixed_edge,
        args=[dict(arg) for arg in test_edge_case['args']],
    )
    
    result_edge_after = check_definition(test_edge_fixed)
    print(f"\nAfter fix - Compliant: {result_edge_after.is_compliant}")
    print(f"After fix - Missing: {result_edge_after.missing_params}")

Edge case - single line with existing return comment:
Compliant: False
Missing: ['nb_path']

Fixed version (should preserve existing comment with TODO):
def get_export_cells(
    nb_path: Path  # TODO: Add description
) -> List[Dict[str, Any]]: # List of cells with export directives
    "Extract all code cells from a notebook that have export directives"
    nb = read_nb(nb_path)
    return []

After fix - Compliant: True
After fix - Missing: []


In [None]:
# Test with a dataclass without docstring
# Every field carries an inline comment; only the class docstring is absent
dataclass_source = '''@dataclass
class DocmentsCheckResult:
    name: str  # Name of the function/class
    type: str  # Type (FunctionDef, ClassDef, etc.)
    notebook: str  # Source notebook
    has_docstring: bool  # Whether it has a docstring
    params_documented: Dict[str, bool]  # Which params have documentation
    return_documented: bool  # Whether return is documented
    missing_params: List[str]  # Parameters missing documentation
    is_compliant: bool  # Overall compliance status
    source: str  # Source code of the definition
    has_todos: bool = False  # Whether it contains TODO placeholders
    todo_count: int = 0  # Number of TODO placeholders found'''

# Mock ClassDef definition; args is left empty for classes in this test
test_dataclass = dict(
    name='DocmentsCheckResult',
    type='ClassDef',
    source=dataclass_source,
    notebook='test.ipynb',
    args=[],
    returns=None,
)

# Report compliance of the untouched class source
result = check_definition(test_dataclass)
print("Original dataclass:", result.source, sep="\n")
print(f"\nCompliant: {result.is_compliant}")
print(f"Has docstring: {result.has_docstring}")
print(f"Missing: {result.missing_params}")

# Attempt the autofix and confirm the source actually changed
if not result.is_compliant:
    fixed = generate_fixed_source(result)
    print("\nFixed dataclass:", fixed, sep="\n")
    print(f"\nSource changed: {result.source != fixed}")

Original dataclass:
@dataclass
class DocmentsCheckResult:
    name: str  # Name of the function/class
    type: str  # Type (FunctionDef, ClassDef, etc.)
    notebook: str  # Source notebook
    has_docstring: bool  # Whether it has a docstring
    params_documented: Dict[str, bool]  # Which params have documentation
    return_documented: bool  # Whether return is documented
    missing_params: List[str]  # Parameters missing documentation
    is_compliant: bool  # Overall compliance status
    source: str  # Source code of the definition
    has_todos: bool = False  # Whether it contains TODO placeholders
    todo_count: int = 0  # Number of TODO placeholders found

Compliant: False
Has docstring: False
Missing: []

Fixed dataclass:
@dataclass
class DocmentsCheckResult:
    "TODO: Add class description"
    name: str  # Name of the function/class
    type: str  # Type (FunctionDef, ClassDef, etc.)
    notebook: str  # Source notebook
    has_docstring: bool  # Whether it has a docstr