# Scanner

> Scan nbdev notebooks for exported functions and classes

In [None]:
#| default_exp scanner

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import ast
from execnb.nbio import read_nb
from fastcore.basics import AttrDict
from nbdev.config import get_config
import re

In [None]:
#| export
def get_export_cells(
    nb_path: Path  # Path to the notebook file
) -> List[Dict[str, Any]]:  # List of cells with export directives
    "Collect the non-empty code cells of a notebook that contain an export directive line"
    def _has_export_line(text: str) -> bool:
        # Prefix match on the whitespace-stripped line, so any directive that
        # begins with `#| export` counts, wherever it sits in the cell.
        return any(ln.strip().startswith('#| export') for ln in text.split('\n'))

    # One record per matching cell, in notebook order.
    return [
        {
            'cell_id': c.get('id', None),
            'source': c.source,
            'idx': getattr(c, 'idx_', None),  # None when the cell carries no `idx_`
        }
        for c in read_nb(nb_path).cells
        if c.cell_type == 'code' and c.source and _has_export_line(c.source)
    ]

In [None]:
#| export
def extract_definitions(
    source: str  # Python source code
) -> List[Dict[str, Any]]:  # List of function/class definitions with metadata
    """Extract function and class definitions from source code.

    Lines whose stripped text starts with `#| ` are dropped before parsing, so
    the `lineno` and `source` values in the result refer to that directive-free
    text. Every `def`, `async def` and `class` node yielded by `ast.walk` is
    reported, which includes methods and nested definitions.

    Each result has the keys `name`, `type`, `source`, `lineno` and `is_async`.
    Function results additionally carry `args` (one `{'name', 'annotation'}`
    dict per parameter, in signature order) and `returns`.

    Source that does not parse yields an empty list after the syntax error has
    been printed.
    """
    # Remove directive lines; line numbers from the AST index into this list.
    clean_lines = [line for line in source.split('\n') if not line.strip().startswith('#| ')]

    # Only parsing can raise SyntaxError, so keep the guarded region minimal.
    try:
        tree = ast.parse('\n'.join(clean_lines))
    except SyntaxError as e:
        print(f"Syntax error parsing source: {e}")
        return []

    definitions = []
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            continue

        # AST line numbers are 1-based and inclusive; fall back to a single
        # line when no end line is available.
        start_line = node.lineno - 1
        end_line = getattr(node, 'end_lineno', None) or start_line + 1

        definition = {
            'name': node.name,
            'type': type(node).__name__,
            'source': '\n'.join(clean_lines[start_line:end_line]),
            'lineno': node.lineno,
            'is_async': isinstance(node, ast.AsyncFunctionDef)
        }

        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            signature = node.args
            # Gather every parameter in the order it appears in the signature:
            # positional-only, regular, *varargs, keyword-only, **kwargs.
            # Reading `signature.args` alone would drop all but the regular ones.
            params = [*signature.posonlyargs, *signature.args]
            if signature.vararg is not None:
                params.append(signature.vararg)
            params.extend(signature.kwonlyargs)
            if signature.kwarg is not None:
                params.append(signature.kwarg)

            definition['args'] = [
                {
                    'name': param.arg,
                    'annotation': ast.unparse(param.annotation) if param.annotation else None
                }
                for param in params
            ]

            # Return annotation as source text, or None when absent
            definition['returns'] = ast.unparse(node.returns) if node.returns else None

        definitions.append(definition)

    return definitions

In [None]:
#| export
def scan_notebook(
    nb_path: Path  # Path to the notebook to scan
) -> List[Dict[str, Any]]:  # List of exported definitions with metadata
    "Gather the function/class definitions found in every export cell of a notebook"
    found = []
    for export_cell in get_export_cells(nb_path):
        for entry in extract_definitions(export_cell['source']):
            # Tag each definition with where it came from: the notebook's
            # file name and the id of the cell that holds it.
            entry.update(notebook=nb_path.name, cell_id=export_cell['cell_id'])
            found.append(entry)
    return found

In [None]:
#| export
def scan_project(
    nbs_path: Optional[Path] = None,  # Path to notebooks directory (defaults to config.nbs_path)
    pattern: str = "*.ipynb"  # Pattern for notebook files to scan
) -> List[Dict[str, Any]]:  # All exported definitions found in the project
    """Scan all notebooks in a project for exported definitions.

    Notebooks matching `pattern` inside `nbs_path` are scanned in sorted path
    order, so the result order is the same on every run and every filesystem.
    Notebooks whose file name starts with `_` are skipped. A notebook that
    fails to scan is reported with `print` and does not stop the scan.
    """
    if nbs_path is None:
        cfg = get_config()
        nbs_path = Path(cfg.config_path) / cfg.nbs_path

    nbs_path = Path(nbs_path)
    all_definitions = []

    # `glob` yields entries in filesystem order; sort for reproducible results.
    for nb_path in sorted(nbs_path.glob(pattern)):
        if nb_path.name.startswith('_'):  # Skip private notebooks
            continue
        try:
            definitions = scan_notebook(nb_path)
        except Exception as e:
            # Best-effort: one unreadable notebook must not abort the project scan.
            print(f"Error scanning {nb_path}: {e}")
        else:
            all_definitions.extend(definitions)

    return all_definitions

In [None]:
# Test scanning this project
# Called without arguments, so scan_project() locates the notebooks directory via get_config().
definitions = scan_project()
print(f"Found {len(definitions)} exported definitions")
for defn in definitions[:5]:  # Show first 5
    # One summary line per definition: AST node type, name, and source notebook
    print(f"- {defn['type']}: {defn['name']} in {defn['notebook']}")

Found 16 exported definitions
- ClassDef: DocmentsCheckResult in 00_core.ipynb
- FunctionDef: extract_param_docs in 00_core.ipynb
- FunctionDef: check_return_doc in 00_core.ipynb
- FunctionDef: check_definition in 00_core.ipynb
- FunctionDef: check_notebook in 00_core.ipynb


In [None]:
# Debug: Check what notebooks we're finding
# Rebuilds the same directory lookup and "*.ipynb" glob that scan_project() uses by default,
# then reports how many export cells each notebook has.
from nbdev.config import get_config  # already imported in the export cell above; repeated so this cell runs standalone
cfg = get_config()
nbs_path = Path(cfg.config_path) / cfg.nbs_path
notebooks = list(nbs_path.glob("*.ipynb"))
print(f"Looking in: {nbs_path}")
# NOTE: this count includes notebooks starting with '_' even though the loop below skips them
print(f"Found {len(notebooks)} notebooks:")
for nb in notebooks:
    if not nb.name.startswith('_'):  # same private-notebook filter as scan_project()
        print(f"  - {nb.name}")
        cells = get_export_cells(nb)
        print(f"    Export cells: {len(cells)}")

Looking in: /mnt/SN850X_8TB_EXT4/Projects/GitHub/cj-mills/cjm-nbdev-docments/nbs
Found 6 notebooks:
  - 00_core.ipynb
    Export cells: 7
  - 03_autofix.ipynb
    Export cells: 3
  - 01_scanner.ipynb
    Export cells: 5
  - index.ipynb
    Export cells: 0
  - 04_cli.ipynb
    Export cells: 2
  - 02_report.ipynb
    Export cells: 4


In [None]:
#| hide
# Run nbdev's export step so the `#| export` cells above are written out to the library module.
import nbdev; nbdev.nbdev_export()