From 319bec4f3c4197081713fd8f89f7643e257625bf Mon Sep 17 00:00:00 2001 From: Giuliano Ranauro Date: Thu, 12 Feb 2026 15:54:55 +0100 Subject: [PATCH 1/9] getting there --- src/deepbase/main.py | 150 ++++++++++++++++++----------- src/deepbase/toon.py | 220 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 270 insertions(+), 100 deletions(-) diff --git a/src/deepbase/main.py b/src/deepbase/main.py index 4d6c84d..ef2a686 100644 --- a/src/deepbase/main.py +++ b/src/deepbase/main.py @@ -11,7 +11,7 @@ from importlib.metadata import version as get_package_version, PackageNotFoundError from typing import List, Dict, Any, Set, Optional, Tuple -from deepbase.toon import generate_toon_representation, generate_database_focused +from deepbase.toon import generate_toon_representation, generate_light_representation, generate_database_focused from deepbase.parsers import get_document_structure from deepbase.database import is_sqlite_database, get_database_schema, generate_database_context_full @@ -75,6 +75,7 @@ def load_config(root_dir: str) -> Dict[str, Any]: pass return config + def estimate_tokens(size_bytes: int) -> str: if size_bytes == 0: return "0t" tokens = math.ceil(size_bytes / 4) @@ -85,37 +86,37 @@ def estimate_tokens(size_bytes: int) -> str: else: return f"~{tokens/1000000:.1f}M t" + def is_significant_file(file_path: str, config: Dict[str, Any], output_file_abs: str = None) -> bool: file_name = os.path.basename(file_path) - + # Check 1: Esclusione del file di output corrente (tramite path assoluto) if output_file_abs and os.path.abspath(file_path) == output_file_abs: return False - # Check 2 (NUOVO): Esclusione per nome. - # Se nella cartella target esiste un file che si chiama come il file di output - # (es: "llm_context.md"), lo ignoriamo a prescindere dal path. + # Check 2: Esclusione per nome (es: llm_context.md già esistente nella dir target) if output_file_abs and file_name == os.path.basename(output_file_abs): return False - # Check 3: Esclusione file spazzatura (lockfiles, etc definita in config) + # Check 3: Esclusione file spazzatura (lockfiles, etc) if file_name in config["ignore_files"]: return False significant_extensions = config["significant_extensions"] - - if file_name in significant_extensions: + + if file_name in significant_extensions: return True - + _, ext = os.path.splitext(file_path) if ext in significant_extensions: return True if is_sqlite_database(file_path): return True - + return False + def calculate_project_stats(root_dir: str, config: Dict[str, Any], output_file_abs: str) -> int: total_size = 0 ignore_dirs = config["ignore_dirs"] @@ -130,6 +131,7 @@ def calculate_project_stats(root_dir: str, config: Dict[str, Any], output_file_a pass return total_size + # --- ALBERO DELLE DIRECTORY --- def _generate_tree_recursive( @@ -144,7 +146,7 @@ def _generate_tree_recursive( """ output_str = "" subtree_size = 0 - + try: items = sorted(os.listdir(current_path)) except PermissionError: @@ -154,7 +156,7 @@ def _generate_tree_recursive( for item in items: full_path = os.path.join(current_path, item) is_dir = os.path.isdir(full_path) - + if is_dir: if item not in config["ignore_dirs"] and not item.startswith('.'): filtered_items.append((item, True)) @@ -166,21 +168,19 @@ def _generate_tree_recursive( is_last = (i == len(filtered_items) - 1) full_path = os.path.join(current_path, name) connector = "└── " if is_last else "├── " - + if is_dir: - # Ricorsione: ottengo prima il contenuto e la dimensione della sottocartella extension = " " if is_last else "│ " sub_tree_str, sub_dir_size = _generate_tree_recursive( - full_path, - prefix + extension, - config, + full_path, + prefix + extension, + config, total_project_size, output_file_abs ) - + subtree_size += sub_dir_size - - # Genero stats cartella + folder_stats = "" if total_project_size > 0 and sub_dir_size > 0: percent = (sub_dir_size / total_project_size) * 100 @@ -189,35 +189,37 @@ def _generate_tree_recursive( output_str += f"{prefix}{connector}📁 {name}/{folder_stats}\n" output_str += sub_tree_str - + else: - icon = "üóÑÔ∏è " if is_sqlite_database(full_path) else "📄 " + icon = "🗄️ " if is_sqlite_database(full_path) else "📄 " try: size = os.path.getsize(full_path) subtree_size += size - + file_stats = "" if total_project_size > 0 and size > 0: percent = (size / total_project_size) * 100 token_est = estimate_tokens(size) file_stats = f" ({percent:.1f}% | {token_est})" - + output_str += f"{prefix}{connector}{icon}{name}{file_stats}\n" except OSError: pass return output_str, subtree_size + def generate_directory_tree(root_dir: str, config: Dict[str, Any], output_file_abs: str) -> Tuple[str, int, int]: abs_root = os.path.abspath(root_dir) total_size = calculate_project_stats(root_dir, config, output_file_abs) - + tree_body, _ = _generate_tree_recursive(root_dir, "", config, total_size, output_file_abs) - + header = f"📁 {os.path.basename(abs_root) or '.'}/\n" total_tokens_est = math.ceil(total_size / 4) return header + tree_body, total_size, total_tokens_est + # --- CORE --- def get_all_significant_files(root_dir: str, config: Dict[str, Any], output_file_abs: str) -> List[str]: @@ -231,6 +233,7 @@ def get_all_significant_files(root_dir: str, config: Dict[str, Any], output_file significant_files.append(file_path) return significant_files + def read_file_content(file_path: str) -> str: if is_sqlite_database(file_path): try: @@ -247,19 +250,18 @@ def read_file_content(file_path: str) -> str: except Exception as e: return f"!!! Error reading file: {e} !!!" + def matches_focus(file_path: str, root_dir: str, focus_patterns: List[str]) -> bool: if not focus_patterns: return False rel_path = os.path.relpath(file_path, root_dir) rel_path_fwd = rel_path.replace(os.sep, '/') for pattern in focus_patterns: - if '.db/' in pattern or '.sqlite/' in pattern: - parts = pattern.split('/') - if len(parts) >= 2 and any(ext in parts[0] for ext in ['.db', '.sqlite']): pass clean_pattern = pattern.replace(os.sep, '/') if fnmatch.fnmatch(rel_path_fwd, clean_pattern): return True if clean_pattern in rel_path_fwd: return True return False + def extract_focused_tables(file_path: str, focus_patterns: List[str]) -> List[str]: if not is_sqlite_database(file_path): return [] db_name = os.path.basename(file_path) @@ -270,6 +272,7 @@ def extract_focused_tables(file_path: str, focus_patterns: List[str]) -> List[st if fnmatch.fnmatch(db_name, db_pattern): focused_tables.append(table_name) return focused_tables + def load_focus_patterns_from_file(file_path: str) -> List[str]: patterns = [] if os.path.exists(file_path): @@ -285,6 +288,7 @@ def load_focus_patterns_from_file(file_path: str) -> List[str]: console.print(f"[bold yellow]Warning:[/bold yellow] Focus file '{file_path}' not found.") return patterns + def version_callback(value: bool): if value: try: @@ -294,6 +298,7 @@ def version_callback(value: bool): console.print("DeepBase version: [yellow]unknown (editable/dev mode)[/yellow]") raise typer.Exit() + @app.command(epilog=EPILOG_TEXT, rich_help_panel="Main Commands") def create( target: str = typer.Argument(None, help="The file or directory to scan."), @@ -301,13 +306,16 @@ def create( output: str = typer.Option("llm_context.md", "--output", "-o", help="The output file."), verbose: bool = typer.Option(False, "--verbose", "-V", help="Show detailed output."), include_all: bool = typer.Option(False, "--all", "-a", help="Include full content of ALL files."), - toon_mode: bool = typer.Option(False, "--toon", "-t", help="Use 'Skeleton' mode for non-focused files."), - focus: Optional[List[str]] = typer.Option(None, "--focus", "-f", help="Pattern to focus on."), + light_mode: bool = typer.Option(False, "--light", "-l", help="Token-saving mode: includes only method/function signatures instead of full code."), + focus: Optional[List[str]] = typer.Option(None, "--focus", "-f", help="Pattern to focus on (full content for matching files)."), focus_file: Optional[str] = typer.Option(None, "--focus-file", "-ff", help="Path to focus patterns file.") ): """ Analyzes a directory OR a single file. - Hybrid workflow with Context Skeleton + Focused Content. + + Default: structure tree only. + Use [bold]--all[/bold] for full content, [bold]--light[/bold] for signatures only, + or [bold]--focus[/bold] / [bold]--focus-file[/bold] for hybrid mode. """ if target is None: console.print("[red]Error: Missing argument 'TARGET'.[/red]") @@ -319,7 +327,7 @@ def create( raise typer.Exit(code=1) abs_output_path = os.path.abspath(output) - + active_focus_patterns = [] if focus: active_focus_patterns.extend(focus) if focus_file: @@ -327,13 +335,21 @@ def create( if file_patterns: active_focus_patterns.extend(file_patterns) active_focus_patterns = list(set(active_focus_patterns)) - console.print(f"[bold green]Analyzing '{target}'...[/bold green]") + mode_label = "" + if light_mode: + mode_label = " [bold yellow](LIGHT — signatures only)[/bold yellow]" + elif include_all: + mode_label = " [bold cyan](ALL — full content)[/bold cyan]" - if toon_mode: + console.print(f"[bold green]Analyzing '{target}'...[/bold green]{mode_label}") + + # --- Formatter helpers --- + # In light mode usiamo lo stesso formato compatto di TOON per leggibilità + if light_mode: def fmt_header(title): return f"### {title}\n\n" def fmt_file_start(path, icon=""): return f"> FILE: {icon}{path}\n" def fmt_file_end(path): return "\n" - def fmt_separator(): return "" + def fmt_separator(): return "" else: def fmt_header(title): return f"{'='*80}\n### {title} ###\n{'='*80}\n\n" def fmt_file_start(path, icon=""): return f"--- START OF FILE: {icon}{path} ---\n\n" @@ -342,10 +358,15 @@ def fmt_separator(): return "-" * 40 + "\n\n" try: with open(output, "w", encoding="utf-8") as outfile: + + # ---------------------------------------------------------------- + # CASO 1: singolo file + # ---------------------------------------------------------------- if os.path.isfile(target): filename = os.path.basename(target) is_db = is_sqlite_database(target) outfile.write(f"# Analysis: {filename}\n\n") + if is_db: schema = get_database_schema(target) focused_tables = extract_focused_tables(target, active_focus_patterns) @@ -353,10 +374,10 @@ def fmt_separator(): return "-" * 40 + "\n\n" fnmatch.fnmatch(filename, p) or p in filename for p in active_focus_patterns )) outfile.write(fmt_header("DATABASE SCHEMA")) - if toon_mode and not is_focused: - outfile.write(generate_toon_representation(target, "")) + if light_mode and not is_focused: + outfile.write(generate_light_representation(target, "")) elif focused_tables: - outfile.write(generate_database_focused(target, focused_tables)) + outfile.write(generate_database_focused(target, focused_tables)) else: outfile.write(generate_database_context_full(schema, filename)) else: @@ -367,26 +388,31 @@ def fmt_separator(): return "-" * 40 + "\n\n" outfile.write("\n\n") outfile.write(fmt_header("CONTENT")) outfile.write(fmt_file_start(filename)) - if toon_mode: outfile.write(generate_toon_representation(target, content)) - else: outfile.write(content) + if light_mode: + outfile.write(generate_light_representation(target, content)) + else: + outfile.write(content) outfile.write(fmt_file_end(filename)) + # ---------------------------------------------------------------- + # CASO 2: directory + # ---------------------------------------------------------------- elif os.path.isdir(target): config = load_config(target) outfile.write(f"# Project Context: {os.path.basename(os.path.abspath(target))}\n\n") outfile.write(fmt_header("PROJECT STRUCTURE")) - + tree_str, total_bytes, total_tokens = generate_directory_tree(target, config, abs_output_path) outfile.write(f"> Total Size: {total_bytes/1024:.2f} KB | Est. Tokens: ~{total_tokens:,}\n") outfile.write(tree_str) outfile.write("\n\n") - if include_all or toon_mode or active_focus_patterns: + if include_all or light_mode or active_focus_patterns: section_title = "FILE CONTENTS" - if toon_mode: section_title += " (TOON)" + if light_mode: section_title += " (LIGHT — signatures only)" outfile.write(fmt_header(section_title)) files = get_all_significant_files(target, config, abs_output_path) - + with Progress(console=console) as progress: task = progress.add_task("[cyan]Processing...", total=len(files)) for fpath in files: @@ -397,38 +423,50 @@ def fmt_separator(): return "-" * 40 + "\n\n" if is_db: focused_tables = extract_focused_tables(fpath, active_focus_patterns) if focused_tables: is_in_focus = True + + # Logica di decisione: + # - --all: sempre full content + # - --light: light per tutto, full per file in focus + # - --focus senza altri flag: light/skip per tutto, full per file in focus should_write_full = include_all or is_in_focus - should_write_toon = toon_mode and not should_write_full - if not should_write_full and not should_write_toon: + should_write_light = light_mode and not should_write_full + + if not should_write_full and not should_write_light: progress.update(task, advance=1) continue progress.update(task, advance=1, description=f"[cyan]{rel_path}[/cyan]") - marker = " [FOCUSED]" if (is_in_focus and toon_mode) else "" - icon = "üóÑÔ∏è " if is_db else "" + marker = " [FOCUSED]" if (is_in_focus and light_mode) else "" + icon = "🗄️ " if is_db else "" outfile.write(fmt_file_start(rel_path + marker, icon)) - + if is_db: if should_write_full: - if focused_tables: outfile.write(generate_database_focused(fpath, focused_tables)) + if focused_tables: + outfile.write(generate_database_focused(fpath, focused_tables)) else: schema = get_database_schema(fpath) outfile.write(generate_database_context_full(schema, os.path.basename(fpath))) - else: outfile.write(generate_toon_representation(fpath, "")) + else: + outfile.write(generate_light_representation(fpath, "")) else: content = read_file_content(fpath) - if should_write_full: outfile.write(content) - elif should_write_toon: outfile.write(generate_toon_representation(fpath, content)) + if should_write_full: + outfile.write(content) + elif should_write_light: + outfile.write(generate_light_representation(fpath, content)) + outfile.write(fmt_file_end(rel_path)) outfile.write(fmt_separator()) else: - console.print("[dim]Directory tree generated. Use --toon, --all, or --focus for content.[/dim]") + console.print("[dim]Directory tree generated. Use --light, --all, or --focus for content.[/dim]") - console.print(f"\n[bold green]✓ SUCCESS[/bold green]: Context created in [cyan]'{output}'[/cyan]") + console.print(f"\n[bold green]✔ SUCCESS[/bold green]: Context created in [cyan]'{output}'[/cyan]") except Exception as e: console.print(f"\n[bold red]Error:[/bold red] {e}") raise typer.Exit(code=1) + if __name__ == "__main__": app() \ No newline at end of file diff --git a/src/deepbase/toon.py b/src/deepbase/toon.py index f53f03a..7f002e5 100644 --- a/src/deepbase/toon.py +++ b/src/deepbase/toon.py @@ -1,4 +1,4 @@ -# src/deepbase/toon.py (AGGIORNAMENTO) +# src/deepbase/toon.py import ast import os @@ -7,13 +7,17 @@ # Import database handling from deepbase.database import ( - get_database_schema, + get_database_schema, generate_database_context_toon, generate_database_context_hybrid, is_sqlite_database ) +# --------------------------------------------------------------------------- +# TOON VISITOR — mantiene classi + firme + docstring (comportamento originale) +# --------------------------------------------------------------------------- + class ToonVisitor(ast.NodeVisitor): def __init__(self): self.output = [] @@ -27,13 +31,13 @@ def visit_ClassDef(self, node): bases = [b.id for b in node.bases if isinstance(b, ast.Name)] base_str = f"({', '.join(bases)})" if bases else "" self._log(f"C: {node.name}{base_str}") - + self.indent_level += 1 docstring = ast.get_docstring(node) if docstring: short_doc = docstring.split('\n')[0].strip() self._log(f"\"\"\"{short_doc}...\"\"\"") - + self.generic_visit(node) self.indent_level -= 1 @@ -47,19 +51,19 @@ def _handle_function(self, node, is_async=False): args = [arg.arg for arg in node.args.args] args_str = ", ".join(args) prefix = "async " if is_async else "" - + ret_anno = "" if node.returns: try: if isinstance(node.returns, ast.Name): ret_anno = f" -> {node.returns.id}" elif isinstance(node.returns, ast.Constant): - ret_anno = f" -> {node.returns.value}" - except: + ret_anno = f" -> {node.returns.value}" + except Exception: pass self._log(f"{prefix}F: {node.name}({args_str}){ret_anno}") - + docstring = ast.get_docstring(node) if docstring: self.indent_level += 1 @@ -73,7 +77,112 @@ def generic_visit(self, node): self.visit(child) -# --- Gestori per file Non-Python --- +# --------------------------------------------------------------------------- +# LIGHT VISITOR — solo firme Python (def/async def), niente classi né docstring +# --------------------------------------------------------------------------- + +class LightVisitor(ast.NodeVisitor): + """ + Visita l'AST e produce SOLO le firme dei metodi/funzioni Python, + preservando la corretta indentazione per classi nidificate. + Non include docstring, decoratori o corpo della funzione. + """ + + def __init__(self): + self.output = [] + self.indent_level = 0 + + def _log(self, text): + indent = " " * self.indent_level + self.output.append(f"{indent}{text}") + + # Entra nelle classi per mantenere la gerarchia, ma non le stampa + def visit_ClassDef(self, node): + self._log(f"class {node.name}:") + self.indent_level += 1 + self.generic_visit(node) + self.indent_level -= 1 + + def visit_FunctionDef(self, node): + self._emit_signature(node, is_async=False) + + def visit_AsyncFunctionDef(self, node): + self._emit_signature(node, is_async=True) + + def _emit_signature(self, node, is_async: bool): + """Emette la firma completa della funzione/metodo in stile Python.""" + prefix = "async " if is_async else "" + + # --- Argomenti con annotazioni di tipo --- + args_parts = [] + + # Calcola l'offset per i default (i default si applicano agli ultimi N args) + all_args = node.args.args + defaults = node.args.defaults + defaults_offset = len(all_args) - len(defaults) + + for i, arg in enumerate(all_args): + arg_str = arg.arg + if arg.annotation: + arg_str += f": {ast.unparse(arg.annotation)}" + # Default value + default_idx = i - defaults_offset + if default_idx >= 0: + default_val = ast.unparse(defaults[default_idx]) + arg_str += f" = {default_val}" + args_parts.append(arg_str) + + # *args + if node.args.vararg: + va = node.args.vararg + va_str = f"*{va.arg}" + if va.annotation: + va_str += f": {ast.unparse(va.annotation)}" + args_parts.append(va_str) + + # keyword-only args + kwonly_defaults = { + i: node.args.kw_defaults[i] + for i in range(len(node.args.kwonlyargs)) + if node.args.kw_defaults[i] is not None + } + for i, kwarg in enumerate(node.args.kwonlyargs): + kw_str = kwarg.arg + if kwarg.annotation: + kw_str += f": {ast.unparse(kwarg.annotation)}" + if i in kwonly_defaults: + kw_str += f" = {ast.unparse(kwonly_defaults[i])}" + args_parts.append(kw_str) + + # **kwargs + if node.args.kwarg: + kwa = node.args.kwarg + kwa_str = f"**{kwa.arg}" + if kwa.annotation: + kwa_str += f": {ast.unparse(kwa.annotation)}" + args_parts.append(kwa_str) + + args_str = ", ".join(args_parts) + + # --- Tipo di ritorno --- + ret_anno = "" + if node.returns: + try: + ret_anno = f" -> {ast.unparse(node.returns)}" + except Exception: + pass + + self._log(f"{prefix}def {node.name}({args_str}){ret_anno}: ...") + + def generic_visit(self, node): + for child in ast.iter_child_nodes(node): + if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): + self.visit(child) + + +# --------------------------------------------------------------------------- +# Gestori per file Non-Python +# --------------------------------------------------------------------------- def _handle_markdown(content: str) -> str: """Estrae solo gli header Markdown.""" @@ -93,11 +202,8 @@ def _handle_toml_ini(content: str) -> str: clean = line.strip() if not clean or clean.startswith("#"): continue - - # Mantiene le sezioni [Project] if clean.startswith("[") and clean.endswith("]"): lines.append(clean) - # Mantiene le chiavi (key = value), semplificando il valore elif "=" in clean: key = clean.split("=")[0].strip() lines.append(f"{key} = ...") @@ -120,7 +226,7 @@ def _handle_json_structure(content: str) -> str: lines.append("}") return "\n".join(lines) return "(JSON Array or Scalar)" - except: + except Exception: return "(Invalid JSON content)" @@ -129,10 +235,8 @@ def _handle_minified_config(content: str) -> str: lines = [] for line in content.splitlines(): clean = line.strip() - # Ignora righe vuote e commenti if clean and not clean.startswith("#"): lines.append(clean) - if not lines: return "(Empty or comments-only file)" return "\n".join(lines) @@ -144,30 +248,25 @@ def _handle_latex_structure(content: str) -> str: Rimuove il testo semplice. """ keep_patterns = [ - r'^\s*\\documentclass', # Tipo documento - r'^\s*\\usepackage', # Dipendenze - r'^\s*\\input', # Inclusioni file - r'^\s*\\include', # Inclusioni file - r'^\s*\\(part|chapter|section|subsection|subsubsection)', # Struttura - r'^\s*\\begin', # Inizio blocchi (figure, table, document) - r'^\s*\\end', # Fine blocchi + r'^\s*\\documentclass', + r'^\s*\\usepackage', + r'^\s*\\input', + r'^\s*\\include', + r'^\s*\\(part|chapter|section|subsection|subsubsection)', + r'^\s*\\begin', + r'^\s*\\end', r'^\s*\\title', r'^\s*\\author', r'^\s*\\date' ] - combined_pattern = re.compile('|'.join(keep_patterns)) lines = [] - for line in content.splitlines(): - # Rimuove commenti line = line.split('%')[0].rstrip() if combined_pattern.match(line): lines.append(line) - if not lines: return "(LaTeX content empty or purely textual)" - return "\n".join(lines) @@ -182,19 +281,22 @@ def _handle_database_toon(file_path: str) -> str: return "(Not a valid SQLite database)" +# --------------------------------------------------------------------------- +# Funzione pubblica principale — TOON (skeleton completo) +# --------------------------------------------------------------------------- + def generate_toon_representation(file_path: str, content: str) -> str: """ Genera una rappresentazione TOON (Token Oriented) in base al tipo di file. + Include classi, firme e docstring. """ _, ext = os.path.splitext(file_path) filename = os.path.basename(file_path) ext = ext.lower() - # 0. DATABASE (check prima per magic bytes, indipendentemente dall'estensione) if is_sqlite_database(file_path): return _handle_database_toon(file_path) - # 1. PYTHON if ext == ".py": try: tree = ast.parse(content) @@ -203,42 +305,31 @@ def generate_toon_representation(file_path: str, content: str) -> str: return "\n".join(visitor.output) except SyntaxError: return f"(Syntax Error parsing {filename})" - - # 2. MARKDOWN (Documentazione) + elif ext in [".md", ".markdown"]: return _handle_markdown(content) - - # --- 2.5 LATEX --- + elif ext in [".tex", ".sty", ".cls"]: return _handle_latex_structure(content) - # 3. CONFIGURAZIONE STRUTTURATA (TOML, INI, CFG) elif ext in [".toml", ".ini", ".cfg"]: return _handle_toml_ini(content) - # 4. DATI (JSON) elif ext == ".json": return _handle_json_structure(content) - # 5. CONFIGURAZIONE A LISTA (.gitignore, requirements.txt, .env) - # Lista di file noti per essere liste di regole elif ext in [".txt", ".dockerignore", ".gitignore"] or filename in [".gitignore", ".dockerignore", "Dockerfile", "Makefile"]: return _handle_minified_config(content) - # 6. YAML (Struttura semplice basata su indentazione) elif ext in [".yml", ".yaml"]: - # Per YAML facciamo un filtro semplice regex per mostrare solo le chiavi lines = [line for line in content.splitlines() if ":" in line and not line.strip().startswith("#")] - # Semplificazione brutale: mostra solo le chiavi clean_lines = [] for l in lines: key = l.split(":")[0] clean_lines.append(f"{key}:") return "\n".join(clean_lines) - # 7. DEFAULT: Fallback minificato (o troncato) else: - # Se non conosciamo il file, mostriamo le prime 5 righe minificate come anteprima minified = _handle_minified_config(content) lines = minified.splitlines() if len(lines) > 10: @@ -246,18 +337,59 @@ def generate_toon_representation(file_path: str, content: str) -> str: return minified +# --------------------------------------------------------------------------- +# Funzione pubblica principale — LIGHT (solo firme) +# --------------------------------------------------------------------------- + +def generate_light_representation(file_path: str, content: str) -> str: + """ + Genera una rappresentazione LIGHT: solo le firme dei metodi/funzioni. + Per file Python: usa LightVisitor (def/async def con tipi, niente corpo). + Per altri tipi di file: delega alla rappresentazione TOON standard, + perché per file non-Python non c'è distinzione tra "firma" e "scheletro". + """ + _, ext = os.path.splitext(file_path) + filename = os.path.basename(file_path) + ext = ext.lower() + + # DATABASE: stessa logica TOON (schema compatto) + if is_sqlite_database(file_path): + return _handle_database_toon(file_path) + + # PYTHON: usa il LightVisitor per le sole firme + if ext == ".py": + try: + tree = ast.parse(content) + visitor = LightVisitor() + visitor.visit(tree) + result = "\n".join(visitor.output) + return result if result.strip() else f"(No functions or classes found in {filename})" + except SyntaxError: + return f"(Syntax Error parsing {filename})" + + # Tutti gli altri tipi: delega al TOON standard + # (markdown → headers, toml → chiavi, json → struttura, ecc.) + return generate_toon_representation(file_path, content) + + +# --------------------------------------------------------------------------- +# Helper per database in focus mode (usato da main.py) +# --------------------------------------------------------------------------- + def generate_database_focused(file_path: str, focused_tables: list = None) -> str: """ Generate database context with specific tables in full detail. Used when database is in focus mode. """ + from deepbase.database import generate_database_context_full, generate_database_context_hybrid + if not is_sqlite_database(file_path): return "(Not a valid SQLite database)" - + try: schema = get_database_schema(file_path) db_name = os.path.basename(file_path) - + if focused_tables: return generate_database_context_hybrid(schema, db_name, focused_tables) else: From 521d16ecb9b5f2ca4be90731d41d6bff1e1aefac Mon Sep 17 00:00:00 2001 From: Giuliano Ranauro Date: Thu, 12 Feb 2026 16:28:01 +0100 Subject: [PATCH 2/9] updated help view and working light mode for python --- src/deepbase/main.py | 176 ++++++++++++++++++++++++++++++++++++------- src/deepbase/toon.py | 94 ++++++++++++++++++++--- 2 files changed, 233 insertions(+), 37 deletions(-) diff --git a/src/deepbase/main.py b/src/deepbase/main.py index ef2a686..fea643a 100644 --- a/src/deepbase/main.py +++ b/src/deepbase/main.py @@ -15,6 +15,9 @@ from deepbase.parsers import get_document_structure from deepbase.database import is_sqlite_database, get_database_schema, generate_database_context_full +from rich.table import Table +from rich.panel import Panel + # --- CONFIGURAZIONI --- DEFAULT_CONFIG = { @@ -39,20 +42,105 @@ } } -EPILOG_TEXT = """ -[bold]Documentation:[/] https://follen99.github.io/DeepBase/ -[bold]Repository:[/] https://github.com/follen99/DeepBase -[bold]Issues:[/] https://github.com/follen99/DeepBase/issues -[bold]PyPI:[/] https://pypi.org/project/deepbase/ - -[italic]DeepBase scans your project and consolidates it for LLM context analysis.[/italic] +LIGHT_MODE_NOTICE = """> **[LIGHT MODE]** Questo file è stato generato in modalità risparmio token: vengono incluse solo le firme dei metodi/funzioni e i commenti iniziali dei file. Il corpo del codice è omesso. Se hai bisogno di approfondire un file, una classe o un metodo specifico, chiedi all'utente di fornire la porzione di codice completa. """ +# EPILOG_TEXT = """ +# [bold]Configuration (.deepbase.toml):[/bold] +# Create a [cyan].deepbase.toml[/cyan] in your project root to customize behavior: + +# [dim]# Ignore additional directories[/dim] +# ignore_dirs = ["my_assets", "experimental", ".cache"] + +# [dim]# Ignore specific files[/dim] +# ignore_files = ["*.log", "secrets.env"] + +# [dim]# Add extra file extensions to include[/dim] +# significant_extensions = [".cfg", "Makefile", ".tsx", ".vue"] + +# [bold]Documentation:[/] https://follen99.github.io/DeepBase/ +# [bold]Repository:[/] https://github.com/follen99/DeepBase +# [bold]Issues:[/] https://github.com/follen99/DeepBase/issues +# [bold]PyPI:[/] https://pypi.org/project/deepbase/ + +# [italic]DeepBase scans your project and consolidates it for LLM context analysis.[/italic] +# """ + app = typer.Typer( name="deepbase", add_completion=False, rich_markup_mode="rich" ) + +@app.callback(invoke_without_command=True) +def main_callback( + ctx: typer.Context, + help: bool = typer.Option(False, "--help", "-h", is_eager=True, help="Show this help message and exit.") +): + if help or ctx.invoked_subcommand is None: + console.print(Panel.fit( + "[bold cyan]DeepBase[/bold cyan] — Consolidate project context for LLMs\n\n" + "[bold]Usage:[/bold] [green]deepbase[/green] [OPTIONS] [TARGET]\n\n" + "[bold]Arguments:[/bold]\n" + " [cyan]TARGET[/cyan] The file or directory to scan [dim][default: current dir][/dim]\n", + title="DeepBase v1.7.0", border_style="cyan" + )) + + # Options Table + options_table = Table(show_header=False, box=None, padding=(0, 2)) + options_table.add_column(style="cyan", no_wrap=True) + options_table.add_column(style="green", no_wrap=True) + options_table.add_column() + + options = [ + ("-v, --version", "", "Show version and exit"), + ("-o, --output", "TEXT", "Output file [dim][default: llm_context.md][/dim]"), + ("-V, --verbose", "", "Show detailed output"), + ("-a, --all", "", "Include full content of ALL files"), + ("-l, --light", "", "Token-saving mode (signatures only)"), + ("-f, --focus", "TEXT", "Pattern to focus on (repeatable)"), + ("-ff, --focus-file", "TEXT", "Path to focus patterns file"), + ("-h, --help", "", "Show this message and exit"), + ] + for opt, meta, desc in options: + options_table.add_row(opt, meta, desc) + + console.print(Panel(options_table, title="Options", border_style="green", title_align="left")) + + # Configuration Panel (soluzione semplice) + config_content = """Create a [cyan].deepbase.toml[/cyan] in your project root: + +[dim]# Ignore additional directories[/dim] +[yellow]ignore_dirs = ["my_assets", "experimental"][/yellow] + +[dim]# Ignore specific files[/dim] +[yellow]ignore_files = ["*.log", "secrets.env"][/yellow] + +[dim]# Add extra file extensions[/dim] +[yellow]significant_extensions = [".cfg", "Makefile", ".tsx"][/yellow]""" + + console.print(Panel( + config_content, + title="Configuration (.deepbase.toml)", + border_style="yellow", + title_align="left" + )) + + # Links Table + links_table = Table(show_header=False, box=None, padding=(0, 2)) + links_table.add_column(style="bold") + links_table.add_column(style="blue") + + links_table.add_row("Documentation:", "https://follen99.github.io/DeepBase/") + links_table.add_row("Repository:", "https://github.com/follen99/DeepBase") + links_table.add_row("Issues:", "https://github.com/follen99/DeepBase/issues") + links_table.add_row("PyPI:", "https://pypi.org/project/deepbase/") + + console.print(Panel(links_table, title="Links", border_style="blue", title_align="left")) + + raise typer.Exit() + + console = Console() # --- UTILS --- @@ -87,6 +175,16 @@ def estimate_tokens(size_bytes: int) -> str: return f"~{tokens/1000000:.1f}M t" +def estimate_tokens_for_content(text: str) -> int: + """Stima i token di una stringa già processata (non del file raw).""" + return math.ceil(len(text.encode("utf-8")) / 4) + +def calculate_light_tokens(file_path: str, content: str) -> int: + """Calcola i token della rappresentazione light di un file.""" + from deepbase.toon import generate_light_representation + light_repr = generate_light_representation(file_path, content) + return estimate_tokens_for_content(light_repr) + def is_significant_file(file_path: str, config: Dict[str, Any], output_file_abs: str = None) -> bool: file_name = os.path.basename(file_path) @@ -117,7 +215,7 @@ def is_significant_file(file_path: str, config: Dict[str, Any], output_file_abs: return False -def calculate_project_stats(root_dir: str, config: Dict[str, Any], output_file_abs: str) -> int: +def calculate_project_stats(root_dir: str, config: Dict[str, Any], output_file_abs: str, light_mode: bool = False) -> int: total_size = 0 ignore_dirs = config["ignore_dirs"] for dirpath, dirnames, filenames in os.walk(root_dir, topdown=True): @@ -126,7 +224,12 @@ def calculate_project_stats(root_dir: str, config: Dict[str, Any], output_file_a fpath = os.path.join(dirpath, f) if is_significant_file(fpath, config, output_file_abs): try: - total_size += os.path.getsize(fpath) + if light_mode and not is_sqlite_database(fpath): + content = read_file_content(fpath) + light_repr = generate_light_representation(fpath, content) + total_size += len(light_repr.encode("utf-8")) + else: + total_size += os.path.getsize(fpath) except OSError: pass return total_size @@ -139,7 +242,8 @@ def _generate_tree_recursive( prefix: str, config: Dict[str, Any], total_project_size: int, - output_file_abs: str + output_file_abs: str, + light_mode: bool = False # serve per decidere se includere o meno le stime token nei file (in light mode non sono affidabili, meglio ometterle ) -> Tuple[str, int]: """ Ritorna una tupla: (stringa_visuale_albero, dimensione_totale_bytes_subtree). @@ -193,27 +297,27 @@ def _generate_tree_recursive( else: icon = "🗄️ " if is_sqlite_database(full_path) else "📄 " try: - size = os.path.getsize(full_path) + raw_size = os.path.getsize(full_path) + if light_mode and not is_sqlite_database(full_path): + # In light mode, calcola i token dalla rappresentazione light + content = read_file_content(full_path) + light_repr = generate_light_representation(full_path, content) + size = len(light_repr.encode("utf-8")) + else: + size = raw_size subtree_size += size - - file_stats = "" - if total_project_size > 0 and size > 0: - percent = (size / total_project_size) * 100 - token_est = estimate_tokens(size) - file_stats = f" ({percent:.1f}% | {token_est})" - - output_str += f"{prefix}{connector}{icon}{name}{file_stats}\n" except OSError: pass return output_str, subtree_size -def generate_directory_tree(root_dir: str, config: Dict[str, Any], output_file_abs: str) -> Tuple[str, int, int]: +def generate_directory_tree(root_dir: str, config: Dict[str, Any], output_file_abs: str, light_mode: bool = False) -> Tuple[str, int, int]: abs_root = os.path.abspath(root_dir) - total_size = calculate_project_stats(root_dir, config, output_file_abs) + # Per il totale in light mode, dovremmo idealmente processare tutto, ma per performance usiamo stima + total_size = calculate_project_stats(root_dir, config, output_file_abs, light_mode) - tree_body, _ = _generate_tree_recursive(root_dir, "", config, total_size, output_file_abs) + tree_body, _ = _generate_tree_recursive(root_dir, "", config, total_size, output_file_abs, light_mode) header = f"📁 {os.path.basename(abs_root) or '.'}/\n" total_tokens_est = math.ceil(total_size / 4) @@ -299,7 +403,8 @@ def version_callback(value: bool): raise typer.Exit() -@app.command(epilog=EPILOG_TEXT, rich_help_panel="Main Commands") +# Assenza di epilog +@app.command(rich_help_panel="Main Commands") def create( target: str = typer.Argument(None, help="The file or directory to scan."), version: Optional[bool] = typer.Option(None, "--version", "-v", callback=version_callback, is_eager=True, help="Show version and exit."), @@ -366,6 +471,8 @@ def fmt_separator(): return "-" * 40 + "\n\n" filename = os.path.basename(target) is_db = is_sqlite_database(target) outfile.write(f"# Analysis: {filename}\n\n") + if light_mode: + outfile.write(LIGHT_MODE_NOTICE + "\n") if is_db: schema = get_database_schema(target) @@ -400,10 +507,17 @@ def fmt_separator(): return "-" * 40 + "\n\n" elif os.path.isdir(target): config = load_config(target) outfile.write(f"# Project Context: {os.path.basename(os.path.abspath(target))}\n\n") + if light_mode: + outfile.write(LIGHT_MODE_NOTICE + "\n") outfile.write(fmt_header("PROJECT STRUCTURE")) - tree_str, total_bytes, total_tokens = generate_directory_tree(target, config, abs_output_path) - outfile.write(f"> Total Size: {total_bytes/1024:.2f} KB | Est. Tokens: ~{total_tokens:,}\n") + tree_str, total_bytes, total_tokens = generate_directory_tree(target, config, abs_output_path, light_mode=light_mode) + + if light_mode: + outfile.write(f"> Total Size (raw): {total_bytes/1024:.2f} KB | Est. Tokens (light): ~{total_tokens:,}\n") + else: + outfile.write(f"> Total Size: {total_bytes/1024:.2f} KB | Est. Tokens: ~{total_tokens:,}\n") + outfile.write(tree_str) outfile.write("\n\n") @@ -454,7 +568,17 @@ def fmt_separator(): return "-" * 40 + "\n\n" if should_write_full: outfile.write(content) elif should_write_light: - outfile.write(generate_light_representation(fpath, content)) + light_output = generate_light_representation(fpath, content) + outfile.write(light_output) + + # commento perchè viene mostrato in alto + # annotazione token reali dopo l'elaborazione + + # light_tokens = estimate_tokens_for_content(light_output) + # raw_tokens = math.ceil(os.path.getsize(fpath) / 4) + # savings = raw_tokens - light_tokens + # if savings > 0: + # outfile.write(f"\n\n") outfile.write(fmt_file_end(rel_path)) outfile.write(fmt_separator()) diff --git a/src/deepbase/toon.py b/src/deepbase/toon.py index 7f002e5..d23618f 100644 --- a/src/deepbase/toon.py +++ b/src/deepbase/toon.py @@ -78,14 +78,65 @@ def generic_visit(self, node): # --------------------------------------------------------------------------- -# LIGHT VISITOR — solo firme Python (def/async def), niente classi né docstring +# LIGHT VISITOR — solo firme Python con docstring/commenti iniziali # --------------------------------------------------------------------------- +def _extract_module_comments(source: str) -> str: + """ + Estrae i commenti # e la docstring di modulo dalle prime righe del sorgente. + Si ferma al primo costrutto non-commento e non-docstring. + """ + lines = [] + in_docstring = False + docstring_char = None + source_lines = source.splitlines() + + for line in source_lines: + stripped = line.strip() + + # Riga vuota: la includiamo solo se siamo già dentro i commenti iniziali + if not stripped: + if lines: + lines.append("") + continue + + # Commenti # semplici + if stripped.startswith("#") and not in_docstring: + lines.append(line.rstrip()) + continue + + # Inizio docstring di modulo (""" o ''') + if not in_docstring and (stripped.startswith('"""') or stripped.startswith("'''")): + docstring_char = stripped[:3] + in_docstring = True + lines.append(line.rstrip()) + # Docstring su singola riga + rest = stripped[3:] + if rest.endswith(docstring_char) and len(rest) >= 3: + in_docstring = False + continue + + if in_docstring: + lines.append(line.rstrip()) + if stripped.endswith(docstring_char): + in_docstring = False + continue + + # Qualsiasi altra cosa: fine dell'header + break + + # Rimuovi trailing blank lines + while lines and not lines[-1].strip(): + lines.pop() + + return "\n".join(lines) + + class LightVisitor(ast.NodeVisitor): """ - Visita l'AST e produce SOLO le firme dei metodi/funzioni Python, + Visita l'AST e produce le firme dei metodi/funzioni Python, preservando la corretta indentazione per classi nidificate. - Non include docstring, decoratori o corpo della funzione. + Include la prima riga di docstring di classi e funzioni come commento. """ def __init__(self): @@ -96,10 +147,16 @@ def _log(self, text): indent = " " * self.indent_level self.output.append(f"{indent}{text}") - # Entra nelle classi per mantenere la gerarchia, ma non le stampa def visit_ClassDef(self, node): self._log(f"class {node.name}:") self.indent_level += 1 + + # Docstring della classe (prima riga) + docstring = ast.get_docstring(node) + if docstring: + first_line = docstring.split('\n')[0].strip() + self._log(f'"""{first_line}"""') + self.generic_visit(node) self.indent_level -= 1 @@ -116,7 +173,6 @@ def _emit_signature(self, node, is_async: bool): # --- Argomenti con annotazioni di tipo --- args_parts = [] - # Calcola l'offset per i default (i default si applicano agli ultimi N args) all_args = node.args.args defaults = node.args.defaults defaults_offset = len(all_args) - len(defaults) @@ -125,7 +181,6 @@ def _emit_signature(self, node, is_async: bool): arg_str = arg.arg if arg.annotation: arg_str += f": {ast.unparse(arg.annotation)}" - # Default value default_idx = i - defaults_offset if default_idx >= 0: default_val = ast.unparse(defaults[default_idx]) @@ -174,6 +229,14 @@ def _emit_signature(self, node, is_async: bool): self._log(f"{prefix}def {node.name}({args_str}){ret_anno}: ...") + # Docstring della funzione (prima riga, indentata sotto la firma) + docstring = ast.get_docstring(node) + if docstring: + first_line = docstring.split('\n')[0].strip() + self.indent_level += 1 + self._log(f'"""{first_line}"""') + self.indent_level -= 1 + def generic_visit(self, node): for child in ast.iter_child_nodes(node): if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): @@ -344,7 +407,8 @@ def generate_toon_representation(file_path: str, content: str) -> str: def generate_light_representation(file_path: str, content: str) -> str: """ Genera una rappresentazione LIGHT: solo le firme dei metodi/funzioni. - Per file Python: usa LightVisitor (def/async def con tipi, niente corpo). + Per file Python: usa LightVisitor (def/async def con tipi, niente corpo) + preceduto dai commenti/docstring di modulo iniziali. Per altri tipi di file: delega alla rappresentazione TOON standard, perché per file non-Python non c'è distinzione tra "firma" e "scheletro". """ @@ -356,19 +420,27 @@ def generate_light_representation(file_path: str, content: str) -> str: if is_sqlite_database(file_path): return _handle_database_toon(file_path) - # PYTHON: usa il LightVisitor per le sole firme + # PYTHON: commenti di modulo + firme via LightVisitor if ext == ".py": try: tree = ast.parse(content) visitor = LightVisitor() visitor.visit(tree) - result = "\n".join(visitor.output) - return result if result.strip() else f"(No functions or classes found in {filename})" + signatures = "\n".join(visitor.output) + + # Prepend commenti/docstring iniziali del modulo (se presenti) + module_header = _extract_module_comments(content) + if module_header: + result = module_header + "\n\n" + signatures + else: + result = signatures + + return result.strip() or f"(No functions or classes found in {filename})" except SyntaxError: return f"(Syntax Error parsing {filename})" # Tutti gli altri tipi: delega al TOON standard - # (markdown → headers, toml → chiavi, json → struttura, ecc.) + # (markdown -> headers, toml -> chiavi, json -> struttura, ecc.) return generate_toon_representation(file_path, content) From 6de93c14dd763d1fe4d5a56ebbeeed7d62e58263 Mon Sep 17 00:00:00 2001 From: Giuliano Ranauro Date: Thu, 12 Feb 2026 22:11:51 +0100 Subject: [PATCH 3/9] fixing bug non veniva piu riconosciuto alcun comando al di fuori di --help Fix CLI argument parsing to support mixed options Refactored main.py to use `typer.run` instead of a command group. This resolves parsing errors when options follow the target argument (e.g., `deepbase . --all`) ensuring single-command behavior. --- src/deepbase/main.py | 217 +++++++++++++++---------------------------- 1 file changed, 74 insertions(+), 143 deletions(-) diff --git a/src/deepbase/main.py b/src/deepbase/main.py index fea643a..ebb66a1 100644 --- a/src/deepbase/main.py +++ b/src/deepbase/main.py @@ -45,102 +45,6 @@ LIGHT_MODE_NOTICE = """> **[LIGHT MODE]** Questo file è stato generato in modalità risparmio token: vengono incluse solo le firme dei metodi/funzioni e i commenti iniziali dei file. Il corpo del codice è omesso. Se hai bisogno di approfondire un file, una classe o un metodo specifico, chiedi all'utente di fornire la porzione di codice completa. """ -# EPILOG_TEXT = """ -# [bold]Configuration (.deepbase.toml):[/bold] -# Create a [cyan].deepbase.toml[/cyan] in your project root to customize behavior: - -# [dim]# Ignore additional directories[/dim] -# ignore_dirs = ["my_assets", "experimental", ".cache"] - -# [dim]# Ignore specific files[/dim] -# ignore_files = ["*.log", "secrets.env"] - -# [dim]# Add extra file extensions to include[/dim] -# significant_extensions = [".cfg", "Makefile", ".tsx", ".vue"] - -# [bold]Documentation:[/] https://follen99.github.io/DeepBase/ -# [bold]Repository:[/] https://github.com/follen99/DeepBase -# [bold]Issues:[/] https://github.com/follen99/DeepBase/issues -# [bold]PyPI:[/] https://pypi.org/project/deepbase/ - -# [italic]DeepBase scans your project and consolidates it for LLM context analysis.[/italic] -# """ - -app = typer.Typer( - name="deepbase", - add_completion=False, - rich_markup_mode="rich" -) - -@app.callback(invoke_without_command=True) -def main_callback( - ctx: typer.Context, - help: bool = typer.Option(False, "--help", "-h", is_eager=True, help="Show this help message and exit.") -): - if help or ctx.invoked_subcommand is None: - console.print(Panel.fit( - "[bold cyan]DeepBase[/bold cyan] — Consolidate project context for LLMs\n\n" - "[bold]Usage:[/bold] [green]deepbase[/green] [OPTIONS] [TARGET]\n\n" - "[bold]Arguments:[/bold]\n" - " [cyan]TARGET[/cyan] The file or directory to scan [dim][default: current dir][/dim]\n", - title="DeepBase v1.7.0", border_style="cyan" - )) - - # Options Table - options_table = Table(show_header=False, box=None, padding=(0, 2)) - options_table.add_column(style="cyan", no_wrap=True) - options_table.add_column(style="green", no_wrap=True) - options_table.add_column() - - options = [ - ("-v, --version", "", "Show version and exit"), - ("-o, --output", "TEXT", "Output file [dim][default: llm_context.md][/dim]"), - ("-V, --verbose", "", "Show detailed output"), - ("-a, --all", "", "Include full content of ALL files"), - ("-l, --light", "", "Token-saving mode (signatures only)"), - ("-f, --focus", "TEXT", "Pattern to focus on (repeatable)"), - ("-ff, --focus-file", "TEXT", "Path to focus patterns file"), - ("-h, --help", "", "Show this message and exit"), - ] - for opt, meta, desc in options: - options_table.add_row(opt, meta, desc) - - console.print(Panel(options_table, title="Options", border_style="green", title_align="left")) - - # Configuration Panel (soluzione semplice) - config_content = """Create a [cyan].deepbase.toml[/cyan] in your project root: - -[dim]# Ignore additional directories[/dim] -[yellow]ignore_dirs = ["my_assets", "experimental"][/yellow] - -[dim]# Ignore specific files[/dim] -[yellow]ignore_files = ["*.log", "secrets.env"][/yellow] - -[dim]# Add extra file extensions[/dim] -[yellow]significant_extensions = [".cfg", "Makefile", ".tsx"][/yellow]""" - - console.print(Panel( - config_content, - title="Configuration (.deepbase.toml)", - border_style="yellow", - title_align="left" - )) - - # Links Table - links_table = Table(show_header=False, box=None, padding=(0, 2)) - links_table.add_column(style="bold") - links_table.add_column(style="blue") - - links_table.add_row("Documentation:", "https://follen99.github.io/DeepBase/") - links_table.add_row("Repository:", "https://github.com/follen99/DeepBase") - links_table.add_row("Issues:", "https://github.com/follen99/DeepBase/issues") - links_table.add_row("PyPI:", "https://pypi.org/project/deepbase/") - - console.print(Panel(links_table, title="Links", border_style="blue", title_align="left")) - - raise typer.Exit() - - console = Console() # --- UTILS --- @@ -176,11 +80,9 @@ def estimate_tokens(size_bytes: int) -> str: def estimate_tokens_for_content(text: str) -> int: - """Stima i token di una stringa già processata (non del file raw).""" return math.ceil(len(text.encode("utf-8")) / 4) def calculate_light_tokens(file_path: str, content: str) -> int: - """Calcola i token della rappresentazione light di un file.""" from deepbase.toon import generate_light_representation light_repr = generate_light_representation(file_path, content) return estimate_tokens_for_content(light_repr) @@ -188,15 +90,12 @@ def calculate_light_tokens(file_path: str, content: str) -> int: def is_significant_file(file_path: str, config: Dict[str, Any], output_file_abs: str = None) -> bool: file_name = os.path.basename(file_path) - # Check 1: Esclusione del file di output corrente (tramite path assoluto) if output_file_abs and os.path.abspath(file_path) == output_file_abs: return False - # Check 2: Esclusione per nome (es: llm_context.md già esistente nella dir target) if output_file_abs and file_name == os.path.basename(output_file_abs): return False - # Check 3: Esclusione file spazzatura (lockfiles, etc) if file_name in config["ignore_files"]: return False @@ -243,11 +142,8 @@ def _generate_tree_recursive( config: Dict[str, Any], total_project_size: int, output_file_abs: str, - light_mode: bool = False # serve per decidere se includere o meno le stime token nei file (in light mode non sono affidabili, meglio ometterle + light_mode: bool = False ) -> Tuple[str, int]: - """ - Ritorna una tupla: (stringa_visuale_albero, dimensione_totale_bytes_subtree). - """ output_str = "" subtree_size = 0 @@ -299,7 +195,6 @@ def _generate_tree_recursive( try: raw_size = os.path.getsize(full_path) if light_mode and not is_sqlite_database(full_path): - # In light mode, calcola i token dalla rappresentazione light content = read_file_content(full_path) light_repr = generate_light_representation(full_path, content) size = len(light_repr.encode("utf-8")) @@ -314,11 +209,8 @@ def _generate_tree_recursive( def generate_directory_tree(root_dir: str, config: Dict[str, Any], output_file_abs: str, light_mode: bool = False) -> Tuple[str, int, int]: abs_root = os.path.abspath(root_dir) - # Per il totale in light mode, dovremmo idealmente processare tutto, ma per performance usiamo stima total_size = calculate_project_stats(root_dir, config, output_file_abs, light_mode) - tree_body, _ = _generate_tree_recursive(root_dir, "", config, total_size, output_file_abs, light_mode) - header = f"📁 {os.path.basename(abs_root) or '.'}/\n" total_tokens_est = math.ceil(total_size / 4) return header + tree_body, total_size, total_tokens_est @@ -403,30 +295,86 @@ def version_callback(value: bool): raise typer.Exit() -# Assenza di epilog -@app.command(rich_help_panel="Main Commands") -def create( +# --- LOGICA PRINCIPALE (SENZA CLASSE TYPER) --- + +def main( target: str = typer.Argument(None, help="The file or directory to scan."), + help: bool = typer.Option(False, "--help", "-h", is_eager=True, help="Show this help message and exit."), version: Optional[bool] = typer.Option(None, "--version", "-v", callback=version_callback, is_eager=True, help="Show version and exit."), output: str = typer.Option("llm_context.md", "--output", "-o", help="The output file."), verbose: bool = typer.Option(False, "--verbose", "-V", help="Show detailed output."), include_all: bool = typer.Option(False, "--all", "-a", help="Include full content of ALL files."), - light_mode: bool = typer.Option(False, "--light", "-l", help="Token-saving mode: includes only method/function signatures instead of full code."), - focus: Optional[List[str]] = typer.Option(None, "--focus", "-f", help="Pattern to focus on (full content for matching files)."), + light_mode: bool = typer.Option(False, "--light", "-l", help="Token-saving mode (signatures only)."), + focus: Optional[List[str]] = typer.Option(None, "--focus", "-f", help="Pattern to focus on (repeatable)."), focus_file: Optional[str] = typer.Option(None, "--focus-file", "-ff", help="Path to focus patterns file.") ): """ Analyzes a directory OR a single file. - Default: structure tree only. - Use [bold]--all[/bold] for full content, [bold]--light[/bold] for signatures only, - or [bold]--focus[/bold] / [bold]--focus-file[/bold] for hybrid mode. """ - if target is None: - console.print("[red]Error: Missing argument 'TARGET'.[/red]") - console.print(f"\n[bold]Usage:[/bold] deepbase [OPTIONS] [TARGET]\n\nTry [cyan]deepbase --help[/cyan] for more info.") - raise typer.Exit(code=1) + # 1. Custom HELP Logic + if help or target is None: + console.print(Panel.fit( + "[bold cyan]DeepBase[/bold cyan] — Consolidate project context for LLMs\n\n" + "[bold]Usage:[/bold] [green]deepbase[/green] [OPTIONS] [TARGET]\n\n" + "[bold]Arguments:[/bold]\n" + " [cyan]TARGET[/cyan] The file or directory to scan [dim][default: current dir][/dim]\n", + title="DeepBase v1.7.0", border_style="cyan" + )) + + # Options Table + options_table = Table(show_header=False, box=None, padding=(0, 2)) + options_table.add_column(style="cyan", no_wrap=True) + options_table.add_column(style="green", no_wrap=True) + options_table.add_column() + + options = [ + ("-v, --version", "", "Show version and exit"), + ("-o, --output", "TEXT", "Output file [dim][default: llm_context.md][/dim]"), + ("-V, --verbose", "", "Show detailed output"), + ("-a, --all", "", "Include full content of ALL files"), + ("-l, --light", "", "Token-saving mode (signatures only)"), + ("-f, --focus", "TEXT", "Pattern to focus on (repeatable)"), + ("-ff, --focus-file", "TEXT", "Path to focus patterns file"), + ("-h, --help", "", "Show this message and exit"), + ] + for opt, meta, desc in options: + options_table.add_row(opt, meta, desc) + + console.print(Panel(options_table, title="Options", border_style="green", title_align="left")) + + config_content = """Create a [cyan].deepbase.toml[/cyan] in your project root: + +[dim]# Ignore additional directories[/dim] +[yellow]ignore_dirs = ["my_assets", "experimental"][/yellow] + +[dim]# Ignore specific files[/dim] +[yellow]ignore_files = ["*.log", "secrets.env"][/yellow] + +[dim]# Add extra file extensions[/dim] +[yellow]significant_extensions = [".cfg", "Makefile", ".tsx"][/yellow]""" + + console.print(Panel( + config_content, + title="Configuration (.deepbase.toml)", + border_style="yellow", + title_align="left" + )) + + links_table = Table(show_header=False, box=None, padding=(0, 2)) + links_table.add_column(style="bold") + links_table.add_column(style="blue") + + links_table.add_row("Documentation:", "https://follen99.github.io/DeepBase/") + links_table.add_row("Repository:", "https://github.com/follen99/DeepBase") + links_table.add_row("Issues:", "https://github.com/follen99/DeepBase/issues") + links_table.add_row("PyPI:", "https://pypi.org/project/deepbase/") + + console.print(Panel(links_table, title="Links", border_style="blue", title_align="left")) + + raise typer.Exit() + # 2. Main Logic Start if not os.path.exists(target): console.print(f"[bold red]Error:[/bold red] Target not found: '{target}'") raise typer.Exit(code=1) @@ -448,8 +396,6 @@ def create( console.print(f"[bold green]Analyzing '{target}'...[/bold green]{mode_label}") - # --- Formatter helpers --- - # In light mode usiamo lo stesso formato compatto di TOON per leggibilità if light_mode: def fmt_header(title): return f"### {title}\n\n" def fmt_file_start(path, icon=""): return f"> FILE: {icon}{path}\n" @@ -463,10 +409,7 @@ def fmt_separator(): return "-" * 40 + "\n\n" try: with open(output, "w", encoding="utf-8") as outfile: - - # ---------------------------------------------------------------- - # CASO 1: singolo file - # ---------------------------------------------------------------- + # CASO 1: Singolo file if os.path.isfile(target): filename = os.path.basename(target) is_db = is_sqlite_database(target) @@ -501,9 +444,7 @@ def fmt_separator(): return "-" * 40 + "\n\n" outfile.write(content) outfile.write(fmt_file_end(filename)) - # ---------------------------------------------------------------- - # CASO 2: directory - # ---------------------------------------------------------------- + # CASO 2: Directory elif os.path.isdir(target): config = load_config(target) outfile.write(f"# Project Context: {os.path.basename(os.path.abspath(target))}\n\n") @@ -538,10 +479,6 @@ def fmt_separator(): return "-" * 40 + "\n\n" focused_tables = extract_focused_tables(fpath, active_focus_patterns) if focused_tables: is_in_focus = True - # Logica di decisione: - # - --all: sempre full content - # - --light: light per tutto, full per file in focus - # - --focus senza altri flag: light/skip per tutto, full per file in focus should_write_full = include_all or is_in_focus should_write_light = light_mode and not should_write_full @@ -570,15 +507,6 @@ def fmt_separator(): return "-" * 40 + "\n\n" elif should_write_light: light_output = generate_light_representation(fpath, content) outfile.write(light_output) - - # commento perchè viene mostrato in alto - # annotazione token reali dopo l'elaborazione - - # light_tokens = estimate_tokens_for_content(light_output) - # raw_tokens = math.ceil(os.path.getsize(fpath) / 4) - # savings = raw_tokens - light_tokens - # if savings > 0: - # outfile.write(f"\n\n") outfile.write(fmt_file_end(rel_path)) outfile.write(fmt_separator()) @@ -591,6 +519,9 @@ def fmt_separator(): return "-" * 40 + "\n\n" console.print(f"\n[bold red]Error:[/bold red] {e}") raise typer.Exit(code=1) +# Entry point che usa typer.run per gestire il comando come SINGOLO +def app(): + typer.run(main) if __name__ == "__main__": app() \ No newline at end of file From ff942f527802c978ef6df07c4474caa8956eff36 Mon Sep 17 00:00:00 2001 From: Giuliano Ranauro Date: Thu, 12 Feb 2026 22:26:08 +0100 Subject: [PATCH 4/9] Add comprehensive test suite for Python projects Introduced tests/test_suite_python.py covering: - Basic directory structure generation - Full content extraction (--all) - Light mode with signature parsing (--light) - Hybrid focus mode (full content for focused files, signatures for others) - TOML configuration loading and file ignoring - Custom output file paths (-o) - Error handling for invalid targets --- llm_context.md | 41 +++++++++ src/deepbase/main.py | 10 +++ test.txt | 42 +++++++++ tests/test_cli.py | 56 ------------ tests/test_parsers.py | 50 ----------- tests/test_suite_python.py | 175 +++++++++++++++++++++++++++++++++++++ 6 files changed, 268 insertions(+), 106 deletions(-) create mode 100644 llm_context.md create mode 100644 test.txt delete mode 100644 tests/test_cli.py delete mode 100644 tests/test_parsers.py create mode 100644 tests/test_suite_python.py diff --git a/llm_context.md b/llm_context.md new file mode 100644 index 0000000..5a8e7cf --- /dev/null +++ b/llm_context.md @@ -0,0 +1,41 @@ +# Project Context: test_toml_configuration0 + +================================================================================ +### PROJECT STRUCTURE ### +================================================================================ + +> Total Size: 0.19 KB | Est. Tokens: ~48 +📁 test_toml_configuration0/ +└── 📁 utils/ (12.0% | ~6t) + + +================================================================================ +### FILE CONTENTS ### +================================================================================ + +--- START OF FILE: main.py --- + + +import os + +def hello_world(): + print("Hello content") + return True + +class MyClass: + def method_one(self): + # This is a comment inside + return 1 + + +--- END OF FILE: main.py --- +---------------------------------------- + +--- START OF FILE: utils/helper.py --- + +def help_me(): + pass + +--- END OF FILE: utils/helper.py --- +---------------------------------------- + diff --git a/src/deepbase/main.py b/src/deepbase/main.py index ebb66a1..c68a236 100644 --- a/src/deepbase/main.py +++ b/src/deepbase/main.py @@ -201,6 +201,16 @@ def _generate_tree_recursive( else: size = raw_size subtree_size += size + + # [FIX] Ripristinate le righe mancanti per stampare il file nell'albero! + file_stats = "" + if total_project_size > 0 and size > 0: + percent = (size / total_project_size) * 100 + token_est = estimate_tokens(size) + file_stats = f" ({percent:.1f}% | {token_est})" + + output_str += f"{prefix}{connector}{icon}{name}{file_stats}\n" + except OSError: pass diff --git a/test.txt b/test.txt new file mode 100644 index 0000000..33e25f2 --- /dev/null +++ b/test.txt @@ -0,0 +1,42 @@ +============================= test session starts ============================== +platform linux -- Python 3.10.12, pytest-9.0.1, pluggy-1.6.0 +rootdir: /home/follen/Documents/git-local/DeepBase +configfile: pyproject.toml +collected 8 items + +tests/test_suite_python.py F....... [100%] + +=================================== FAILURES =================================== +_____________________ TestPythonSuite.test_basic_structure _____________________ + +self = +tmp_path = PosixPath('/tmp/pytest-of-follen/pytest-7/test_basic_structure0') + + def test_basic_structure(self, tmp_path): + """Testa che il comando base generi la struttura nel file.""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + + # Passiamo esplicitamente l'output file nel tmp_path + result = runner.invoke(test_app, [str(tmp_path), "-o", str(output_file)]) + + assert result.exit_code == 0 + assert output_file.exists() + + content = output_file.read_text(encoding="utf-8") + + # Verifica presenza nell'albero (DENTRO IL FILE, non nello stdout) +> assert "main.py" in content +E AssertionError: assert 'main.py' in '# Project Context: test_basic_structure0\n\n=========================================================================...==============\n\n> Total Size: 0.20 KB | Est. Tokens: ~52\n📁 test_basic_structure0/\n└── 📁 utils/ (11.2% | ~6t)\n\n\n' + +tests/test_suite_python.py:66: AssertionError +=============================== warnings summary =============================== +.venv/lib/python3.10/site-packages/typer/main.py:306 + /home/follen/Documents/git-local/DeepBase/.venv/lib/python3.10/site-packages/typer/main.py:306: PytestCollectionWarning: cannot collect 'test_app' because it is not a function. + def __call__(self, *args: Any, **kwargs: Any) -> Any: + +-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html +=========================== short test summary info ============================ +FAILED tests/test_suite_python.py::TestPythonSuite::test_basic_structure - As... +==================== 1 failed, 7 passed, 1 warning in 0.17s ==================== diff --git a/tests/test_cli.py b/tests/test_cli.py deleted file mode 100644 index 8f2d194..0000000 --- a/tests/test_cli.py +++ /dev/null @@ -1,56 +0,0 @@ -# tests/test_cli.py - -import os -from typer.testing import CliRunner -from deepbase.main import app - -runner = CliRunner() - -# ... (gli altri test sulle directory rimangono uguali) ... - -def test_cli_single_file_default(tmp_path): - """ - Testa che di default (senza -a) venga generata SOLO la struttura. - """ - single_file = tmp_path / "README.md" - unique_content_string = "Questo è il contenuto univoco del file." - single_file.write_text(f"# Intro\n{unique_content_string}\n## Usage", encoding="utf-8") - - output_file = tmp_path / "structure_only.md" - - result = runner.invoke(app, [str(single_file), "-o", str(output_file)]) - - assert result.exit_code == 0 - content = output_file.read_text(encoding="utf-8") - - # DEVE contenere la struttura - assert "# Intro" in content - assert "## Usage" in content - - # NON DEVE contenere il corpo del testo (perché non abbiamo passato -a) - # Nota: la regex dei parser estrae solo le linee con #, quindi la stringa di testo puro - # non dovrebbe apparire nell'output se stiamo stampando solo la structure section. - assert unique_content_string not in content - -def test_cli_single_file_with_all(tmp_path): - """ - Testa che con il flag --all venga generato ANCHE il contenuto. - """ - single_file = tmp_path / "DOCS.md" - unique_content_string = "Dettagli molto importanti." - single_file.write_text(f"# Title\n{unique_content_string}", encoding="utf-8") - - output_file = tmp_path / "full_context.md" - - # Passiamo il flag --all - result = runner.invoke(app, [str(single_file), "--all", "-o", str(output_file)]) - - assert result.exit_code == 0 - content = output_file.read_text(encoding="utf-8") - - # DEVE contenere la struttura - assert "# Title" in content - - # DEVE contenere ANCHE il corpo del testo - assert "--- START OF FILE: DOCS.md ---" in content - assert unique_content_string in content \ No newline at end of file diff --git a/tests/test_parsers.py b/tests/test_parsers.py deleted file mode 100644 index 3123906..0000000 --- a/tests/test_parsers.py +++ /dev/null @@ -1,50 +0,0 @@ -import pytest -from deepbase.parsers import extract_markdown_structure, get_document_structure - -def test_extract_markdown_structure_simple(): - """Testa l'estrazione corretta di header semplici.""" - content = """ -# Titolo Principale -Testo normale che deve essere ignorato. - -## Sottosezione -Altro testo. - -### Livello 3 - """ - expected = "# Titolo Principale\n## Sottosezione\n### Livello 3" - result = extract_markdown_structure(content) - assert result.strip() == expected - -def test_extract_markdown_structure_no_headers(): - """Testa un file markdown senza intestazioni.""" - content = "Solo testo semplice.\nNessun titolo qui." - result = extract_markdown_structure(content) - # FIX: Aggiunta la parola 'Markdown' per matchare l'output aggiornato - assert "Nessuna struttura Markdown rilevata" in result - -def test_extract_markdown_structure_complex(): - """Testa che il codice e i commenti non vengano confusi per header.""" - content = """ -# Header Reale - # Questo è codice, non un header - ## Header Reale 2 -""" - result = extract_markdown_structure(content) - # L'header indentato (codice) non deve apparire, o deve essere gestito come testo - # La regex attuale richiede che # sia all'inizio della riga (con whitespace opzionali) - assert "# Header Reale" in result - assert "## Header Reale 2" in result - # Nota: Se la tua regex permette spazi prima del #, verifica il comportamento desiderato - -def test_dispatcher_extensions(): - """Testa che il dispatcher scelga il parser giusto in base all'estensione.""" - content = "# Test" - - # Markdown extensions - assert get_document_structure("file.md", content) == "# Test" - assert get_document_structure("file.markdown", content) == "# Test" - - # Unsupported extensions (dovrebbe ritornare None o messaggio default) - assert get_document_structure("file.txt", content) is None - assert get_document_structure("script.py", content) is None \ No newline at end of file diff --git a/tests/test_suite_python.py b/tests/test_suite_python.py new file mode 100644 index 0000000..19651e9 --- /dev/null +++ b/tests/test_suite_python.py @@ -0,0 +1,175 @@ +# tests/test_suite_python.py + +import os +import typer +from typer.testing import CliRunner +from deepbase.main import main + +# Creiamo un'app Typer temporanea per il testing +test_app = typer.Typer() +test_app.command()(main) + +runner = CliRunner() + +class TestPythonSuite: + """ + Test suite dedicata all'analisi di progetti Python con DeepBase. + FIX: Specifica sempre l'output path esplicito per evitare FileNotFoundError. + FIX: Controlla il contenuto del file generato, non lo stdout, per la struttura. + """ + + def create_dummy_python_project(self, root): + """Helper per popolare una directory con file Python finti.""" + # 1. File principale + main_py = root / "main.py" + main_py.write_text(""" +import os + +def hello_world(): + print("Hello content") + return True + +class MyClass: + def method_one(self): + # This is a comment inside + return 1 +""", encoding="utf-8") + + # 2. Modulo utils + utils_dir = root / "utils" + utils_dir.mkdir() + (utils_dir / "helper.py").write_text("def help_me():\n pass", encoding="utf-8") + + # 3. File da ignorare (segreto) + (root / "secrets.py").write_text("API_KEY = '123'", encoding="utf-8") + + # 4. Cartella da ignorare (es. cache) + cache_dir = root / ".mypy_cache" + cache_dir.mkdir() + (cache_dir / "data.json").write_text("{}", encoding="utf-8") + + def test_basic_structure(self, tmp_path): + """Testa che il comando base generi la struttura nel file.""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + + # Passiamo esplicitamente l'output file nel tmp_path + result = runner.invoke(test_app, [str(tmp_path), "-o", str(output_file)]) + + assert result.exit_code == 0 + assert output_file.exists() + + content = output_file.read_text(encoding="utf-8") + + # Verifica presenza nell'albero (DENTRO IL FILE, non nello stdout) + assert "main.py" in content + assert "utils/" in content + + # Verifica che il CONTENUTO del codice NON ci sia + assert "def hello_world" not in content + assert "import os" not in content + + def test_flag_all_content(self, tmp_path): + """Testa --all: deve includere tutto il codice.""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + result = runner.invoke(test_app, [str(tmp_path), "--all", "-o", str(output_file)]) + + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # Deve contenere il corpo delle funzioni + assert "print(\"Hello content\")" in content + assert "class MyClass:" in content + + def test_flag_light_mode(self, tmp_path): + """Testa --light: deve includere firme ma NON il corpo.""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) + + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # Deve contenere la notice Light Mode + assert "[LIGHT MODE]" in content + + # Deve contenere le firme (via AST parsing) + # Nota: controlliamo stringhe parziali per evitare problemi di formattazione spazi + assert "def hello_world" in content + assert "class MyClass:" in content + + # NON deve contenere il corpo del codice + assert "print(\"Hello content\")" not in content + assert "return 1" not in content + + def test_focus_mode_hybrid(self, tmp_path): + """Testa --focus combined (ibrido).""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + # Focus solo su main.py + result = runner.invoke(test_app, [str(tmp_path), "--focus", "main.py", "-o", str(output_file)]) + + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # main.py deve essere FULL + assert "--- START OF FILE: main.py ---" in content + assert "print(\"Hello content\")" in content + + # utils/helper.py NON era in focus, quindi non dovrebbe esserci il contenuto + assert "--- START OF FILE: utils/helper.py ---" not in content + + def test_focus_with_light_background(self, tmp_path): + """Testa --light insieme a --focus.""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + # Focus su main.py, ma background --light + result = runner.invoke(test_app, [str(tmp_path), "--light", "--focus", "main.py", "-o", str(output_file)]) + + content = output_file.read_text(encoding="utf-8") + + # main.py FULL + assert "print(\"Hello content\")" in content + + # utils/helper.py LIGHT (deve esserci la firma) + assert "def help_me" in content + + def test_toml_configuration(self, tmp_path): + """Testa che .deepbase.toml venga letto e rispettato.""" + self.create_dummy_python_project(tmp_path) + + # Crea configurazione per ignorare "secrets.py" + toml_file = tmp_path / ".deepbase.toml" + toml_file.write_text('ignore_files = ["secrets.py"]', encoding="utf-8") + + output_file = tmp_path / "llm_context.md" + result = runner.invoke(test_app, [str(tmp_path), "--all", "-o", str(output_file)]) + + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # secrets.py NON deve apparire + assert "secrets.py" not in content + assert "API_KEY" not in content + + def test_custom_output_path(self, tmp_path): + """Testa l'opzione -o per il file di output.""" + self.create_dummy_python_project(tmp_path) + + custom_out = tmp_path / "custom_analysis.txt" + result = runner.invoke(test_app, [str(tmp_path), "-o", str(custom_out)]) + + assert result.exit_code == 0 + assert custom_out.exists() + + def test_error_handling_invalid_path(self): + """Testa che il programma gestisca percorsi inesistenti.""" + result = runner.invoke(test_app, ["/percorso/inesistente/assoluto"]) + assert result.exit_code == 1 + assert "Target not found" in result.stdout \ No newline at end of file From 63d845ffa40d6f62fd3445aea389c5d0086bf716 Mon Sep 17 00:00:00 2001 From: Giuliano Ranauro Date: Thu, 12 Feb 2026 23:03:02 +0100 Subject: [PATCH 5/9] new tests database tests --- tests/test_suite_python.py | 47 +++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/tests/test_suite_python.py b/tests/test_suite_python.py index 19651e9..8d1264f 100644 --- a/tests/test_suite_python.py +++ b/tests/test_suite_python.py @@ -4,6 +4,7 @@ import typer from typer.testing import CliRunner from deepbase.main import main +import sqlite3 # Creiamo un'app Typer temporanea per il testing test_app = typer.Typer() @@ -172,4 +173,48 @@ def test_error_handling_invalid_path(self): """Testa che il programma gestisca percorsi inesistenti.""" result = runner.invoke(test_app, ["/percorso/inesistente/assoluto"]) assert result.exit_code == 1 - assert "Target not found" in result.stdout \ No newline at end of file + assert "Target not found" in result.stdout + + def test_database_handling(self, tmp_path): + """Testa il supporto per database SQLite (schema extraction e light mode).""" + import sqlite3 # Import necessario qui o in cima al file + + # Creiamo una cartella e un DB reale + project_dir = tmp_path / "db_project" + project_dir.mkdir() + db_path = project_dir / "test_db.sqlite" + + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + cursor.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, username TEXT NOT NULL)") + cursor.execute("CREATE TABLE posts (id INTEGER PRIMARY KEY, user_id INTEGER, content TEXT)") + conn.commit() + conn.close() + + output_file = project_dir / "context.md" + + # 1. Test Full Mode (--all) -> Deve mostrare schema dettagliato + result = runner.invoke(test_app, [str(project_dir), "--all", "-o", str(output_file)]) + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # Verifica che il DB sia stato processato + assert "test_db.sqlite" in content + + # Verifica il contenuto generato da generate_database_context_full + # Nota: "DATABASE SCHEMA" appare solo in single-file mode, qui cerchiamo il contenuto reale + assert "Table: `users`" in content + # Verifica parziale di una colonna per assicurarsi che lo schema sia stato letto + assert "username" in content + assert "TEXT" in content + + # 2. Test Light Mode (--light) -> Deve mostrare schema compatto (TOON) + result = runner.invoke(test_app, [str(project_dir), "--light", "-o", str(output_file)]) + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # Verifica firma compatta (TOON) + # Cerca la definizione della tabella users e la colonna id + assert "users" in content + # Verifica formato TOON: nome:tipo + assert "id:INTEGER" in content \ No newline at end of file From 8a6674756cf794309c849b46853e252340ed485d Mon Sep 17 00:00:00 2001 From: Giuliano Ranauro Date: Fri, 13 Feb 2026 21:23:18 +0100 Subject: [PATCH 6/9] Refactoring modulare di Light Mode e struttura parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sostituisce la logica monolitica di `toon.py` con un nuovo package `src/deepbase/parsers/` per migliorare la scalabilità e facilitare l'aggiunta di nuovi linguaggi. Modifiche principali: - **Architettura**: Introdotta interfaccia `LanguageParser` e un `Registry` centrale per gestire le estensioni. - **Supporto**: Implementati parser dedicati per Python, Markdown e LaTeX. - **Fallback**: Aggiunto un gestore generico che applica un warning per i file non supportati ufficialmente. - **Test**: Aggiornata e ampliata la suite di test per coprire i nuovi parser e i casi limite. --- llm_context.md | 3114 ++++++++++++++++++++++++++++- pyproject.toml | 2 +- src/deepbase/parsers.py | 85 - src/deepbase/parsers/__init__.py | 6 + src/deepbase/parsers/document.py | 58 + src/deepbase/parsers/fallback.py | 25 + src/deepbase/parsers/interface.py | 14 + src/deepbase/parsers/python.py | 205 ++ src/deepbase/parsers/registry.py | 55 + src/deepbase/toon.py | 397 +--- tests/test_suite_python.py | 286 ++- 11 files changed, 3641 insertions(+), 606 deletions(-) delete mode 100644 src/deepbase/parsers.py create mode 100644 src/deepbase/parsers/__init__.py create mode 100644 src/deepbase/parsers/document.py create mode 100644 src/deepbase/parsers/fallback.py create mode 100644 src/deepbase/parsers/interface.py create mode 100644 src/deepbase/parsers/python.py create mode 100644 src/deepbase/parsers/registry.py diff --git a/llm_context.md b/llm_context.md index 5a8e7cf..00dde88 100644 --- a/llm_context.md +++ b/llm_context.md @@ -1,41 +1,3121 @@ -# Project Context: test_toml_configuration0 +# Project Context: DeepBase ================================================================================ ### PROJECT STRUCTURE ### ================================================================================ -> Total Size: 0.19 KB | Est. Tokens: ~48 -📁 test_toml_configuration0/ -└── 📁 utils/ (12.0% | ~6t) +> Total Size: 93.03 KB | Est. Tokens: ~23,817 +📁 DeepBase/ +├── 📄 .gitignore (3.4% | ~805t) +├── 📄 CHANGELOG.md (1.1% | ~255t) +├── 📄 README.md (3.8% | ~908t) +├── 📁 docs/ (2.1% | ~500t) +│ ├── 📄 index.md (2.0% | ~487t) +│ └── 📄 reference.md (0.1% | ~13t) +├── 📁 examples/ (26.2% | ~6.2k t) +│ └── 📄 deepbase_context.md (26.2% | ~6.2k t) +├── 📄 mkdocs.yml (1.0% | ~227t) +├── 📄 pyproject.toml (1.5% | ~363t) +├── 📁 src/ (52.0% | ~12.4k t) +│ ├── 📁 deepbase/ (52.0% | ~12.4k t) +│ │ ├── 📄 __init__.py +│ │ ├── 📄 database.py (9.1% | ~2.2k t) +│ │ ├── 📄 main.py (24.4% | ~5.8k t) +│ │ ├── 📁 parsers/ (12.5% | ~3.0k t) +│ │ │ ├── 📄 __init__.py (0.2% | ~53t) +│ │ │ ├── 📄 document.py (2.0% | ~485t) +│ │ │ ├── 📄 fallback.py (0.9% | ~226t) +│ │ │ ├── 📄 interface.py (0.4% | ~96t) +│ │ │ ├── 📄 python.py (6.9% | ~1.6k t) +│ │ │ └── 📄 registry.py (2.0% | ~477t) +│ │ └── 📄 toon.py (6.1% | ~1.4k t) +│ └── 📁 deepbase.egg-info/ +└── 📁 tests/ (9.0% | ~2.1k t) + ├── 📁 database/ + └── 📄 test_suite_python.py (9.0% | ~2.1k t) ================================================================================ ### FILE CONTENTS ### ================================================================================ ---- START OF FILE: main.py --- +--- START OF FILE: .gitignore --- +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +debug.log +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case you generate it automatically, you may want to ignore it. +# Pipfile.lock + +# poetry +# According to python-poetry/poetry#519, it is recommended to include poetry.lock in version control. +# This is especially if you are building a library. +# poetry.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env.bak +venv.bak + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + + +--- END OF FILE: .gitignore --- +---------------------------------------- + +--- START OF FILE: CHANGELOG.md --- + +## [1.7.0] - 2024-02-12 + +### Added +- **Smart Token Estimation**: Added approximate token count (~4 chars/token) and file size percentage next to every file and folder in the tree view. +- **Recursive Directory Stats**: Parent folders now show the cumulative size and token count of their contents. +- **Enhanced Tree Visualization**: Replaced simple indentation with proper ASCII tree branches (`├──`, `└──`, `│`) for better readability. +- **CLI Links**: Added links to Documentation, Repository, and Issues in the `--help` output. + +### Changed +- **React/JS Optimization**: Automatically ignores `package-lock.json`, `yarn.lock`, `pnpm-lock.yaml`, and mobile build folders (`ios/`, `android/`) to save tokens. +- **Self-Exclusion Logic**: DeepBase now strictly ignores any existing file named `llm_context.md` (or the specified output name) in the target directory to prevent data duplication. + +### Fixed +- Fixed an issue where previous context files were included in the analysis, doubling the token count. + +--- END OF FILE: CHANGELOG.md --- +---------------------------------------- + +--- START OF FILE: README.md --- + +# DeepBase + +**DeepBase** is a command-line tool that analyzes a project directory, extracts the folder structure and the content of all significant code files, and consolidates them into a single text/markdown file. + +This unified "context" is perfect for providing to a Large Language Model (LLM) to enable it to deeply understand the entire codebase. + +## Features + +- **Project Structure**: Generates a tree view of the folder and file structure. +- **Smart Filtering**: Automatically ignores common unnecessary directories (e.g., `.git`, `venv`, `node_modules`). +- **Token Optimization (TOON)**: Capable of generating "Semantic Skeletons" (class definitions, function signatures, docstrings) instead of full code to save up to 90% of tokens. +- **Hybrid Focus Mode**: Combine lightweight context for the whole project with full content only for specific files or folders. +- **Configurable**: Customize ignored directories and included extensions via a `.deepbase.toml` file. +- **Unified Output**: Combines everything into a single file, easy to copy and paste. +- **PyPI Ready**: Easy to install via `pip`. + +## Installation + +You can install DeepBase directly from PyPI: + +```sh +pip install deepbase +``` + +## How to Use + +Once installed, use the `deepbase` command followed by the target (directory or file). + +### 1. Basic Project Analysis + +**Structure Only (Default)** +Quickly generate a tree view of your project folders and files. No code content is included. + +```sh +deepbase . +``` + +**Include All Content** +To generate the full context including the code of all significant files, use the `--all` (or `-a`) flag. +*Warning: use this only for small projects.* + +```sh +deepbase . --all +``` + +### 2. Smart Token Optimization (TOON) + +For large projects, sending all code to an LLM is expensive and inefficient. **TOON (Token Oriented Object Notation)** extracts only the semantic "skeleton" of your code (classes, signatures, docstrings), ignoring implementations. + +```sh +deepbase . --toon +# or +deepbase . -t +``` +*Result: LLMs understand your architecture using minimal tokens.* + +### 3. Hybrid Mode (Focus) + +This is the power user feature. You can provide the TOON skeleton for the entire project (background context) while focusing on specific files (full content). + +**Focus via CLI:** +Use `-f` or `--focus` with glob patterns (e.g., `*auth*`, `src/utils/*`). + +```sh +deepbase . --toon --focus "server/controllers/*" --focus "client/src/login.js" +``` + +**Focus via File:** +Instead of typing patterns every time, create a text file (e.g., `context_task.txt`) with the list of files/folders you are working on. + +*content of `context_task.txt`:* +```text +server/routes/auth.js +server/models/User.js +client/src/components/LoginForm.jsx +``` + +Run deepbase loading the file: +```sh +deepbase . --toon --focus-file context_task.txt +``` + +### 4. Single File Analysis + +DeepBase supports analyzing a single specific file. + +**Structure Only (Default)** +Extracts only the outline/headers. Useful for large documentation files. + +```sh +deepbase README.md +``` + +**Structure + Content** +Appends the full content after the structure. + +```sh +deepbase README.md --all +``` + +### Configuration (.deepbase.toml) + +You can customize behavior by creating a `.deepbase.toml` file in your project root: + +```toml +ignore_dirs = ["my_assets", "experimental"] +significant_extensions = [".cfg", "Makefile", ".tsx"] +``` + +## Development Workflow + +If you want to contribute or test the tool locally: + +```sh +# Install in editable mode +pip install -e ".[dev]" + +# Run tests +pytest +``` + +## License + +This project is released under the GPL 3 license. See the `LICENSE` file for details. +``` + +--- END OF FILE: README.md --- +---------------------------------------- + +--- START OF FILE: mkdocs.yml --- + +site_name: DeepBase +site_description: A CLI tool to consolidate project context for LLMs. +site_url: https://follen99.github.io/deepbase/ # Aggiorna con il tuo username +repo_url: https://github.com/follen99/deepbase +repo_name: follen99/deepbase + +theme: + name: material + palette: + # Light mode + - scheme: default + primary: teal + accent: purple + toggle: + icon: material/brightness-7 + name: Switch to dark mode + # Dark mode + - scheme: slate + primary: teal + accent: lime + toggle: + icon: material/brightness-4 + name: Switch to light mode + features: + - content.code.copy + - navigation.expand + - navigation.top + - search.suggest + +plugins: + - search + - mkdocstrings: + handlers: + python: + paths: [src] # Dice al plugin dove trovare il codice sorgente + +nav: + - Home: index.md + - API Reference: reference.md + +--- END OF FILE: mkdocs.yml --- +---------------------------------------- + +--- START OF FILE: pyproject.toml --- + +# pyproject.toml + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "deepbase" +# Increment the version to reflect changes +version = "1.8.0" +authors = [ + { name="Giuliano Ranauro", email="ranaurogln@email.com" }, +] +description = "A CLI utility to consolidate project context for LLMs." +readme = "README.md" +license = { file="LICENSE" } +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Operating System :: OS Independent", + "Topic :: Software Development", + "Topic :: Utilities", +] +keywords = ["llm", "context", "developer-tool", "ai", "code-analysis", "deepbase"] + +# Main project dependencies +dependencies = [ + "typer[all]", # For a modern and robust CLI + "rich", # For colored output and progress bars + "tomli", # To read .toml configuration files + "chardet" # To reliably detect file encoding +] + +[project.urls] +"Homepage" = "https://github.com/follen99/deepbase" +"Bug Tracker" = "https://github.com/follen99/deepbase/issues" + +# Update the script to point to the Typer app object +[project.scripts] +deepbase = "deepbase.main:app" + +# Optional dependencies for development (e.g., testing) +[project.optional-dependencies] +dev = [ + "pytest", +] + +docs = [ + "mkdocs>=1.5.0", + "mkdocs-material>=9.5.0", + "mkdocstrings[python]>=0.24.0", +] + +--- END OF FILE: pyproject.toml --- +---------------------------------------- + +--- START OF FILE: examples/deepbase_context.md --- + +# Project Context: DeepBase + +================================================================================ +### PROJECT STRUCTURE ### +================================================================================ + +Project Structure in: /home/follen/Documents/uni-git/DeepBase + +📂 ./ + 📄 .gitignore + 📄 README.md + 📄 pyproject.toml + 📂 src/ + 📂 deepbase/ + 📄 __init__.py + 📄 main.py + 📂 deepbase.egg-info/ + 📂 examples/ + 📄 deepbase_context.md + 📂 tests/ + 📄 test_main.py + + +================================================================================ +### FILE CONTENTS ### +================================================================================ + +--- START OF FILE: .gitignore --- + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +debug.log +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case you generate it automatically, you may want to ignore it. +# Pipfile.lock + +# poetry +# According to python-poetry/poetry#519, it is recommended to include poetry.lock in version control. +# This is especially if you are building a library. +# poetry.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env.bak +venv.bak + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + + +--- END OF FILE: .gitignore --- + +---------------------------------------- + +--- START OF FILE: README.md --- + +# DeepBase + +**DeepBase** is a command-line tool that analyzes a project directory, extracts the folder structure and the content of all significant code files, and consolidates them into a single text/markdown file. + +This unified "context" is perfect for providing to a Large Language Model (LLM) to enable it to deeply understand the entire codebase. + +## Features + +- **Project Structure**: Generates a tree view of the folder and file structure. +- **Smart Filtering**: Automatically ignores common unnecessary directories (e.g., `.git`, `venv`, `node_modules`). +- **Configurable**: Customize ignored directories and included extensions via a `.deepbase.toml` file. +- **Extension Selection**: Includes only files with relevant code or configuration extensions. +- **Unified Output**: Combines everything into a single file, easy to copy and paste. +- **PyPI Ready**: Easy to install via `pip`. + +## Installation + +You can install DeepBase directly from PyPI: + +```sh +pip install deepbase + +``` + +## How to Use + +Once installed, you will have the `deepbase` command available in your terminal. + +**Basic Usage:** + +Navigate to your project folder (or a parent folder) and run: + +```sh +deepbase . +``` +*The dot `.` indicates the current directory.* + +This command will create a file called `llm_context.md` in the current directory. + +**Specify Directory and Output File:** + +```sh +deepbase /path/to/your/project -o project_context.txt +``` + +### Advanced Configuration + +You can customize DeepBase's behavior by creating a `.deepbase.toml` file in the root of the project you are analyzing. + +**Example `.deepbase.toml`:** +```toml +# Add more directories to ignore. +# These will be added to the default ones. +ignore_dirs = [ + "my_assets_folder", + "experimental" +] + +# Add more extensions or filenames to include. +significant_extensions = [ + ".cfg", + "Makefile" +] +``` + +## License + +This project is released under the GPL 3 license. See the `LICENSE` file for details. + +--- END OF FILE: README.md --- + +---------------------------------------- + +--- START OF FILE: pyproject.toml --- + +# pyproject.toml + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "deepbase" +# Increment the version to reflect changes +version = "1.2.0" +authors = [ + { name="Your Name", email="your@email.com" }, +] +description = "A CLI utility to consolidate project context for LLMs." +readme = "README.md" +license = { file="LICENSE" } +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Topic :: Software Development", + "Topic :: Utilities", +] +keywords = ["llm", "context", "developer-tool", "ai", "code-analysis", "deepbase"] + +# Main project dependencies +dependencies = [ + "typer[all]", # For a modern and robust CLI + "rich", # For colored output and progress bars + "tomli", # To read .toml configuration files + "chardet" # To reliably detect file encoding +] + +[project.urls] +"Homepage" = "https://github.com/follen99/deepbase" +"Bug Tracker" = "https://github.com/follen99/deepbase/issues" + +# Update the script to point to the Typer app object +[project.scripts] +deepbase = "deepbase.main:app" + +# Optional dependencies for development (e.g., testing) +[project.optional-dependencies] +dev = [ + "pytest", +] + +--- END OF FILE: pyproject.toml --- + +---------------------------------------- + +--- START OF FILE: src/deepbase/__init__.py --- + + + +--- END OF FILE: src/deepbase/__init__.py --- + +---------------------------------------- + +--- START OF FILE: src/deepbase/main.py --- + +# src/deepbase/main.py + +import os +import typer +from rich.console import Console +from rich.progress import Progress +import tomli +import chardet +from typing import List, Dict, Any, Set + +# --- DEFAULT CONFIGURATION --- + +DEFAULT_CONFIG = { + "ignore_dirs": { + "__pycache__", ".git", ".idea", ".vscode", "venv", ".venv", "env", + ".env", "node_modules", "build", "dist", "target", "out", "bin", + "obj", "logs", "tmp", "eggs", ".eggs", ".pytest_cache", ".tox", + "site", + }, + "significant_extensions": { + ".py", ".java", ".js", ".ts", ".html", ".css", ".scss", ".sql", + ".md", ".json", ".xml", ".yml", ".yaml", ".sh", ".bat", "Dockerfile", + ".dockerignore", ".gitignore", "requirements.txt", "pom.xml", "gradlew", + "pyproject.toml", "setup.py", + } +} + +# --- TOOL INITIALIZATION --- + +app = typer.Typer( + name="deepbase", + help="Analyzes a project directory and creates a unified context document for an LLM.", + add_completion=False +) +console = Console() + + +def load_config(root_dir: str) -> Dict[str, Any]: + """Loads configuration from .deepbase.toml or uses the default.""" + config_path = os.path.join(root_dir, ".deepbase.toml") + config = DEFAULT_CONFIG.copy() + + if os.path.exists(config_path): + console.print(f"[bold cyan]Found configuration file: '.deepbase.toml'[/bold cyan]") + try: + with open(config_path, "rb") as f: + user_config = tomli.load(f) + + # Merge user config with defaults + config["ignore_dirs"].update(user_config.get("ignore_dirs", [])) + config["significant_extensions"].update(user_config.get("significant_extensions", [])) + console.print("[green]Custom configuration loaded successfully.[/green]") + + except tomli.TOMLDecodeError as e: + console.print(f"[bold red]Error parsing .deepbase.toml:[/bold red] {e}") + console.print("[yellow]Using default configuration.[/yellow]") + + return config + + +def is_significant_file(file_path: str, significant_extensions: Set[str]) -> bool: + """Checks if a file is significant based on the provided extensions.""" + file_name = os.path.basename(file_path) + if file_name in significant_extensions: + return True + _, ext = os.path.splitext(file_name) + return ext in significant_extensions + + +def generate_directory_tree(root_dir: str, config: Dict[str, Any]) -> str: + """Generates a text representation of the folder structure.""" + tree_str = f"Project Structure in: {os.path.abspath(root_dir)}\n\n" + ignore_dirs = config["ignore_dirs"] + significant_exts = config["significant_extensions"] + + for dirpath, dirnames, filenames in os.walk(root_dir, topdown=True): + dirnames[:] = [d for d in dirnames if d not in ignore_dirs and not d.startswith('.')] + + level = dirpath.replace(root_dir, '').count(os.sep) + indent = ' ' * 4 * level + + tree_str += f"{indent}📂 {os.path.basename(dirpath) or os.path.basename(os.path.abspath(root_dir))}/\n" + + sub_indent = ' ' * 4 * (level + 1) + + for f in sorted(filenames): + if is_significant_file(os.path.join(dirpath, f), significant_exts): + tree_str += f"{sub_indent}📄 {f}\n" + + return tree_str + + +def get_all_significant_files(root_dir: str, config: Dict[str, Any]) -> List[str]: + """Gets a list of all significant files to be included.""" + significant_files = [] + ignore_dirs = config["ignore_dirs"] + significant_exts = config["significant_extensions"] + + for dirpath, dirnames, filenames in os.walk(root_dir, topdown=True): + dirnames[:] = [d for d in dirnames if d not in ignore_dirs and not d.startswith('.')] + + for filename in sorted(filenames): + file_path = os.path.join(dirpath, filename) + if is_significant_file(file_path, significant_exts): + significant_files.append(file_path) + + return significant_files + + +@app.command() +def create( + directory: str = typer.Argument(..., help="The root directory of the project to scan."), + output: str = typer.Option("llm_context.md", "--output", "-o", help="The output file that will contain the context."), + verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output, including ignored files.") +): + """ + Analyzes a project and creates a unified context file for an LLM. + """ + if not os.path.isdir(directory): + console.print(f"[bold red]Error:[/bold red] The specified directory does not exist: '{directory}'") + raise typer.Exit(code=1) + + console.print(f"[bold green]Starting scan of '{directory}'...[/bold green]") + + config = load_config(directory) + + try: + with open(output, "w", encoding="utf-8") as outfile: + # 1. Write the header + outfile.write(f"# Project Context: {os.path.basename(os.path.abspath(directory))}\n\n") + + # 2. Write the structure + outfile.write("="*80 + "\n### PROJECT STRUCTURE ###\n" + "="*80 + "\n\n") + directory_tree = generate_directory_tree(directory, config) + outfile.write(directory_tree) + outfile.write("\n\n") + + # 3. Write the file contents + outfile.write("="*80 + "\n### FILE CONTENTS ###\n" + "="*80 + "\n\n") + + significant_files = get_all_significant_files(directory, config) + + with Progress(console=console) as progress: + task = progress.add_task("[cyan]Analyzing files...", total=len(significant_files)) + + for file_path in significant_files: + relative_path = os.path.relpath(file_path, directory).replace('\\', '/') + progress.update(task, advance=1, description=f"[cyan]Analyzing: {relative_path}[/cyan]") + + outfile.write(f"--- START OF FILE: {relative_path} ---\n\n") + try: + with open(file_path, "rb") as fb: + raw_data = fb.read() + + # Detect encoding + detection = chardet.detect(raw_data) + encoding = detection['encoding'] if detection['encoding'] else 'utf-8' + + # Read and write content with robust error handling + content = raw_data.decode(encoding, errors="replace") + outfile.write(content) + + except Exception as e: + outfile.write(f"!!! Error while reading file: {e} !!!\n") + + outfile.write(f"\n\n--- END OF FILE: {relative_path} ---\n\n") + outfile.write("-" * 40 + "\n\n") + + console.print(f"\n[bold green]✓ SUCCESS[/bold green]: Context successfully created in file: [cyan]'{output}'[/cyan]") + + except IOError as e: + console.print(f"\n[bold red]Error writing to output file:[/bold red] {e}") + raise typer.Exit(code=1) + except Exception as e: + console.print(f"\n[bold red]An unexpected error occurred:[/bold red] {e}") + raise typer.Exit(code=1) + + +if __name__ == "__main__": + app() + +--- END OF FILE: src/deepbase/main.py --- + +---------------------------------------- + +--- START OF FILE: examples/deepbase_context.md --- + +# Project Context: DeepBase + +================================================================================ +### PROJECT STRUCTURE ### +================================================================================ + +Project Structure in: /home/follen/Documents/uni-git/DeepBase + +📂 ./ + 📄 .gitignore + 📄 README.md + 📄 pyproject.toml + 📂 src/ + 📂 deepbase/ + 📄 __init__.py + 📄 main.py + 📂 deepbase.egg-info/ + 📂 examples/ + 📄 deepbase_context.md + 📂 tests/ + 📄 test_main.py + + +================================================================================ +### FILE CONTENTS ### +================================================================================ + +--- START OF FILE: .gitignore --- + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +debug.log +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case you generate it automatically, you may want to ignore it. +# Pipfile.lock + +# poetry +# According to python-poetry/poetry#519, it is recommended to include poetry.lock in version control. +# This is especially if you are building a library. +# poetry.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env.bak +venv.bak + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + + +--- END OF FILE: .gitignore --- + +---------------------------------------- + +--- START OF FILE: README.md --- + +# DeepBase + +**DeepBase** is a command-line tool that analyzes a project directory, extracts the folder structure and the content of all significant code files, and consolidates them into a single text/markdown file. + +This unified "context" is perfect for providing to a Large Language Model (LLM) to enable it to deeply understand the entire codebase. + +## Features + +- **Project Structure**: Generates a tree view of the folder and file structure. +- **Smart Filtering**: Automatically ignores common unnecessary directories (e.g., `.git`, `venv`, `node_modules`). +- **Configurable**: Customize ignored directories and included extensions via a `.deepbase.toml` file. +- **Extension Selection**: Includes only files with relevant code or configuration extensions. +- **Unified Output**: Combines everything into a single file, easy to copy and paste. +- **PyPI Ready**: Easy to install via `pip`. + +## Installation + +You can install DeepBase directly from PyPI: + +```sh +pip install deepbase + +``` + +## How to Use + +Once installed, you will have the `deepbase` command available in your terminal. + +**Basic Usage:** + +Navigate to your project folder (or a parent folder) and run: + +```sh +deepbase . +``` +*The dot `.` indicates the current directory.* + +This command will create a file called `llm_context.md` in the current directory. + +**Specify Directory and Output File:** + +```sh +deepbase /path/to/your/project -o project_context.txt +``` + +### Advanced Configuration + +You can customize DeepBase's behavior by creating a `.deepbase.toml` file in the root of the project you are analyzing. + +**Example `.deepbase.toml`:** +```toml +# Add more directories to ignore. +# These will be added to the default ones. +ignore_dirs = [ + "my_assets_folder", + "experimental" +] + +# Add more extensions or filenames to include. +significant_extensions = [ + ".cfg", + "Makefile" +] +``` + +## License + +This project is released under the GPL 3 license. See the `LICENSE` file for details. + +--- END OF FILE: README.md --- + +---------------------------------------- + +--- START OF FILE: pyproject.toml --- + +# pyproject.toml + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "deepbase" +# Increment the version to reflect changes +version = "1.2.0" +authors = [ + { name="Your Name", email="your@email.com" }, +] +description = "A CLI utility to consolidate project context for LLMs." +readme = "README.md" +license = { file="LICENSE" } +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Topic :: Software Development", + "Topic :: Utilities", +] +keywords = ["llm", "context", "developer-tool", "ai", "code-analysis", "deepbase"] + +# Main project dependencies +dependencies = [ + "typer[all]", # For a modern and robust CLI + "rich", # For colored output and progress bars + "tomli", # To read .toml configuration files + "chardet" # To reliably detect file encoding +] + +[project.urls] +"Homepage" = "https://github.com/follen99/deepbase" +"Bug Tracker" = "https://github.com/follen99/deepbase/issues" + +# Update the script to point to the Typer app object +[project.scripts] +deepbase = "deepbase.main:app" + +# Optional dependencies for development (e.g., testing) +[project.optional-dependencies] +dev = [ + "pytest", +] + +--- END OF FILE: pyproject.toml --- + +---------------------------------------- + +--- START OF FILE: src/deepbase/__init__.py --- + + + +--- END OF FILE: src/deepbase/__init__.py --- + +---------------------------------------- + +--- START OF FILE: src/deepbase/main.py --- + + + +--- END OF FILE: examples/deepbase_context.md --- + +---------------------------------------- + +--- START OF FILE: tests/test_main.py --- + +# tests/test_main.py + +import os +from typer.testing import CliRunner +from deepbase.main import app + +# Runner instance to execute Typer app commands +runner = CliRunner() + +def test_create_context_successfully(tmp_path): + """ + Tests the creation of a context file in a successful scenario. + """ + # 1. Create a mock project structure + project_dir = tmp_path / "my_test_project" + project_dir.mkdir() + (project_dir / "main.py").write_text("print('hello world')") + (project_dir / "README.md").write_text("# My Project") + + # Create a directory to ignore + ignored_dir = project_dir / "venv" + ignored_dir.mkdir() + (ignored_dir / "ignored_file.py").write_text("ignore me") + + output_file = tmp_path / "context.md" + + # 2. Execute the CLI command with arguments in the correct order + result = runner.invoke(app, [str(project_dir), "--output", str(output_file)]) + + # 3. Verify the results + assert result.exit_code == 0 + assert "SUCCESS" in result.stdout + assert output_file.exists() + + content = output_file.read_text() + + # Check that significant files are included + assert "--- START OF FILE: main.py ---" in content + assert "print('hello world')" in content + assert "--- START OF FILE: README.md ---" in content + + # Check that ignored directory and files are not present + assert "venv" not in content + assert "ignored_file.py" not in content + +def test_directory_not_found(): + """ + Tests the behavior when the input directory does not exist. + """ + result = runner.invoke(app, ["non_existent_dir"]) + assert result.exit_code == 1 + assert "directory does not exist" in result.stdout + +--- END OF FILE: tests/test_main.py --- + +---------------------------------------- + + + +--- END OF FILE: examples/deepbase_context.md --- +---------------------------------------- + +--- START OF FILE: tests/test_suite_python.py --- + +# tests/test_suite_python.py + +import os +import typer +from typer.testing import CliRunner +from deepbase.main import main +import sqlite3 + +# Creiamo un'app Typer temporanea per il testing +test_app = typer.Typer() +test_app.command()(main) + +runner = CliRunner() + +class TestPythonSuite: + """ + Test suite dedicata all'analisi di progetti Python con DeepBase. + FIX: Specifica sempre l'output path esplicito per evitare FileNotFoundError. + FIX: Controlla il contenuto del file generato, non lo stdout, per la struttura. + """ + + def create_dummy_python_project(self, root): + """Helper per popolare una directory con file Python finti.""" + # 1. File principale + main_py = root / "main.py" + main_py.write_text(""" +import os + +def hello_world(): + print("Hello content") + return True + +class MyClass: + def method_one(self): + # This is a comment inside + return 1 +""", encoding="utf-8") + + # 2. Modulo utils + utils_dir = root / "utils" + utils_dir.mkdir() + (utils_dir / "helper.py").write_text("def help_me():\n pass", encoding="utf-8") + + # 3. File da ignorare (segreto) + (root / "secrets.py").write_text("API_KEY = '123'", encoding="utf-8") + + # 4. Cartella da ignorare (es. cache) + cache_dir = root / ".mypy_cache" + cache_dir.mkdir() + (cache_dir / "data.json").write_text("{}", encoding="utf-8") + + def test_basic_structure(self, tmp_path): + """Testa che il comando base generi la struttura nel file.""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + + # Passiamo esplicitamente l'output file nel tmp_path + result = runner.invoke(test_app, [str(tmp_path), "-o", str(output_file)]) + + assert result.exit_code == 0 + assert output_file.exists() + + content = output_file.read_text(encoding="utf-8") + + # Verifica presenza nell'albero (DENTRO IL FILE, non nello stdout) + assert "main.py" in content + assert "utils/" in content + + # Verifica che il CONTENUTO del codice NON ci sia + assert "def hello_world" not in content + assert "import os" not in content + + def test_flag_all_content(self, tmp_path): + """Testa --all: deve includere tutto il codice.""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + result = runner.invoke(test_app, [str(tmp_path), "--all", "-o", str(output_file)]) + + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # Deve contenere il corpo delle funzioni + assert "print(\"Hello content\")" in content + assert "class MyClass:" in content + + def test_flag_light_mode(self, tmp_path): + """Testa --light: deve includere firme ma NON il corpo.""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) + + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # Deve contenere la notice Light Mode + assert "[LIGHT MODE]" in content + + # Deve contenere le firme (via AST parsing) + # Nota: controlliamo stringhe parziali per evitare problemi di formattazione spazi + assert "def hello_world" in content + assert "class MyClass:" in content + + # NON deve contenere il corpo del codice + assert "print(\"Hello content\")" not in content + assert "return 1" not in content + + def test_focus_mode_hybrid(self, tmp_path): + """Testa --focus combined (ibrido).""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + # Focus solo su main.py + result = runner.invoke(test_app, [str(tmp_path), "--focus", "main.py", "-o", str(output_file)]) + + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # main.py deve essere FULL + assert "--- START OF FILE: main.py ---" in content + assert "print(\"Hello content\")" in content + + # utils/helper.py NON era in focus, quindi non dovrebbe esserci il contenuto + assert "--- START OF FILE: utils/helper.py ---" not in content + + def test_focus_with_light_background(self, tmp_path): + """Testa --light insieme a --focus.""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + # Focus su main.py, ma background --light + result = runner.invoke(test_app, [str(tmp_path), "--light", "--focus", "main.py", "-o", str(output_file)]) + + content = output_file.read_text(encoding="utf-8") + + # main.py FULL + assert "print(\"Hello content\")" in content + + # utils/helper.py LIGHT (deve esserci la firma) + assert "def help_me" in content + + def test_toml_configuration(self, tmp_path): + """Testa che .deepbase.toml venga letto e rispettato.""" + self.create_dummy_python_project(tmp_path) + + # Crea configurazione per ignorare "secrets.py" + toml_file = tmp_path / ".deepbase.toml" + toml_file.write_text('ignore_files = ["secrets.py"]', encoding="utf-8") + + output_file = tmp_path / "llm_context.md" + result = runner.invoke(test_app, [str(tmp_path), "--all", "-o", str(output_file)]) + + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # secrets.py NON deve apparire + assert "secrets.py" not in content + assert "API_KEY" not in content + + def test_custom_output_path(self, tmp_path): + """Testa l'opzione -o per il file di output.""" + self.create_dummy_python_project(tmp_path) + + custom_out = tmp_path / "custom_analysis.txt" + result = runner.invoke(test_app, [str(tmp_path), "-o", str(custom_out)]) + + assert result.exit_code == 0 + assert custom_out.exists() + + def test_error_handling_invalid_path(self): + """Testa che il programma gestisca percorsi inesistenti.""" + result = runner.invoke(test_app, ["/percorso/inesistente/assoluto"]) + assert result.exit_code == 1 + assert "Target not found" in result.stdout + + def test_database_handling(self, tmp_path): + """Testa il supporto per database SQLite (schema extraction e light mode).""" + import sqlite3 # Import necessario qui o in cima al file + + # Creiamo una cartella e un DB reale + project_dir = tmp_path / "db_project" + project_dir.mkdir() + db_path = project_dir / "test_db.sqlite" + + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + cursor.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, username TEXT NOT NULL)") + cursor.execute("CREATE TABLE posts (id INTEGER PRIMARY KEY, user_id INTEGER, content TEXT)") + conn.commit() + conn.close() + + output_file = project_dir / "context.md" + + # 1. Test Full Mode (--all) -> Deve mostrare schema dettagliato + result = runner.invoke(test_app, [str(project_dir), "--all", "-o", str(output_file)]) + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # Verifica che il DB sia stato processato + assert "test_db.sqlite" in content + + # Verifica il contenuto generato da generate_database_context_full + # Nota: "DATABASE SCHEMA" appare solo in single-file mode, qui cerchiamo il contenuto reale + assert "Table: `users`" in content + # Verifica parziale di una colonna per assicurarsi che lo schema sia stato letto + assert "username" in content + assert "TEXT" in content + + # 2. Test Light Mode (--light) -> Deve mostrare schema compatto (TOON) + result = runner.invoke(test_app, [str(project_dir), "--light", "-o", str(output_file)]) + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # Verifica firma compatta (TOON) + # Cerca la definizione della tabella users e la colonna id + assert "users" in content + # Verifica formato TOON: nome:tipo + assert "id:INTEGER" in content + +--- END OF FILE: tests/test_suite_python.py --- +---------------------------------------- + +--- START OF FILE: src/deepbase/__init__.py --- + + + +--- END OF FILE: src/deepbase/__init__.py --- +---------------------------------------- + +--- START OF FILE: src/deepbase/database.py --- + +# src/deepbase/database.py +""" +Database context extraction module for DeepBase. +Handles SQLite databases to provide structured context about schema and tables. +""" + +import sqlite3 import os +from typing import Dict, List, Any, Optional +from dataclasses import dataclass -def hello_world(): - print("Hello content") - return True -class MyClass: - def method_one(self): - # This is a comment inside - return 1 +@dataclass +class ColumnInfo: + name: str + data_type: str + nullable: bool + default: Optional[str] + primary_key: bool + + +@dataclass +class TableInfo: + name: str + columns: List[ColumnInfo] + foreign_keys: List[Dict[str, str]] + indexes: List[Dict[str, Any]] + row_count: int + + +@dataclass +class DatabaseSchema: + tables: List[TableInfo] + total_size_bytes: int + total_tables: int + total_rows: int + + +def get_database_schema(db_path: str) -> DatabaseSchema: + """ + Extract complete schema information from SQLite database. + """ + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + # Get database size + total_size = os.path.getsize(db_path) + + # Get all tables (excluding sqlite internal tables) + cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'") + table_names = [row[0] for row in cursor.fetchall()] + + tables = [] + total_rows = 0 + + for table_name in table_names: + # Get row count + cursor.execute(f"SELECT COUNT(*) FROM `{table_name}`") + row_count = cursor.fetchone()[0] + total_rows += row_count + + # Get column info using PRAGMA + cursor.execute(f"PRAGMA table_info(`{table_name}`)") + columns = [] + for row in cursor.fetchall(): + col = ColumnInfo( + name=row[1], + data_type=row[2], + nullable=not row[3], # notnull column: 0=true, 1=false + default=row[4], + primary_key=bool(row[5]) + ) + columns.append(col) + + # Get foreign keys + cursor.execute(f"PRAGMA foreign_key_list(`{table_name}`)") + foreign_keys = [] + for row in cursor.fetchall(): + fk = { + "id": row[0], + "seq": row[1], + "table": row[2], + "from": row[3], + "to": row[4], + "on_update": row[5], + "on_delete": row[6] + } + foreign_keys.append(fk) + + # Get indexes + cursor.execute(f"PRAGMA index_list(`{table_name}`)") + indexes = [] + for row in cursor.fetchall(): + index_name = row[1] + cursor.execute(f"PRAGMA index_info(`{index_name}`)") + index_columns = [r[2] for r in cursor.fetchall()] + indexes.append({ + "name": index_name, + "unique": row[2], + "columns": index_columns + }) + + table_info = TableInfo( + name=table_name, + columns=columns, + foreign_keys=foreign_keys, + indexes=indexes, + row_count=row_count + ) + tables.append(table_info) + + conn.close() + + return DatabaseSchema( + tables=tables, + total_size_bytes=total_size, + total_tables=len(tables), + total_rows=total_rows + ) + + +def generate_database_context_full(schema: DatabaseSchema, db_name: str) -> str: + """ + Generate full detailed context for --all mode. + Includes complete schema, relationships, and sample data hints. + """ + lines = [ + f"# Database: {db_name}", + f"## Overview", + f"- Total Tables: {schema.total_tables}", + f"- Total Rows: {schema.total_rows:,}", + f"- File Size: {schema.total_size_bytes:,} bytes ({schema.total_size_bytes / 1024:.2f} KB)", + "", + "## Schema Details", + "" + ] + + for table in schema.tables: + lines.extend([ + f"### Table: `{table.name}`", + f"- Rows: {table.row_count:,}", + "" + ]) + + # Columns + lines.append("#### Columns:") + lines.append("| Column | Type | Nullable | Default | PK |") + lines.append("|--------|------|----------|---------|-----|") + for col in table.columns: + pk_mark = "✓" if col.primary_key else "" + null_mark = "✓" if col.nullable else "NOT NULL" + default_val = col.default if col.default else "-" + lines.append(f"| `{col.name}` | {col.data_type} | {null_mark} | {default_val} | {pk_mark} |") + lines.append("") + + # Foreign Keys + if table.foreign_keys: + lines.append("#### Foreign Keys:") + for fk in table.foreign_keys: + lines.append(f"- `{fk['from']}` → `{fk['table']}`.`{fk['to']}` (ON UPDATE: {fk['on_update']}, ON DELETE: {fk['on_delete']})") + lines.append("") + + # Indexes + if table.indexes: + lines.append("#### Indexes:") + for idx in table.indexes: + unique_str = "UNIQUE " if idx['unique'] else "" + lines.append(f"- {unique_str}`{idx['name']}` on ({', '.join(f'`{c}`' for c in idx['columns'])})") + lines.append("") + + lines.append("---") + lines.append("") + + return "\n".join(lines) + + +def generate_database_context_toon(schema: DatabaseSchema, db_name: str) -> str: + """ + Generate minimal TOON-style context (skeleton only). + Best for large databases where token efficiency matters. + """ + lines = [ + f"DB: {db_name}", + f"Tables: {schema.total_tables} | Rows: {schema.total_rows:,}", + "" + ] + + for table in schema.tables: + # Compact representation: Table(columns) [FKs] + col_defs = [] + for col in table.columns: + flags = [] + if col.primary_key: + flags.append("PK") + if not col.nullable: + flags.append("NN") + flag_str = f"[{','.join(flags)}]" if flags else "" + col_defs.append(f"{col.name}:{col.data_type}{flag_str}") + + fk_refs = [] + for fk in table.foreign_keys: + fk_refs.append(f"{fk['from']}→{fk['table']}.{fk['to']}") + + fk_str = f" | FK: {', '.join(fk_refs)}" if fk_refs else "" + lines.append(f"T: {table.name}({', '.join(col_defs)}){fk_str}") + + return "\n".join(lines) + + +def generate_database_context_hybrid(schema: DatabaseSchema, db_name: str, focused_tables: List[str]) -> str: + """ + Generate hybrid context: TOON for all, full detail for focused tables. + """ + lines = [ + f"# Database: {db_name}", + f"## Overview", + f"- Total Tables: {schema.total_tables}", + f"- Total Rows: {schema.total_rows:,}", + "", + "## Schema (TOON + Focus)", + "" + ] + + for table in schema.tables: + is_focused = table.name in focused_tables or any(f in table.name for f in focused_tables) + + if is_focused: + # Full detail for focused tables + lines.extend([ + f"### [FOCUSED] Table: `{table.name}` ⭐", + f"- Rows: {table.row_count:,}", + "" + ]) + + lines.append("#### Columns:") + lines.append("| Column | Type | Nullable | Default | PK |") + lines.append("|--------|------|----------|---------|-----|") + for col in table.columns: + pk_mark = "✓" if col.primary_key else "" + null_mark = "✓" if col.nullable else "NOT NULL" + default_val = col.default if col.default else "-" + lines.append(f"| `{col.name}` | {col.data_type} | {null_mark} | {default_val} | {pk_mark} |") + lines.append("") + + if table.foreign_keys: + lines.append("#### Foreign Keys:") + for fk in table.foreign_keys: + lines.append(f"- `{fk['from']}` → `{fk['table']}`.`{fk['to']}`") + lines.append("") + else: + # TOON style for non-focused + col_names = [f"{col.name}:{col.data_type}" + ("(PK)" if col.primary_key else "") + for col in table.columns] + lines.append(f"- `{table.name}`: {', '.join(col_names)}") + + return "\n".join(lines) + + +def is_sqlite_database(file_path: str) -> bool: + """ + Check if file is a valid SQLite database by reading magic bytes. + """ + try: + with open(file_path, 'rb') as f: + header = f.read(16) + return header[:16] == b'SQLite format 3\x00' + except: + return False + +--- END OF FILE: src/deepbase/database.py --- +---------------------------------------- + +--- START OF FILE: src/deepbase/main.py --- + +# src/deepbase/main.py + +import os +import typer +import fnmatch +import math +from rich.console import Console +from rich.progress import Progress +import tomli +import chardet +from importlib.metadata import version as get_package_version, PackageNotFoundError +from typing import List, Dict, Any, Set, Optional, Tuple + +from deepbase.toon import generate_toon_representation, generate_light_representation, generate_database_focused +from deepbase.parsers import get_document_structure +from deepbase.database import is_sqlite_database, get_database_schema, generate_database_context_full + +from rich.table import Table +from rich.panel import Panel + +# --- CONFIGURAZIONI --- + +DEFAULT_CONFIG = { + "ignore_dirs": { + "__pycache__", ".git", ".idea", ".vscode", "venv", ".venv", "env", + ".env", "node_modules", "build", "dist", "target", "out", "bin", + "obj", "logs", "tmp", "eggs", ".eggs", ".pytest_cache", ".tox", + "site", "*.egg-info", "coverage", ".next", ".nuxt", ".output", + "ios", "android" + }, + "ignore_files": { + "package-lock.json", "yarn.lock", "pnpm-lock.yaml", "bun.lockb", + "poetry.lock", "Pipfile.lock", "composer.lock", ".DS_Store", "Thumbs.db" + }, + "significant_extensions": { + ".py", ".java", ".js", ".jsx", ".ts", ".tsx", ".html", ".css", ".scss", ".sql", + ".md", ".json", ".xml", ".yml", ".yaml", ".sh", ".bat", "Dockerfile", + ".dockerignore", ".gitignore", "requirements.txt", "pom.xml", "gradlew", + "pyproject.toml", "setup.py", "package.json", "tsconfig.json", + ".tex", ".bib", ".sty", ".cls", + ".db", ".sqlite", ".sqlite3", ".db3" + } +} + +LIGHT_MODE_NOTICE = """> **[LIGHT MODE]** Questo file √® stato generato in modalit√† risparmio token: vengono incluse solo le firme dei metodi/funzioni e i commenti iniziali dei file. Il corpo del codice √® omesso. Se hai bisogno di approfondire un file, una classe o un metodo specifico, chiedi all'utente di fornire la porzione di codice completa. +""" + +console = Console() + +# --- UTILS --- + +def load_config(root_dir: str) -> Dict[str, Any]: + config_path = os.path.join(root_dir, ".deepbase.toml") + config = DEFAULT_CONFIG.copy() + config["ignore_dirs"] = set(config["ignore_dirs"]) + config["ignore_files"] = set(config["ignore_files"]) + config["significant_extensions"] = set(config["significant_extensions"]) + + if os.path.exists(config_path): + try: + with open(config_path, "rb") as f: + user_config = tomli.load(f) + config["ignore_dirs"].update(user_config.get("ignore_dirs", [])) + config["ignore_files"].update(user_config.get("ignore_files", [])) + config["significant_extensions"].update(user_config.get("significant_extensions", [])) + except tomli.TOMLDecodeError: + pass + return config + + +def estimate_tokens(size_bytes: int) -> str: + if size_bytes == 0: return "0t" + tokens = math.ceil(size_bytes / 4) + if tokens < 1000: + return f"~{tokens}t" + elif tokens < 1000000: + return f"~{tokens/1000:.1f}k t" + else: + return f"~{tokens/1000000:.1f}M t" + + +def estimate_tokens_for_content(text: str) -> int: + return math.ceil(len(text.encode("utf-8")) / 4) + +def calculate_light_tokens(file_path: str, content: str) -> int: + from deepbase.toon import generate_light_representation + light_repr = generate_light_representation(file_path, content) + return estimate_tokens_for_content(light_repr) + +def is_significant_file(file_path: str, config: Dict[str, Any], output_file_abs: str = None) -> bool: + file_name = os.path.basename(file_path) + + if output_file_abs and os.path.abspath(file_path) == output_file_abs: + return False + + if output_file_abs and file_name == os.path.basename(output_file_abs): + return False + + if file_name in config["ignore_files"]: + return False + + significant_extensions = config["significant_extensions"] + + if file_name in significant_extensions: + return True + + _, ext = os.path.splitext(file_path) + if ext in significant_extensions: + return True + + if is_sqlite_database(file_path): + return True + + return False + + +def calculate_project_stats(root_dir: str, config: Dict[str, Any], output_file_abs: str, light_mode: bool = False) -> int: + total_size = 0 + ignore_dirs = config["ignore_dirs"] + for dirpath, dirnames, filenames in os.walk(root_dir, topdown=True): + dirnames[:] = [d for d in dirnames if d not in ignore_dirs and not d.startswith('.')] + for f in filenames: + fpath = os.path.join(dirpath, f) + if is_significant_file(fpath, config, output_file_abs): + try: + if light_mode and not is_sqlite_database(fpath): + content = read_file_content(fpath) + light_repr = generate_light_representation(fpath, content) + total_size += len(light_repr.encode("utf-8")) + else: + total_size += os.path.getsize(fpath) + except OSError: + pass + return total_size + + +# --- ALBERO DELLE DIRECTORY --- + +def _generate_tree_recursive( + current_path: str, + prefix: str, + config: Dict[str, Any], + total_project_size: int, + output_file_abs: str, + light_mode: bool = False +) -> Tuple[str, int]: + output_str = "" + subtree_size = 0 + + try: + items = sorted(os.listdir(current_path)) + except PermissionError: + return "", 0 + + filtered_items = [] + for item in items: + full_path = os.path.join(current_path, item) + is_dir = os.path.isdir(full_path) + + if is_dir: + if item not in config["ignore_dirs"] and not item.startswith('.'): + filtered_items.append((item, True)) + else: + if is_significant_file(full_path, config, output_file_abs): + filtered_items.append((item, False)) + + for i, (name, is_dir) in enumerate(filtered_items): + is_last = (i == len(filtered_items) - 1) + full_path = os.path.join(current_path, name) + connector = "‚îî‚îÄ‚îÄ " if is_last else "‚îú‚îÄ‚îÄ " + + if is_dir: + extension = " " if is_last else "‚îÇ " + sub_tree_str, sub_dir_size = _generate_tree_recursive( + full_path, + prefix + extension, + config, + total_project_size, + output_file_abs + ) + + subtree_size += sub_dir_size + + folder_stats = "" + if total_project_size > 0 and sub_dir_size > 0: + percent = (sub_dir_size / total_project_size) * 100 + token_est = estimate_tokens(sub_dir_size) + folder_stats = f" ({percent:.1f}% | {token_est})" + + output_str += f"{prefix}{connector}üìÅ {name}/{folder_stats}\n" + output_str += sub_tree_str + + else: + icon = "üóÑÔ∏è " if is_sqlite_database(full_path) else "üìÑ " + try: + raw_size = os.path.getsize(full_path) + if light_mode and not is_sqlite_database(full_path): + content = read_file_content(full_path) + light_repr = generate_light_representation(full_path, content) + size = len(light_repr.encode("utf-8")) + else: + size = raw_size + subtree_size += size + + # [FIX] Ripristinate le righe mancanti per stampare il file nell'albero! + file_stats = "" + if total_project_size > 0 and size > 0: + percent = (size / total_project_size) * 100 + token_est = estimate_tokens(size) + file_stats = f" ({percent:.1f}% | {token_est})" + + output_str += f"{prefix}{connector}{icon}{name}{file_stats}\n" + + except OSError: + pass + + return output_str, subtree_size + + +def generate_directory_tree(root_dir: str, config: Dict[str, Any], output_file_abs: str, light_mode: bool = False) -> Tuple[str, int, int]: + abs_root = os.path.abspath(root_dir) + total_size = calculate_project_stats(root_dir, config, output_file_abs, light_mode) + tree_body, _ = _generate_tree_recursive(root_dir, "", config, total_size, output_file_abs, light_mode) + header = f"üìÅ {os.path.basename(abs_root) or '.'}/\n" + total_tokens_est = math.ceil(total_size / 4) + return header + tree_body, total_size, total_tokens_est + + +# --- CORE --- + +def get_all_significant_files(root_dir: str, config: Dict[str, Any], output_file_abs: str) -> List[str]: + significant_files = [] + ignore_dirs = config["ignore_dirs"] + for dirpath, dirnames, filenames in os.walk(root_dir, topdown=True): + dirnames[:] = [d for d in dirnames if d not in ignore_dirs and not d.startswith('.')] + for filename in sorted(filenames): + file_path = os.path.join(dirpath, filename) + if is_significant_file(file_path, config, output_file_abs): + significant_files.append(file_path) + return significant_files + + +def read_file_content(file_path: str) -> str: + if is_sqlite_database(file_path): + try: + schema = get_database_schema(file_path) + return generate_database_context_full(schema, os.path.basename(file_path)) + except Exception as e: + return f"!!! Error reading database: {e} !!!" + try: + with open(file_path, "rb") as fb: + raw_data = fb.read() + detection = chardet.detect(raw_data) + encoding = detection['encoding'] if detection['encoding'] else 'utf-8' + return raw_data.decode(encoding, errors="replace") + except Exception as e: + return f"!!! Error reading file: {e} !!!" + + +def matches_focus(file_path: str, root_dir: str, focus_patterns: List[str]) -> bool: + if not focus_patterns: return False + rel_path = os.path.relpath(file_path, root_dir) + rel_path_fwd = rel_path.replace(os.sep, '/') + for pattern in focus_patterns: + clean_pattern = pattern.replace(os.sep, '/') + if fnmatch.fnmatch(rel_path_fwd, clean_pattern): return True + if clean_pattern in rel_path_fwd: return True + return False + + +def extract_focused_tables(file_path: str, focus_patterns: List[str]) -> List[str]: + if not is_sqlite_database(file_path): return [] + db_name = os.path.basename(file_path) + focused_tables = [] + for pattern in focus_patterns: + if '/' in pattern: + db_pattern, table_name = pattern.split('/', 1) + if fnmatch.fnmatch(db_name, db_pattern): focused_tables.append(table_name) + return focused_tables + + +def load_focus_patterns_from_file(file_path: str) -> List[str]: + patterns = [] + if os.path.exists(file_path): + try: + with open(file_path, 'r', encoding='utf-8') as f: + lines = f.readlines() + for line in lines: + line = line.strip() + if line and not line.startswith("#"): patterns.append(line) + except Exception as e: + console.print(f"[bold yellow]Warning:[/bold yellow] Could not read focus file '{file_path}': {e}") + else: + console.print(f"[bold yellow]Warning:[/bold yellow] Focus file '{file_path}' not found.") + return patterns + + +def version_callback(value: bool): + if value: + try: + v = get_package_version("deepbase") + console.print(f"DeepBase version: [bold cyan]{v}[/bold cyan]") + except PackageNotFoundError: + console.print("DeepBase version: [yellow]unknown (editable/dev mode)[/yellow]") + raise typer.Exit() + + +# --- LOGICA PRINCIPALE (SENZA CLASSE TYPER) --- + +def main( + target: str = typer.Argument(None, help="The file or directory to scan."), + help: bool = typer.Option(False, "--help", "-h", is_eager=True, help="Show this help message and exit."), + version: Optional[bool] = typer.Option(None, "--version", "-v", callback=version_callback, is_eager=True, help="Show version and exit."), + output: str = typer.Option("llm_context.md", "--output", "-o", help="The output file."), + verbose: bool = typer.Option(False, "--verbose", "-V", help="Show detailed output."), + include_all: bool = typer.Option(False, "--all", "-a", help="Include full content of ALL files."), + light_mode: bool = typer.Option(False, "--light", "-l", help="Token-saving mode (signatures only)."), + focus: Optional[List[str]] = typer.Option(None, "--focus", "-f", help="Pattern to focus on (repeatable)."), + focus_file: Optional[str] = typer.Option(None, "--focus-file", "-ff", help="Path to focus patterns file.") +): + """ + Analyzes a directory OR a single file. + Default: structure tree only. + """ + # 1. Custom HELP Logic + if help or target is None: + console.print(Panel.fit( + "[bold cyan]DeepBase[/bold cyan] ‚Äî Consolidate project context for LLMs\n\n" + "[bold]Usage:[/bold] [green]deepbase[/green] [OPTIONS] [TARGET]\n\n" + "[bold]Arguments:[/bold]\n" + " [cyan]TARGET[/cyan] The file or directory to scan [dim][default: current dir][/dim]\n", + title="DeepBase v1.7.0", border_style="cyan" + )) + + # Options Table + options_table = Table(show_header=False, box=None, padding=(0, 2)) + options_table.add_column(style="cyan", no_wrap=True) + options_table.add_column(style="green", no_wrap=True) + options_table.add_column() + + options = [ + ("-v, --version", "", "Show version and exit"), + ("-o, --output", "TEXT", "Output file [dim][default: llm_context.md][/dim]"), + ("-V, --verbose", "", "Show detailed output"), + ("-a, --all", "", "Include full content of ALL files"), + ("-l, --light", "", "Token-saving mode (signatures only)"), + ("-f, --focus", "TEXT", "Pattern to focus on (repeatable)"), + ("-ff, --focus-file", "TEXT", "Path to focus patterns file"), + ("-h, --help", "", "Show this message and exit"), + ] + for opt, meta, desc in options: + options_table.add_row(opt, meta, desc) + + console.print(Panel(options_table, title="Options", border_style="green", title_align="left")) + + config_content = """Create a [cyan].deepbase.toml[/cyan] in your project root: + +[dim]# Ignore additional directories[/dim] +[yellow]ignore_dirs = ["my_assets", "experimental"][/yellow] + +[dim]# Ignore specific files[/dim] +[yellow]ignore_files = ["*.log", "secrets.env"][/yellow] + +[dim]# Add extra file extensions[/dim] +[yellow]significant_extensions = [".cfg", "Makefile", ".tsx"][/yellow]""" + + console.print(Panel( + config_content, + title="Configuration (.deepbase.toml)", + border_style="yellow", + title_align="left" + )) + + links_table = Table(show_header=False, box=None, padding=(0, 2)) + links_table.add_column(style="bold") + links_table.add_column(style="blue") + + links_table.add_row("Documentation:", "https://follen99.github.io/DeepBase/") + links_table.add_row("Repository:", "https://github.com/follen99/DeepBase") + links_table.add_row("Issues:", "https://github.com/follen99/DeepBase/issues") + links_table.add_row("PyPI:", "https://pypi.org/project/deepbase/") + + console.print(Panel(links_table, title="Links", border_style="blue", title_align="left")) + + raise typer.Exit() + + # 2. Main Logic Start + if not os.path.exists(target): + console.print(f"[bold red]Error:[/bold red] Target not found: '{target}'") + raise typer.Exit(code=1) + + abs_output_path = os.path.abspath(output) + + active_focus_patterns = [] + if focus: active_focus_patterns.extend(focus) + if focus_file: + file_patterns = load_focus_patterns_from_file(focus_file) + if file_patterns: active_focus_patterns.extend(file_patterns) + active_focus_patterns = list(set(active_focus_patterns)) + + mode_label = "" + if light_mode: + mode_label = " [bold yellow](LIGHT ‚Äî signatures only)[/bold yellow]" + elif include_all: + mode_label = " [bold cyan](ALL ‚Äî full content)[/bold cyan]" + + console.print(f"[bold green]Analyzing '{target}'...[/bold green]{mode_label}") + + if light_mode: + def fmt_header(title): return f"### {title}\n\n" + def fmt_file_start(path, icon=""): return f"> FILE: {icon}{path}\n" + def fmt_file_end(path): return "\n" + def fmt_separator(): return "" + else: + def fmt_header(title): return f"{'='*80}\n### {title} ###\n{'='*80}\n\n" + def fmt_file_start(path, icon=""): return f"--- START OF FILE: {icon}{path} ---\n\n" + def fmt_file_end(path): return f"\n\n--- END OF FILE: {path} ---\n" + def fmt_separator(): return "-" * 40 + "\n\n" + + try: + with open(output, "w", encoding="utf-8") as outfile: + # CASO 1: Singolo file + if os.path.isfile(target): + filename = os.path.basename(target) + is_db = is_sqlite_database(target) + outfile.write(f"# Analysis: {filename}\n\n") + if light_mode: + outfile.write(LIGHT_MODE_NOTICE + "\n") + + if is_db: + schema = get_database_schema(target) + focused_tables = extract_focused_tables(target, active_focus_patterns) + is_focused = bool(focused_tables) or (active_focus_patterns and any( + fnmatch.fnmatch(filename, p) or p in filename for p in active_focus_patterns + )) + outfile.write(fmt_header("DATABASE SCHEMA")) + if light_mode and not is_focused: + outfile.write(generate_light_representation(target, "")) + elif focused_tables: + outfile.write(generate_database_focused(target, focused_tables)) + else: + outfile.write(generate_database_context_full(schema, filename)) + else: + content = read_file_content(target) + structure = get_document_structure(target, content) + outfile.write(fmt_header("STRUCTURE")) + outfile.write(structure or "N/A") + outfile.write("\n\n") + outfile.write(fmt_header("CONTENT")) + outfile.write(fmt_file_start(filename)) + if light_mode: + outfile.write(generate_light_representation(target, content)) + else: + outfile.write(content) + outfile.write(fmt_file_end(filename)) + + # CASO 2: Directory + elif os.path.isdir(target): + config = load_config(target) + outfile.write(f"# Project Context: {os.path.basename(os.path.abspath(target))}\n\n") + if light_mode: + outfile.write(LIGHT_MODE_NOTICE + "\n") + outfile.write(fmt_header("PROJECT STRUCTURE")) + + tree_str, total_bytes, total_tokens = generate_directory_tree(target, config, abs_output_path, light_mode=light_mode) + + if light_mode: + outfile.write(f"> Total Size (raw): {total_bytes/1024:.2f} KB | Est. Tokens (light): ~{total_tokens:,}\n") + else: + outfile.write(f"> Total Size: {total_bytes/1024:.2f} KB | Est. Tokens: ~{total_tokens:,}\n") + + outfile.write(tree_str) + outfile.write("\n\n") + + if include_all or light_mode or active_focus_patterns: + section_title = "FILE CONTENTS" + if light_mode: section_title += " (LIGHT ‚Äî signatures only)" + outfile.write(fmt_header(section_title)) + files = get_all_significant_files(target, config, abs_output_path) + + with Progress(console=console) as progress: + task = progress.add_task("[cyan]Processing...", total=len(files)) + for fpath in files: + rel_path = os.path.relpath(fpath, target).replace('\\', '/') + is_db = is_sqlite_database(fpath) + is_in_focus = active_focus_patterns and matches_focus(fpath, target, active_focus_patterns) + focused_tables = [] + if is_db: + focused_tables = extract_focused_tables(fpath, active_focus_patterns) + if focused_tables: is_in_focus = True + + should_write_full = include_all or is_in_focus + should_write_light = light_mode and not should_write_full + + if not should_write_full and not should_write_light: + progress.update(task, advance=1) + continue + + progress.update(task, advance=1, description=f"[cyan]{rel_path}[/cyan]") + marker = " [FOCUSED]" if (is_in_focus and light_mode) else "" + icon = "üóÑÔ∏è " if is_db else "" + outfile.write(fmt_file_start(rel_path + marker, icon)) + + if is_db: + if should_write_full: + if focused_tables: + outfile.write(generate_database_focused(fpath, focused_tables)) + else: + schema = get_database_schema(fpath) + outfile.write(generate_database_context_full(schema, os.path.basename(fpath))) + else: + outfile.write(generate_light_representation(fpath, "")) + else: + content = read_file_content(fpath) + if should_write_full: + outfile.write(content) + elif should_write_light: + light_output = generate_light_representation(fpath, content) + outfile.write(light_output) + + outfile.write(fmt_file_end(rel_path)) + outfile.write(fmt_separator()) + else: + console.print("[dim]Directory tree generated. Use --light, --all, or --focus for content.[/dim]") + + console.print(f"\n[bold green]‚úî SUCCESS[/bold green]: Context created in [cyan]'{output}'[/cyan]") + + except Exception as e: + console.print(f"\n[bold red]Error:[/bold red] {e}") + raise typer.Exit(code=1) + +# Entry point che usa typer.run per gestire il comando come SINGOLO +def app(): + typer.run(main) + +if __name__ == "__main__": + app() +--- END OF FILE: src/deepbase/main.py --- +---------------------------------------- + +--- START OF FILE: src/deepbase/toon.py --- + +# src/deepbase/toon.py + +import os +import ast +import json +import re + +# Import database handling +from deepbase.database import ( + get_database_schema, + generate_database_context_toon, + generate_database_context_hybrid, + is_sqlite_database +) + +# Import new parser registry +from deepbase.parsers.registry import registry + +# Manteniamo ToonVisitor originale per la retrocompatibilità (se usato altrove) +# o per la funzione generate_toon_representation "standard" (non light). +class ToonVisitor(ast.NodeVisitor): + def __init__(self): + self.output = [] + self.indent_level = 0 + + def _log(self, text): + indent = " " * self.indent_level + self.output.append(f"{indent}{text}") + + def visit_ClassDef(self, node): + bases = [b.id for b in node.bases if isinstance(b, ast.Name)] + base_str = f"({', '.join(bases)})" if bases else "" + self._log(f"C: {node.name}{base_str}") + + self.indent_level += 1 + docstring = ast.get_docstring(node) + if docstring: + short_doc = docstring.split('\n')[0].strip() + self._log(f"\"\"\"{short_doc}...\"\"\"") + + self.generic_visit(node) + self.indent_level -= 1 + + def visit_FunctionDef(self, node): + self._handle_function(node) + + def visit_AsyncFunctionDef(self, node): + self._handle_function(node, is_async=True) + + def _handle_function(self, node, is_async=False): + args = [arg.arg for arg in node.args.args] + args_str = ", ".join(args) + prefix = "async " if is_async else "" + self._log(f"{prefix}F: {node.name}({args_str})") + + docstring = ast.get_docstring(node) + if docstring: + self.indent_level += 1 + short_doc = docstring.split('\n')[0].strip() + self._log(f"\"\"\"{short_doc}...\"\"\"") + self.indent_level -= 1 + + def generic_visit(self, node): + for child in ast.iter_child_nodes(node): + if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): + self.visit(child) + +# --- Helper Legacy per TOON non-light (struttura scheletrica) --- +# (Qui potresti voler spostare anche questi nei parser in futuro, +# ma per ora ci concentriamo sulla modalità --light) + +def _handle_markdown(content: str) -> str: + lines = [l.strip() for l in content.splitlines() if l.strip().startswith("#")] + return "\n".join(lines) or "(Markdown file with no headers)" + +def _handle_database_toon(file_path: str) -> str: + if is_sqlite_database(file_path): + try: + schema = get_database_schema(file_path) + return generate_database_context_toon(schema, os.path.basename(file_path)) + except Exception as e: + return f"(DB Error: {e})" + return "(Not a valid SQLite database)" + +# --------------------------------------------------------------------------- +# Funzione pubblica principale — LIGHT (solo firme) +# --------------------------------------------------------------------------- + +def generate_light_representation(file_path: str, content: str) -> str: + """ + Genera una rappresentazione LIGHT usando il nuovo sistema di plugin/parser. + """ + # 1. Gestione Database (caso speciale, non basato su contenuto testo) + if is_sqlite_database(file_path): + return _handle_database_toon(file_path) + + # 2. Usa il registro per trovare il parser corretto + return registry.parse_file(file_path, content) + +def get_light_mode_warnings() -> str: + """ + Restituisce i warning accumulati durante l'esecuzione (es. linguaggi non supportati). + Da chiamare in main.py se si vuole stampare un header. + """ + return registry.get_unsupported_warning() + +# --------------------------------------------------------------------------- +# Funzione pubblica principale — TOON (skeleton legacy) +# --------------------------------------------------------------------------- + +def generate_toon_representation(file_path: str, content: str) -> str: + """ + Genera una rappresentazione TOON (Token Oriented - Skeleton) + Mantiene la logica originale per ora, o delega a Light se preferisci unificare. + """ + _, ext = os.path.splitext(file_path) + ext = ext.lower() + + if is_sqlite_database(file_path): + return _handle_database_toon(file_path) + + if ext == ".py": + try: + tree = ast.parse(content) + visitor = ToonVisitor() + visitor.visit(tree) + return "\n".join(visitor.output) + except SyntaxError: + return f"(Syntax Error parsing {os.path.basename(file_path)})" + + elif ext in [".md", ".markdown"]: + return _handle_markdown(content) + + # Per semplicità, per ora il Toon standard per altri file + # può usare il fallback del nuovo sistema o la vecchia logica. + # Usiamo il fallback del registry per coerenza: + return registry.parse_file(file_path, content) + +# --------------------------------------------------------------------------- +# Helper per database in focus mode +# --------------------------------------------------------------------------- + +def generate_database_focused(file_path: str, focused_tables: list = None) -> str: + from deepbase.database import generate_database_context_full, generate_database_context_hybrid + if not is_sqlite_database(file_path): + return "(Not a valid SQLite database)" + try: + schema = get_database_schema(file_path) + db_name = os.path.basename(file_path) + if focused_tables: + return generate_database_context_hybrid(schema, db_name, focused_tables) + else: + return generate_database_context_full(schema, db_name) + except Exception as e: + return f"(Error processing database: {e})" + +--- END OF FILE: src/deepbase/toon.py --- +---------------------------------------- + +--- START OF FILE: src/deepbase/parsers/__init__.py --- + +# src/deepbase/parsers/__init__.py +from .document import get_document_structure +from .registry import registry + +# Espone anche le classi se necessario in futuro +__all__ = ['get_document_structure', 'registry'] + +--- END OF FILE: src/deepbase/parsers/__init__.py --- +---------------------------------------- + +--- START OF FILE: src/deepbase/parsers/document.py --- + +# src/deepbase/parsers/document.py +import re +import os +from .interface import LanguageParser + +class MarkdownParser(LanguageParser): + def parse(self, content: str, file_path: str) -> str: + lines = [] + for line in content.splitlines(): + if line.strip().startswith("#"): + lines.append(line.strip()) + if not lines: + return "(Markdown file with no headers)" + return "\n".join(lines) + +class LatexParser(LanguageParser): + def parse(self, content: str, file_path: str) -> str: + keep_patterns = [ + r'^\s*\\documentclass', + r'^\s*\\usepackage', + r'^\s*\\input', + r'^\s*\\include', + r'^\s*\\(part|chapter|section|subsection|subsubsection)', + r'^\s*\\begin', + r'^\s*\\end', + r'^\s*\\title', + r'^\s*\\author', + r'^\s*\\date' + ] + combined_pattern = re.compile('|'.join(keep_patterns)) + lines = [] + for line in content.splitlines(): + # Rimuovi commenti inline parziali se necessario, qui semplifichiamo + line_clean = line.split('%')[0].rstrip() + if combined_pattern.match(line_clean): + lines.append(line_clean) + if not lines: + return "(LaTeX content empty or purely textual)" + return "\n".join(lines) + +# Istanziamo i parser per uso interno +_md_parser = MarkdownParser() +_tex_parser = LatexParser() + +def get_document_structure(file_path: str, content: str): + """ + Funzione di compatibilità per main.py. + Restituisce la struttura se è un documento supportato, altrimenti None. + """ + _, ext = os.path.splitext(file_path) + ext = ext.lower() + + if ext in ['.md', '.markdown']: + return _md_parser.parse(content, file_path) + elif ext in ['.tex', '.sty', '.cls']: + return _tex_parser.parse(content, file_path) + + return None + +--- END OF FILE: src/deepbase/parsers/document.py --- +---------------------------------------- + +--- START OF FILE: src/deepbase/parsers/fallback.py --- + +# src/deepbase/parsers/fallback.py +from .interface import LanguageParser + +class FallbackParser(LanguageParser): + """ + Parser generico per file non supportati specificamente. + Tenta di restituire una versione minimizzata o troncata. + """ + def parse(self, content: str, file_path: str) -> str: + lines = [] + # Rimuove righe vuote e commenti base + for line in content.splitlines(): + clean = line.strip() + if clean and not clean.startswith("#"): + lines.append(clean) + + if not lines: + return "(Empty or comments-only file)" + + # Se il file è molto lungo, troncalo per il fallback + if len(lines) > 20: + preview = "\n".join(lines[:20]) + return f"{preview}\n... ({len(lines)-20} more lines hidden - Light Mode Fallback)" + + return "\n".join(lines) + +--- END OF FILE: src/deepbase/parsers/fallback.py --- +---------------------------------------- + +--- START OF FILE: src/deepbase/parsers/interface.py --- + +# src/deepbase/parsers/interface.py +from abc import ABC, abstractmethod + +class LanguageParser(ABC): + """ + Interfaccia base per i parser di linguaggio. + """ + + @abstractmethod + def parse(self, content: str, file_path: str) -> str: + """ + Parsa il contenuto del file e restituisce una rappresentazione 'light' (firme, struttura). + """ + pass + +--- END OF FILE: src/deepbase/parsers/interface.py --- +---------------------------------------- + +--- START OF FILE: src/deepbase/parsers/python.py --- + +# src/deepbase/parsers/python.py +import ast +import os +from .interface import LanguageParser + +def _extract_module_comments(source: str) -> str: + """ + Estrae i commenti # e la docstring di modulo dalle prime righe del sorgente. + """ + lines = [] + in_docstring = False + docstring_char = None + source_lines = source.splitlines() + + for line in source_lines: + stripped = line.strip() + + # Riga vuota: la includiamo solo se siamo già dentro i commenti iniziali + if not stripped: + if lines: + lines.append("") + continue + + # Commenti # semplici + if stripped.startswith("#") and not in_docstring: + lines.append(line.rstrip()) + continue + + # Inizio docstring di modulo (""" o ''') + if not in_docstring and (stripped.startswith('"""') or stripped.startswith("'''")): + docstring_char = stripped[:3] + in_docstring = True + lines.append(line.rstrip()) + # Docstring su singola riga + rest = stripped[3:] + if rest.endswith(docstring_char) and len(rest) >= 3: + in_docstring = False + continue + + if in_docstring: + lines.append(line.rstrip()) + if stripped.endswith(docstring_char): + in_docstring = False + continue ---- END OF FILE: main.py --- + # Qualsiasi altra cosa: fine dell'header + break + + # Rimuovi trailing blank lines + while lines and not lines[-1].strip(): + lines.pop() + + return "\n".join(lines) + + +class LightVisitor(ast.NodeVisitor): + """ + Visita l'AST e produce le firme dei metodi/funzioni Python. + """ + + def __init__(self): + self.output = [] + self.indent_level = 0 + + def _log(self, text): + indent = " " * self.indent_level + self.output.append(f"{indent}{text}") + + def visit_ClassDef(self, node): + self._log(f"class {node.name}:") + self.indent_level += 1 + + # Docstring della classe (prima riga) + docstring = ast.get_docstring(node) + if docstring: + first_line = docstring.split('\n')[0].strip() + self._log(f'"""{first_line}"""') + + self.generic_visit(node) + self.indent_level -= 1 + + def visit_FunctionDef(self, node): + self._emit_signature(node, is_async=False) + + def visit_AsyncFunctionDef(self, node): + self._emit_signature(node, is_async=True) + + def _emit_signature(self, node, is_async: bool): + prefix = "async " if is_async else "" + + # --- Argomenti con annotazioni di tipo --- + args_parts = [] + all_args = node.args.args + defaults = node.args.defaults + defaults_offset = len(all_args) - len(defaults) + + for i, arg in enumerate(all_args): + arg_str = arg.arg + if arg.annotation: + try: + arg_str += f": {ast.unparse(arg.annotation)}" + except Exception: + # Fallback per vecchie versioni python o AST complessi + pass + default_idx = i - defaults_offset + if default_idx >= 0: + try: + default_val = ast.unparse(defaults[default_idx]) + arg_str += f" = {default_val}" + except Exception: + arg_str += " = ..." + args_parts.append(arg_str) + + # *args + if node.args.vararg: + va = node.args.vararg + va_str = f"*{va.arg}" + if va.annotation: + try: + va_str += f": {ast.unparse(va.annotation)}" + except Exception: + pass + args_parts.append(va_str) + + # keyword-only args + kwonly_defaults = { + i: node.args.kw_defaults[i] + for i in range(len(node.args.kwonlyargs)) + if node.args.kw_defaults[i] is not None + } + for i, kwarg in enumerate(node.args.kwonlyargs): + kw_str = kwarg.arg + if kwarg.annotation: + try: + kw_str += f": {ast.unparse(kwarg.annotation)}" + except Exception: + pass + if i in kwonly_defaults: + try: + kw_str += f" = {ast.unparse(kwonly_defaults[i])}" + except Exception: + kw_str += " = ..." + args_parts.append(kw_str) + + # **kwargs + if node.args.kwarg: + kwa = node.args.kwarg + kwa_str = f"**{kwa.arg}" + if kwa.annotation: + try: + kwa_str += f": {ast.unparse(kwa.annotation)}" + except Exception: + pass + args_parts.append(kwa_str) + + args_str = ", ".join(args_parts) + + # --- Tipo di ritorno --- + ret_anno = "" + if node.returns: + try: + ret_anno = f" -> {ast.unparse(node.returns)}" + except Exception: + pass + + self._log(f"{prefix}def {node.name}({args_str}){ret_anno}: ...") + + # Docstring della funzione (prima riga) + docstring = ast.get_docstring(node) + if docstring: + first_line = docstring.split('\n')[0].strip() + self.indent_level += 1 + self._log(f'"""{first_line}"""') + self.indent_level -= 1 + + def generic_visit(self, node): + for child in ast.iter_child_nodes(node): + if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): + self.visit(child) + + +class PythonParser(LanguageParser): + def parse(self, content: str, file_path: str) -> str: + filename = os.path.basename(file_path) + try: + tree = ast.parse(content) + visitor = LightVisitor() + visitor.visit(tree) + signatures = "\n".join(visitor.output) + + # Prepend commenti/docstring iniziali del modulo + module_header = _extract_module_comments(content) + + parts = [] + if module_header: + parts.append(module_header) + if signatures: + parts.append(signatures) + + result = "\n\n".join(parts) + return result.strip() or f"(No functions or classes found in {filename})" + except SyntaxError: + return f"(Syntax Error parsing {filename})" + except Exception as e: + return f"(Error parsing Python file: {e})" + +--- END OF FILE: src/deepbase/parsers/python.py --- +---------------------------------------- + +--- START OF FILE: src/deepbase/parsers/registry.py --- + +# src/deepbase/parsers/registry.py +import os +from typing import Dict, Set +from .interface import LanguageParser +from .python import PythonParser +from .document import MarkdownParser, LatexParser # <--- Importa i nuovi parser +from .fallback import FallbackParser + +class ParserRegistry: + def __init__(self): + self._parsers: Dict[str, LanguageParser] = {} + self._fallback = FallbackParser() + self._unsupported_extensions_encountered: Set[str] = set() + + # Registrazione parser + self.register_parser('.py', PythonParser()) + + # Registrazione Documenti + md_parser = MarkdownParser() + self.register_parser('.md', md_parser) + self.register_parser('.markdown', md_parser) + + tex_parser = LatexParser() + for ext in ['.tex', '.sty', '.cls']: + self.register_parser(ext, tex_parser) + + def register_parser(self, extension: str, parser: LanguageParser): + self._parsers[extension] = parser + + def get_parser(self, file_path: str) -> LanguageParser: + _, ext = os.path.splitext(file_path) + ext = ext.lower() + + if ext in self._parsers: + return self._parsers[ext] + + if ext: + self._unsupported_extensions_encountered.add(ext) + + return self._fallback + + def parse_file(self, file_path: str, content: str) -> str: + parser = self.get_parser(file_path) + return parser.parse(content, file_path) + + def get_unsupported_warning(self) -> str: + if not self._unsupported_extensions_encountered: + return "" + ext_list = ", ".join(sorted(self._unsupported_extensions_encountered)) + return ( + f"> [WARNING] Light Mode support is currently limited for: {ext_list}. " + "Using generic fallback for these files.\n" + ) + +registry = ParserRegistry() + +--- END OF FILE: src/deepbase/parsers/registry.py --- +---------------------------------------- + +--- START OF FILE: docs/index.md --- + +# DeepBase + +**DeepBase** is a command-line tool that analyzes a project directory, extracts the folder structure and the content of all significant code files, and consolidates them into a single text/markdown file. + +This unified "context" is perfect for providing to a Large Language Model (LLM) to enable it to deeply understand the entire codebase. + +## Features + +- **Project Structure**: Generates a tree view of the folder and file structure. +- **Smart Filtering**: Automatically ignores common unnecessary directories (e.g., `.git`, `venv`, `node_modules`). +- **Configurable**: Customize ignored directories and included extensions via a `.deepbase.toml` file. +- **Extension Selection**: Includes only files with relevant code or configuration extensions. +- **Unified Output**: Combines everything into a single file, easy to copy and paste. +- **PyPI Ready**: Easy to install via `pip`. + +## Installation + +You can install DeepBase directly from PyPI: + +```sh +pip install deepbase + +``` + +## How to Use + +Once installed, you will have the `deepbase` command available in your terminal. + +**Basic Usage:** + +Navigate to your project folder (or a parent folder) and run: + +```sh +deepbase . +``` +*The dot `.` indicates the current directory.* + +This command will create a file called `llm_context.md` in the current directory. + +**Specify Directory and Output File:** + +```sh +deepbase /path/to/your/project -o project_context.txt +``` + +### Advanced Configuration + +You can customize DeepBase's behavior by creating a `.deepbase.toml` file in the root of the project you are analyzing. + +**Example `.deepbase.toml`:** +```toml +# Add more directories to ignore. +# These will be added to the default ones. +ignore_dirs = [ + "my_assets_folder", + "experimental" +] + +# Add more extensions or filenames to include. +significant_extensions = [ + ".cfg", + "Makefile" +] +``` + +## License + +This project is released under the GPL 3 license. See the `LICENSE` file for details. + +--- END OF FILE: docs/index.md --- ---------------------------------------- ---- START OF FILE: utils/helper.py --- +--- START OF FILE: docs/reference.md --- + +# API Reference + +## Main Module -def help_me(): - pass +::: deepbase.main ---- END OF FILE: utils/helper.py --- +--- END OF FILE: docs/reference.md --- ---------------------------------------- diff --git a/pyproject.toml b/pyproject.toml index 8d1ea3d..6e2bf23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta" [project] name = "deepbase" # Increment the version to reflect changes -version = "1.7.0" +version = "1.8.0" authors = [ { name="Giuliano Ranauro", email="ranaurogln@email.com" }, ] diff --git a/src/deepbase/parsers.py b/src/deepbase/parsers.py deleted file mode 100644 index 832ef24..0000000 --- a/src/deepbase/parsers.py +++ /dev/null @@ -1,85 +0,0 @@ -# src/deepbase/parser.py (AGGIORNAMENTO) - -import os -import re -from typing import Optional - -# Import database functions -from deepbase.database import ( - get_database_schema, - generate_database_context_full, - generate_database_context_toon, - is_sqlite_database -) - - -def extract_markdown_structure(content: str) -> str: - """Estrae solo le intestazioni (headers) da un contenuto Markdown.""" - lines = [] - # Regex per catturare le righe che iniziano con # - header_pattern = re.compile(r'^\s*(#{1,6})\s+(.*)') - - for line in content.splitlines(): - if header_pattern.match(line): - lines.append(line.strip()) - - if not lines: - return "(Nessuna struttura Markdown rilevata)" - return "\n".join(lines) - - -def extract_latex_structure(content: str) -> str: - """Estrae comandi strutturali LaTeX (part, chapter, section, etc).""" - lines = [] - # Regex per catturare comandi strutturali standard di LaTeX - # Supporta \section{Title} e \section*{Title} - tex_pattern = re.compile(r'^\s*\\(part|chapter|section|subsection|subsubsection|paragraph|subparagraph)\*?\{(.+?)\}') - - # Catturiamo anche documentclass e begin/end document per contesto - context_pattern = re.compile(r'^\s*\\(documentclass|begin|end)\{.+?\}') - - for line in content.splitlines(): - if tex_pattern.match(line) or context_pattern.match(line): - lines.append(line.strip()) - - if not lines: - return "(Nessuna struttura LaTeX rilevata)" - return "\n".join(lines) - - -def get_document_structure(file_path: str, content: str) -> Optional[str]: - """Funzione dispatcher che decide quale parser usare.""" - _, ext = os.path.splitext(file_path) - ext = ext.lower() - - # --- DATABASE HANDLER --- - if ext in ['.db', '.sqlite', '.sqlite3', '.db3'] or is_sqlite_database(file_path): - if is_sqlite_database(file_path): - try: - schema = get_database_schema(file_path) - return generate_database_context_full(schema, os.path.basename(file_path)) - except Exception as e: - return f"(Error reading database: {e})" - - if ext in ['.md', '.markdown', '.mdown', '.mkd']: - return extract_markdown_structure(content) - - # --- LATEX HANDLER --- - elif ext in ['.tex']: - return extract_latex_structure(content) - - return None - - -def get_database_toon(file_path: str) -> Optional[str]: - """ - Generate TOON representation for database files. - Called by toon.py when processing databases. - """ - if is_sqlite_database(file_path): - try: - schema = get_database_schema(file_path) - return generate_database_context_toon(schema, os.path.basename(file_path)) - except Exception as e: - return f"(DB Error: {e})" - return None \ No newline at end of file diff --git a/src/deepbase/parsers/__init__.py b/src/deepbase/parsers/__init__.py new file mode 100644 index 0000000..2f42997 --- /dev/null +++ b/src/deepbase/parsers/__init__.py @@ -0,0 +1,6 @@ +# src/deepbase/parsers/__init__.py +from .document import get_document_structure +from .registry import registry + +# Espone anche le classi se necessario in futuro +__all__ = ['get_document_structure', 'registry'] \ No newline at end of file diff --git a/src/deepbase/parsers/document.py b/src/deepbase/parsers/document.py new file mode 100644 index 0000000..715798a --- /dev/null +++ b/src/deepbase/parsers/document.py @@ -0,0 +1,58 @@ +# src/deepbase/parsers/document.py +import re +import os +from .interface import LanguageParser + +class MarkdownParser(LanguageParser): + def parse(self, content: str, file_path: str) -> str: + lines = [] + for line in content.splitlines(): + if line.strip().startswith("#"): + lines.append(line.strip()) + if not lines: + return "(Markdown file with no headers)" + return "\n".join(lines) + +class LatexParser(LanguageParser): + def parse(self, content: str, file_path: str) -> str: + keep_patterns = [ + r'^\s*\\documentclass', + r'^\s*\\usepackage', + r'^\s*\\input', + r'^\s*\\include', + r'^\s*\\(part|chapter|section|subsection|subsubsection)', + r'^\s*\\begin', + r'^\s*\\end', + r'^\s*\\title', + r'^\s*\\author', + r'^\s*\\date' + ] + combined_pattern = re.compile('|'.join(keep_patterns)) + lines = [] + for line in content.splitlines(): + # Rimuovi commenti inline parziali se necessario, qui semplifichiamo + line_clean = line.split('%')[0].rstrip() + if combined_pattern.match(line_clean): + lines.append(line_clean) + if not lines: + return "(LaTeX content empty or purely textual)" + return "\n".join(lines) + +# Istanziamo i parser per uso interno +_md_parser = MarkdownParser() +_tex_parser = LatexParser() + +def get_document_structure(file_path: str, content: str): + """ + Funzione di compatibilità per main.py. + Restituisce la struttura se è un documento supportato, altrimenti None. + """ + _, ext = os.path.splitext(file_path) + ext = ext.lower() + + if ext in ['.md', '.markdown']: + return _md_parser.parse(content, file_path) + elif ext in ['.tex', '.sty', '.cls']: + return _tex_parser.parse(content, file_path) + + return None \ No newline at end of file diff --git a/src/deepbase/parsers/fallback.py b/src/deepbase/parsers/fallback.py new file mode 100644 index 0000000..ae58c4f --- /dev/null +++ b/src/deepbase/parsers/fallback.py @@ -0,0 +1,25 @@ +# src/deepbase/parsers/fallback.py +from .interface import LanguageParser + +class FallbackParser(LanguageParser): + """ + Parser generico per file non supportati specificamente. + Tenta di restituire una versione minimizzata o troncata. + """ + def parse(self, content: str, file_path: str) -> str: + lines = [] + # Rimuove righe vuote e commenti base + for line in content.splitlines(): + clean = line.strip() + if clean and not clean.startswith("#"): + lines.append(clean) + + if not lines: + return "(Empty or comments-only file)" + + # Se il file è molto lungo, troncalo per il fallback + if len(lines) > 20: + preview = "\n".join(lines[:20]) + return f"{preview}\n... ({len(lines)-20} more lines hidden - Light Mode Fallback)" + + return "\n".join(lines) \ No newline at end of file diff --git a/src/deepbase/parsers/interface.py b/src/deepbase/parsers/interface.py new file mode 100644 index 0000000..71406c7 --- /dev/null +++ b/src/deepbase/parsers/interface.py @@ -0,0 +1,14 @@ +# src/deepbase/parsers/interface.py +from abc import ABC, abstractmethod + +class LanguageParser(ABC): + """ + Interfaccia base per i parser di linguaggio. + """ + + @abstractmethod + def parse(self, content: str, file_path: str) -> str: + """ + Parsa il contenuto del file e restituisce una rappresentazione 'light' (firme, struttura). + """ + pass \ No newline at end of file diff --git a/src/deepbase/parsers/python.py b/src/deepbase/parsers/python.py new file mode 100644 index 0000000..2df8c48 --- /dev/null +++ b/src/deepbase/parsers/python.py @@ -0,0 +1,205 @@ +# src/deepbase/parsers/python.py +import ast +import os +from .interface import LanguageParser + +def _extract_module_comments(source: str) -> str: + """ + Estrae i commenti # e la docstring di modulo dalle prime righe del sorgente. + """ + lines = [] + in_docstring = False + docstring_char = None + source_lines = source.splitlines() + + for line in source_lines: + stripped = line.strip() + + # Riga vuota: la includiamo solo se siamo già dentro i commenti iniziali + if not stripped: + if lines: + lines.append("") + continue + + # Commenti # semplici + if stripped.startswith("#") and not in_docstring: + lines.append(line.rstrip()) + continue + + # Inizio docstring di modulo (""" o ''') + if not in_docstring and (stripped.startswith('"""') or stripped.startswith("'''")): + docstring_char = stripped[:3] + in_docstring = True + lines.append(line.rstrip()) + # Docstring su singola riga + rest = stripped[3:] + if rest.endswith(docstring_char) and len(rest) >= 3: + in_docstring = False + continue + + if in_docstring: + lines.append(line.rstrip()) + if stripped.endswith(docstring_char): + in_docstring = False + continue + + # Qualsiasi altra cosa: fine dell'header + break + + # Rimuovi trailing blank lines + while lines and not lines[-1].strip(): + lines.pop() + + return "\n".join(lines) + + +class LightVisitor(ast.NodeVisitor): + """ + Visita l'AST e produce le firme dei metodi/funzioni Python. + """ + + def __init__(self): + self.output = [] + self.indent_level = 0 + + def _log(self, text): + indent = " " * self.indent_level + self.output.append(f"{indent}{text}") + + def visit_ClassDef(self, node): + self._log(f"class {node.name}:") + self.indent_level += 1 + + # Docstring della classe (prima riga) + docstring = ast.get_docstring(node) + if docstring: + first_line = docstring.split('\n')[0].strip() + self._log(f'"""{first_line}"""') + + self.generic_visit(node) + self.indent_level -= 1 + + def visit_FunctionDef(self, node): + self._emit_signature(node, is_async=False) + + def visit_AsyncFunctionDef(self, node): + self._emit_signature(node, is_async=True) + + def _emit_signature(self, node, is_async: bool): + prefix = "async " if is_async else "" + + # --- Argomenti con annotazioni di tipo --- + args_parts = [] + all_args = node.args.args + defaults = node.args.defaults + defaults_offset = len(all_args) - len(defaults) + + for i, arg in enumerate(all_args): + arg_str = arg.arg + if arg.annotation: + try: + arg_str += f": {ast.unparse(arg.annotation)}" + except Exception: + # Fallback per vecchie versioni python o AST complessi + pass + default_idx = i - defaults_offset + if default_idx >= 0: + try: + default_val = ast.unparse(defaults[default_idx]) + arg_str += f" = {default_val}" + except Exception: + arg_str += " = ..." + args_parts.append(arg_str) + + # *args + if node.args.vararg: + va = node.args.vararg + va_str = f"*{va.arg}" + if va.annotation: + try: + va_str += f": {ast.unparse(va.annotation)}" + except Exception: + pass + args_parts.append(va_str) + + # keyword-only args + kwonly_defaults = { + i: node.args.kw_defaults[i] + for i in range(len(node.args.kwonlyargs)) + if node.args.kw_defaults[i] is not None + } + for i, kwarg in enumerate(node.args.kwonlyargs): + kw_str = kwarg.arg + if kwarg.annotation: + try: + kw_str += f": {ast.unparse(kwarg.annotation)}" + except Exception: + pass + if i in kwonly_defaults: + try: + kw_str += f" = {ast.unparse(kwonly_defaults[i])}" + except Exception: + kw_str += " = ..." + args_parts.append(kw_str) + + # **kwargs + if node.args.kwarg: + kwa = node.args.kwarg + kwa_str = f"**{kwa.arg}" + if kwa.annotation: + try: + kwa_str += f": {ast.unparse(kwa.annotation)}" + except Exception: + pass + args_parts.append(kwa_str) + + args_str = ", ".join(args_parts) + + # --- Tipo di ritorno --- + ret_anno = "" + if node.returns: + try: + ret_anno = f" -> {ast.unparse(node.returns)}" + except Exception: + pass + + self._log(f"{prefix}def {node.name}({args_str}){ret_anno}: ...") + + # Docstring della funzione (prima riga) + docstring = ast.get_docstring(node) + if docstring: + first_line = docstring.split('\n')[0].strip() + self.indent_level += 1 + self._log(f'"""{first_line}"""') + self.indent_level -= 1 + + def generic_visit(self, node): + for child in ast.iter_child_nodes(node): + if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): + self.visit(child) + + +class PythonParser(LanguageParser): + def parse(self, content: str, file_path: str) -> str: + filename = os.path.basename(file_path) + try: + tree = ast.parse(content) + visitor = LightVisitor() + visitor.visit(tree) + signatures = "\n".join(visitor.output) + + # Prepend commenti/docstring iniziali del modulo + module_header = _extract_module_comments(content) + + parts = [] + if module_header: + parts.append(module_header) + if signatures: + parts.append(signatures) + + result = "\n\n".join(parts) + return result.strip() or f"(No functions or classes found in {filename})" + except SyntaxError: + return f"(Syntax Error parsing {filename})" + except Exception as e: + return f"(Error parsing Python file: {e})" \ No newline at end of file diff --git a/src/deepbase/parsers/registry.py b/src/deepbase/parsers/registry.py new file mode 100644 index 0000000..750a184 --- /dev/null +++ b/src/deepbase/parsers/registry.py @@ -0,0 +1,55 @@ +# src/deepbase/parsers/registry.py +import os +from typing import Dict, Set +from .interface import LanguageParser +from .python import PythonParser +from .document import MarkdownParser, LatexParser # <--- Importa i nuovi parser +from .fallback import FallbackParser + +class ParserRegistry: + def __init__(self): + self._parsers: Dict[str, LanguageParser] = {} + self._fallback = FallbackParser() + self._unsupported_extensions_encountered: Set[str] = set() + + # Registrazione parser + self.register_parser('.py', PythonParser()) + + # Registrazione Documenti + md_parser = MarkdownParser() + self.register_parser('.md', md_parser) + self.register_parser('.markdown', md_parser) + + tex_parser = LatexParser() + for ext in ['.tex', '.sty', '.cls']: + self.register_parser(ext, tex_parser) + + def register_parser(self, extension: str, parser: LanguageParser): + self._parsers[extension] = parser + + def get_parser(self, file_path: str) -> LanguageParser: + _, ext = os.path.splitext(file_path) + ext = ext.lower() + + if ext in self._parsers: + return self._parsers[ext] + + if ext: + self._unsupported_extensions_encountered.add(ext) + + return self._fallback + + def parse_file(self, file_path: str, content: str) -> str: + parser = self.get_parser(file_path) + return parser.parse(content, file_path) + + def get_unsupported_warning(self) -> str: + if not self._unsupported_extensions_encountered: + return "" + ext_list = ", ".join(sorted(self._unsupported_extensions_encountered)) + return ( + f"> [WARNING] Light Mode support is currently limited for: {ext_list}. " + "Using generic fallback for these files.\n" + ) + +registry = ParserRegistry() \ No newline at end of file diff --git a/src/deepbase/toon.py b/src/deepbase/toon.py index d23618f..919893a 100644 --- a/src/deepbase/toon.py +++ b/src/deepbase/toon.py @@ -1,9 +1,9 @@ # src/deepbase/toon.py -import ast import os -import re +import ast import json +import re # Import database handling from deepbase.database import ( @@ -13,11 +13,11 @@ is_sqlite_database ) +# Import new parser registry +from deepbase.parsers.registry import registry -# --------------------------------------------------------------------------- -# TOON VISITOR — mantiene classi + firme + docstring (comportamento originale) -# --------------------------------------------------------------------------- - +# Manteniamo ToonVisitor originale per la retrocompatibilità (se usato altrove) +# o per la funzione generate_toon_representation "standard" (non light). class ToonVisitor(ast.NodeVisitor): def __init__(self): self.output = [] @@ -51,19 +51,8 @@ def _handle_function(self, node, is_async=False): args = [arg.arg for arg in node.args.args] args_str = ", ".join(args) prefix = "async " if is_async else "" - - ret_anno = "" - if node.returns: - try: - if isinstance(node.returns, ast.Name): - ret_anno = f" -> {node.returns.id}" - elif isinstance(node.returns, ast.Constant): - ret_anno = f" -> {node.returns.value}" - except Exception: - pass - - self._log(f"{prefix}F: {node.name}({args_str}){ret_anno}") - + self._log(f"{prefix}F: {node.name}({args_str})") + docstring = ast.get_docstring(node) if docstring: self.indent_level += 1 @@ -76,265 +65,15 @@ def generic_visit(self, node): if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): self.visit(child) - -# --------------------------------------------------------------------------- -# LIGHT VISITOR — solo firme Python con docstring/commenti iniziali -# --------------------------------------------------------------------------- - -def _extract_module_comments(source: str) -> str: - """ - Estrae i commenti # e la docstring di modulo dalle prime righe del sorgente. - Si ferma al primo costrutto non-commento e non-docstring. - """ - lines = [] - in_docstring = False - docstring_char = None - source_lines = source.splitlines() - - for line in source_lines: - stripped = line.strip() - - # Riga vuota: la includiamo solo se siamo già dentro i commenti iniziali - if not stripped: - if lines: - lines.append("") - continue - - # Commenti # semplici - if stripped.startswith("#") and not in_docstring: - lines.append(line.rstrip()) - continue - - # Inizio docstring di modulo (""" o ''') - if not in_docstring and (stripped.startswith('"""') or stripped.startswith("'''")): - docstring_char = stripped[:3] - in_docstring = True - lines.append(line.rstrip()) - # Docstring su singola riga - rest = stripped[3:] - if rest.endswith(docstring_char) and len(rest) >= 3: - in_docstring = False - continue - - if in_docstring: - lines.append(line.rstrip()) - if stripped.endswith(docstring_char): - in_docstring = False - continue - - # Qualsiasi altra cosa: fine dell'header - break - - # Rimuovi trailing blank lines - while lines and not lines[-1].strip(): - lines.pop() - - return "\n".join(lines) - - -class LightVisitor(ast.NodeVisitor): - """ - Visita l'AST e produce le firme dei metodi/funzioni Python, - preservando la corretta indentazione per classi nidificate. - Include la prima riga di docstring di classi e funzioni come commento. - """ - - def __init__(self): - self.output = [] - self.indent_level = 0 - - def _log(self, text): - indent = " " * self.indent_level - self.output.append(f"{indent}{text}") - - def visit_ClassDef(self, node): - self._log(f"class {node.name}:") - self.indent_level += 1 - - # Docstring della classe (prima riga) - docstring = ast.get_docstring(node) - if docstring: - first_line = docstring.split('\n')[0].strip() - self._log(f'"""{first_line}"""') - - self.generic_visit(node) - self.indent_level -= 1 - - def visit_FunctionDef(self, node): - self._emit_signature(node, is_async=False) - - def visit_AsyncFunctionDef(self, node): - self._emit_signature(node, is_async=True) - - def _emit_signature(self, node, is_async: bool): - """Emette la firma completa della funzione/metodo in stile Python.""" - prefix = "async " if is_async else "" - - # --- Argomenti con annotazioni di tipo --- - args_parts = [] - - all_args = node.args.args - defaults = node.args.defaults - defaults_offset = len(all_args) - len(defaults) - - for i, arg in enumerate(all_args): - arg_str = arg.arg - if arg.annotation: - arg_str += f": {ast.unparse(arg.annotation)}" - default_idx = i - defaults_offset - if default_idx >= 0: - default_val = ast.unparse(defaults[default_idx]) - arg_str += f" = {default_val}" - args_parts.append(arg_str) - - # *args - if node.args.vararg: - va = node.args.vararg - va_str = f"*{va.arg}" - if va.annotation: - va_str += f": {ast.unparse(va.annotation)}" - args_parts.append(va_str) - - # keyword-only args - kwonly_defaults = { - i: node.args.kw_defaults[i] - for i in range(len(node.args.kwonlyargs)) - if node.args.kw_defaults[i] is not None - } - for i, kwarg in enumerate(node.args.kwonlyargs): - kw_str = kwarg.arg - if kwarg.annotation: - kw_str += f": {ast.unparse(kwarg.annotation)}" - if i in kwonly_defaults: - kw_str += f" = {ast.unparse(kwonly_defaults[i])}" - args_parts.append(kw_str) - - # **kwargs - if node.args.kwarg: - kwa = node.args.kwarg - kwa_str = f"**{kwa.arg}" - if kwa.annotation: - kwa_str += f": {ast.unparse(kwa.annotation)}" - args_parts.append(kwa_str) - - args_str = ", ".join(args_parts) - - # --- Tipo di ritorno --- - ret_anno = "" - if node.returns: - try: - ret_anno = f" -> {ast.unparse(node.returns)}" - except Exception: - pass - - self._log(f"{prefix}def {node.name}({args_str}){ret_anno}: ...") - - # Docstring della funzione (prima riga, indentata sotto la firma) - docstring = ast.get_docstring(node) - if docstring: - first_line = docstring.split('\n')[0].strip() - self.indent_level += 1 - self._log(f'"""{first_line}"""') - self.indent_level -= 1 - - def generic_visit(self, node): - for child in ast.iter_child_nodes(node): - if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): - self.visit(child) - - -# --------------------------------------------------------------------------- -# Gestori per file Non-Python -# --------------------------------------------------------------------------- +# --- Helper Legacy per TOON non-light (struttura scheletrica) --- +# (Qui potresti voler spostare anche questi nei parser in futuro, +# ma per ora ci concentriamo sulla modalità --light) def _handle_markdown(content: str) -> str: - """Estrae solo gli header Markdown.""" - lines = [] - for line in content.splitlines(): - if line.strip().startswith("#"): - lines.append(line.strip()) - if not lines: - return "(Markdown file with no headers)" - return "\n".join(lines) - - -def _handle_toml_ini(content: str) -> str: - """Estrae sezioni [Title] e chiavi, ignorando valori lunghi.""" - lines = [] - for line in content.splitlines(): - clean = line.strip() - if not clean or clean.startswith("#"): - continue - if clean.startswith("[") and clean.endswith("]"): - lines.append(clean) - elif "=" in clean: - key = clean.split("=")[0].strip() - lines.append(f"{key} = ...") - return "\n".join(lines) - - -def _handle_json_structure(content: str) -> str: - """Prova a parsare JSON e restituire solo le chiavi di primo/secondo livello.""" - try: - data = json.loads(content) - if isinstance(data, dict): - lines = ["{"] - for k, v in data.items(): - if isinstance(v, dict): - lines.append(f" {k}: {{ ...keys: {list(v.keys())} }}") - elif isinstance(v, list): - lines.append(f" {k}: [ ...size: {len(v)} ]") - else: - lines.append(f" {k}: (value)") - lines.append("}") - return "\n".join(lines) - return "(JSON Array or Scalar)" - except Exception: - return "(Invalid JSON content)" - - -def _handle_minified_config(content: str) -> str: - """Rimuove righe vuote e commenti (per .gitignore, requirements.txt).""" - lines = [] - for line in content.splitlines(): - clean = line.strip() - if clean and not clean.startswith("#"): - lines.append(clean) - if not lines: - return "(Empty or comments-only file)" - return "\n".join(lines) - - -def _handle_latex_structure(content: str) -> str: - """ - Minimizza il LaTeX mantenendo struttura, pacchetti e comandi chiave. - Rimuove il testo semplice. - """ - keep_patterns = [ - r'^\s*\\documentclass', - r'^\s*\\usepackage', - r'^\s*\\input', - r'^\s*\\include', - r'^\s*\\(part|chapter|section|subsection|subsubsection)', - r'^\s*\\begin', - r'^\s*\\end', - r'^\s*\\title', - r'^\s*\\author', - r'^\s*\\date' - ] - combined_pattern = re.compile('|'.join(keep_patterns)) - lines = [] - for line in content.splitlines(): - line = line.split('%')[0].rstrip() - if combined_pattern.match(line): - lines.append(line) - if not lines: - return "(LaTeX content empty or purely textual)" - return "\n".join(lines) - + lines = [l.strip() for l in content.splitlines() if l.strip().startswith("#")] + return "\n".join(lines) or "(Markdown file with no headers)" def _handle_database_toon(file_path: str) -> str: - """Handle database files in TOON mode.""" if is_sqlite_database(file_path): try: schema = get_database_schema(file_path) @@ -343,125 +82,71 @@ def _handle_database_toon(file_path: str) -> str: return f"(DB Error: {e})" return "(Not a valid SQLite database)" - # --------------------------------------------------------------------------- -# Funzione pubblica principale — TOON (skeleton completo) +# Funzione pubblica principale — LIGHT (solo firme) # --------------------------------------------------------------------------- -def generate_toon_representation(file_path: str, content: str) -> str: +def generate_light_representation(file_path: str, content: str) -> str: """ - Genera una rappresentazione TOON (Token Oriented) in base al tipo di file. - Include classi, firme e docstring. + Genera una rappresentazione LIGHT usando il nuovo sistema di plugin/parser. """ - _, ext = os.path.splitext(file_path) - filename = os.path.basename(file_path) - ext = ext.lower() - + # 1. Gestione Database (caso speciale, non basato su contenuto testo) if is_sqlite_database(file_path): return _handle_database_toon(file_path) - if ext == ".py": - try: - tree = ast.parse(content) - visitor = ToonVisitor() - visitor.visit(tree) - return "\n".join(visitor.output) - except SyntaxError: - return f"(Syntax Error parsing {filename})" - - elif ext in [".md", ".markdown"]: - return _handle_markdown(content) - - elif ext in [".tex", ".sty", ".cls"]: - return _handle_latex_structure(content) - - elif ext in [".toml", ".ini", ".cfg"]: - return _handle_toml_ini(content) - - elif ext == ".json": - return _handle_json_structure(content) - - elif ext in [".txt", ".dockerignore", ".gitignore"] or filename in [".gitignore", ".dockerignore", "Dockerfile", "Makefile"]: - return _handle_minified_config(content) - - elif ext in [".yml", ".yaml"]: - lines = [line for line in content.splitlines() if ":" in line and not line.strip().startswith("#")] - clean_lines = [] - for l in lines: - key = l.split(":")[0] - clean_lines.append(f"{key}:") - return "\n".join(clean_lines) - - else: - minified = _handle_minified_config(content) - lines = minified.splitlines() - if len(lines) > 10: - return "\n".join(lines[:10]) + f"\n... ({len(lines)-10} more meaningful lines hidden)" - return minified + # 2. Usa il registro per trovare il parser corretto + return registry.parse_file(file_path, content) +def get_light_mode_warnings() -> str: + """ + Restituisce i warning accumulati durante l'esecuzione (es. linguaggi non supportati). + Da chiamare in main.py se si vuole stampare un header. + """ + return registry.get_unsupported_warning() # --------------------------------------------------------------------------- -# Funzione pubblica principale — LIGHT (solo firme) +# Funzione pubblica principale — TOON (skeleton legacy) # --------------------------------------------------------------------------- -def generate_light_representation(file_path: str, content: str) -> str: +def generate_toon_representation(file_path: str, content: str) -> str: """ - Genera una rappresentazione LIGHT: solo le firme dei metodi/funzioni. - Per file Python: usa LightVisitor (def/async def con tipi, niente corpo) - preceduto dai commenti/docstring di modulo iniziali. - Per altri tipi di file: delega alla rappresentazione TOON standard, - perché per file non-Python non c'è distinzione tra "firma" e "scheletro". + Genera una rappresentazione TOON (Token Oriented - Skeleton) + Mantiene la logica originale per ora, o delega a Light se preferisci unificare. """ _, ext = os.path.splitext(file_path) - filename = os.path.basename(file_path) ext = ext.lower() - # DATABASE: stessa logica TOON (schema compatto) if is_sqlite_database(file_path): return _handle_database_toon(file_path) - # PYTHON: commenti di modulo + firme via LightVisitor if ext == ".py": try: tree = ast.parse(content) - visitor = LightVisitor() + visitor = ToonVisitor() visitor.visit(tree) - signatures = "\n".join(visitor.output) - - # Prepend commenti/docstring iniziali del modulo (se presenti) - module_header = _extract_module_comments(content) - if module_header: - result = module_header + "\n\n" + signatures - else: - result = signatures - - return result.strip() or f"(No functions or classes found in {filename})" + return "\n".join(visitor.output) except SyntaxError: - return f"(Syntax Error parsing {filename})" - - # Tutti gli altri tipi: delega al TOON standard - # (markdown -> headers, toml -> chiavi, json -> struttura, ecc.) - return generate_toon_representation(file_path, content) - + return f"(Syntax Error parsing {os.path.basename(file_path)})" + + elif ext in [".md", ".markdown"]: + return _handle_markdown(content) + + # Per semplicità, per ora il Toon standard per altri file + # può usare il fallback del nuovo sistema o la vecchia logica. + # Usiamo il fallback del registry per coerenza: + return registry.parse_file(file_path, content) # --------------------------------------------------------------------------- -# Helper per database in focus mode (usato da main.py) +# Helper per database in focus mode # --------------------------------------------------------------------------- def generate_database_focused(file_path: str, focused_tables: list = None) -> str: - """ - Generate database context with specific tables in full detail. - Used when database is in focus mode. - """ from deepbase.database import generate_database_context_full, generate_database_context_hybrid - if not is_sqlite_database(file_path): return "(Not a valid SQLite database)" - try: schema = get_database_schema(file_path) db_name = os.path.basename(file_path) - if focused_tables: return generate_database_context_hybrid(schema, db_name, focused_tables) else: diff --git a/tests/test_suite_python.py b/tests/test_suite_python.py index 8d1264f..ff864ff 100644 --- a/tests/test_suite_python.py +++ b/tests/test_suite_python.py @@ -6,215 +6,207 @@ from deepbase.main import main import sqlite3 -# Creiamo un'app Typer temporanea per il testing +# --- SETUP PER I TEST --- test_app = typer.Typer() test_app.command()(main) runner = CliRunner() -class TestPythonSuite: +class TestDeepBaseSuite: """ - Test suite dedicata all'analisi di progetti Python con DeepBase. - FIX: Specifica sempre l'output path esplicito per evitare FileNotFoundError. - FIX: Controlla il contenuto del file generato, non lo stdout, per la struttura. + Test suite completa per DeepBase. + Copre Python, Markdown, LaTeX, Database e il meccanismo di Fallback. """ - def create_dummy_python_project(self, root): - """Helper per popolare una directory con file Python finti.""" - # 1. File principale + def create_dummy_project(self, root): + """Helper per popolare una directory con vari tipi di file.""" + # 1. Python Complex main_py = root / "main.py" - main_py.write_text(""" + main_py.write_text("""\"\"\" +Module docstring here. +Should be preserved. +\"\"\" import os -def hello_world(): - print("Hello content") +# Initial comment +def simple_func(): return True +async def async_func(a: int, b: str = "default") -> bool: + \"\"\"Function docstring.\"\"\" + print("body hidden") + return False + class MyClass: + \"\"\"Class docstring.\"\"\" def method_one(self): - # This is a comment inside return 1 """, encoding="utf-8") - # 2. Modulo utils - utils_dir = root / "utils" - utils_dir.mkdir() - (utils_dir / "helper.py").write_text("def help_me():\n pass", encoding="utf-8") + # 2. Markdown + readme = root / "README.md" + readme.write_text("""# Project Title +Description text that should be removed. +## Section 1 +More text. +### Subsection +""", encoding="utf-8") - # 3. File da ignorare (segreto) - (root / "secrets.py").write_text("API_KEY = '123'", encoding="utf-8") + # 3. LaTeX + doc_tex = root / "document.tex" + doc_tex.write_text(r"""\documentclass{article} +\usepackage{graphicx} +\begin{document} +Text that should be removed in light mode. +\section{Introduction} +\subsection{Background} +\end{document} +""", encoding="utf-8") - # 4. Cartella da ignorare (es. cache) - cache_dir = root / ".mypy_cache" - cache_dir.mkdir() - (cache_dir / "data.json").write_text("{}", encoding="utf-8") + # 4. JavaScript (Unsupported / Fallback test) + script_js = root / "script.js" + script_js.write_text("""function hello() { + console.log("This is JS"); + return true; +} +""", encoding="utf-8") - def test_basic_structure(self, tmp_path): - """Testa che il comando base generi la struttura nel file.""" - self.create_dummy_python_project(tmp_path) - - output_file = tmp_path / "llm_context.md" + # 5. JSON (Legacy TOON support) + config_json = root / "config.json" + config_json.write_text('{"key": "value", "list": [1, 2, 3]}', encoding="utf-8") + + def test_python_light_advanced(self, tmp_path): + """Testa il nuovo parser Python con docstring, async e type hints.""" + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" - # Passiamo esplicitamente l'output file nel tmp_path - result = runner.invoke(test_app, [str(tmp_path), "-o", str(output_file)]) + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) assert result.exit_code == 0 - assert output_file.exists() - content = output_file.read_text(encoding="utf-8") - # Verifica presenza nell'albero (DENTRO IL FILE, non nello stdout) - assert "main.py" in content - assert "utils/" in content + # Verifica Docstring di modulo (controlliamo le righe separate perché è multiline) + assert '"""' in content + assert "Module docstring here." in content + assert "Should be preserved." in content - # Verifica che il CONTENUTO del codice NON ci sia - assert "def hello_world" not in content - assert "import os" not in content - - def test_flag_all_content(self, tmp_path): - """Testa --all: deve includere tutto il codice.""" - self.create_dummy_python_project(tmp_path) + # Verifica Async e Type Hints + assert "async def async_func" in content + assert "b: str" in content - output_file = tmp_path / "llm_context.md" - result = runner.invoke(test_app, [str(tmp_path), "--all", "-o", str(output_file)]) + # Verifica Docstring di funzione (prima riga) + assert '"""Function docstring."""' in content - assert result.exit_code == 0 - content = output_file.read_text(encoding="utf-8") - - # Deve contenere il corpo delle funzioni - assert "print(\"Hello content\")" in content - assert "class MyClass:" in content + # Verifica che il corpo sia rimosso + assert 'print("body hidden")' not in content - def test_flag_light_mode(self, tmp_path): - """Testa --light: deve includere firme ma NON il corpo.""" - self.create_dummy_python_project(tmp_path) + def test_markdown_parsing(self, tmp_path): + """Testa che il parser Markdown estragga solo gli header.""" + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" - output_file = tmp_path / "llm_context.md" result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) - - assert result.exit_code == 0 content = output_file.read_text(encoding="utf-8") - # Deve contenere la notice Light Mode - assert "[LIGHT MODE]" in content - - # Deve contenere le firme (via AST parsing) - # Nota: controlliamo stringhe parziali per evitare problemi di formattazione spazi - assert "def hello_world" in content - assert "class MyClass:" in content - - # NON deve contenere il corpo del codice - assert "print(\"Hello content\")" not in content - assert "return 1" not in content + assert "# Project Title" in content + assert "## Section 1" in content + # Il testo descrittivo non deve esserci + assert "Description text that should be removed" not in content - def test_focus_mode_hybrid(self, tmp_path): - """Testa --focus combined (ibrido).""" - self.create_dummy_python_project(tmp_path) + def test_latex_parsing(self, tmp_path): + """Testa che il parser LaTeX mantenga la struttura.""" + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" - output_file = tmp_path / "llm_context.md" - # Focus solo su main.py - result = runner.invoke(test_app, [str(tmp_path), "--focus", "main.py", "-o", str(output_file)]) - - assert result.exit_code == 0 + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) content = output_file.read_text(encoding="utf-8") - # main.py deve essere FULL - assert "--- START OF FILE: main.py ---" in content - assert "print(\"Hello content\")" in content - - # utils/helper.py NON era in focus, quindi non dovrebbe esserci il contenuto - assert "--- START OF FILE: utils/helper.py ---" not in content + assert r"\documentclass{article}" in content + assert r"\section{Introduction}" in content + assert "Text that should be removed" not in content - def test_focus_with_light_background(self, tmp_path): - """Testa --light insieme a --focus.""" - self.create_dummy_python_project(tmp_path) - - output_file = tmp_path / "llm_context.md" - # Focus su main.py, ma background --light - result = runner.invoke(test_app, [str(tmp_path), "--light", "--focus", "main.py", "-o", str(output_file)]) + def test_fallback_and_warning(self, tmp_path): + """ + Testa il meccanismo di fallback per file non supportati (es. .js) + e verifica che venga generato il WARNING. + """ + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) content = output_file.read_text(encoding="utf-8") - # main.py FULL - assert "print(\"Hello content\")" in content + # 1. Verifica che il contenuto JS sia presente (Fallback behavior) + assert "function hello()" in content - # utils/helper.py LIGHT (deve esserci la firma) - assert "def help_me" in content + # 2. Verifica la presenza del WARNING (nello stdout o nel file) + warning_msg = ".js" + assert (warning_msg in result.stdout) or (warning_msg in content) - def test_toml_configuration(self, tmp_path): - """Testa che .deepbase.toml venga letto e rispettato.""" - self.create_dummy_python_project(tmp_path) + def test_json_legacy_support(self, tmp_path): + """Testa che i file JSON vengano ancora gestiti.""" + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" - # Crea configurazione per ignorare "secrets.py" - toml_file = tmp_path / ".deepbase.toml" - toml_file.write_text('ignore_files = ["secrets.py"]', encoding="utf-8") - - output_file = tmp_path / "llm_context.md" - result = runner.invoke(test_app, [str(tmp_path), "--all", "-o", str(output_file)]) - - assert result.exit_code == 0 + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) content = output_file.read_text(encoding="utf-8") - # secrets.py NON deve apparire - assert "secrets.py" not in content - assert "API_KEY" not in content + # Verifica struttura JSON + assert "key" in content + assert "list" in content - def test_custom_output_path(self, tmp_path): - """Testa l'opzione -o per il file di output.""" - self.create_dummy_python_project(tmp_path) - - custom_out = tmp_path / "custom_analysis.txt" - result = runner.invoke(test_app, [str(tmp_path), "-o", str(custom_out)]) - - assert result.exit_code == 0 - assert custom_out.exists() - - def test_error_handling_invalid_path(self): - """Testa che il programma gestisca percorsi inesistenti.""" - result = runner.invoke(test_app, ["/percorso/inesistente/assoluto"]) - assert result.exit_code == 1 - assert "Target not found" in result.stdout - def test_database_handling(self, tmp_path): - """Testa il supporto per database SQLite (schema extraction e light mode).""" - import sqlite3 # Import necessario qui o in cima al file - - # Creiamo una cartella e un DB reale + """Testa integrazione database SQLite.""" project_dir = tmp_path / "db_project" project_dir.mkdir() - db_path = project_dir / "test_db.sqlite" + db_path = project_dir / "test.sqlite" conn = sqlite3.connect(db_path) cursor = conn.cursor() - cursor.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, username TEXT NOT NULL)") - cursor.execute("CREATE TABLE posts (id INTEGER PRIMARY KEY, user_id INTEGER, content TEXT)") + cursor.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, email TEXT)") conn.commit() conn.close() output_file = project_dir / "context.md" - - # 1. Test Full Mode (--all) -> Deve mostrare schema dettagliato - result = runner.invoke(test_app, [str(project_dir), "--all", "-o", str(output_file)]) + result = runner.invoke(test_app, [str(project_dir), "--light", "-o", str(output_file)]) + assert result.exit_code == 0 content = output_file.read_text(encoding="utf-8") - - # Verifica che il DB sia stato processato - assert "test_db.sqlite" in content - # Verifica il contenuto generato da generate_database_context_full - # Nota: "DATABASE SCHEMA" appare solo in single-file mode, qui cerchiamo il contenuto reale - assert "Table: `users`" in content - # Verifica parziale di una colonna per assicurarsi che lo schema sia stato letto - assert "username" in content - assert "TEXT" in content + assert "users" in content + assert "email:TEXT" in content - # 2. Test Light Mode (--light) -> Deve mostrare schema compatto (TOON) - result = runner.invoke(test_app, [str(project_dir), "--light", "-o", str(output_file)]) + def test_focus_mode_hybrid(self, tmp_path): + """Testa --focus combined (ibrido) su file Python.""" + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" + + # Focus su main.py. SENZA --light o --all, il comportamento standard + # per i file NON in focus è di essere presenti SOLO nell'albero (tree). + result = runner.invoke(test_app, [str(tmp_path), "--focus", "main.py", "-o", str(output_file)]) + assert result.exit_code == 0 content = output_file.read_text(encoding="utf-8") - # Verifica firma compatta (TOON) - # Cerca la definizione della tabella users e la colonna id - assert "users" in content - # Verifica formato TOON: nome:tipo - assert "id:INTEGER" in content \ No newline at end of file + # 1. main.py deve essere FULL (contiene il corpo) + assert 'print("body hidden")' in content + + # 2. README.md NON in focus. + # Verifica che sia presente nell'albero dei file + assert "README.md" in content + + # Ma NON deve esserci il suo contenuto (perché non abbiamo passato --light come background) + # Nota: se in futuro cambi il default, aggiorna questo test. + assert "# Project Title" not in content + + def test_ignore_files(self, tmp_path): + """Testa che .deepbase.toml venga rispettato.""" + self.create_dummy_project(tmp_path) + + (tmp_path / ".deepbase.toml").write_text('ignore_files = ["script.js"]', encoding="utf-8") + + output_file = tmp_path / "context.md" + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) + + content = output_file.read_text(encoding="utf-8") + assert "script.js" not in content \ No newline at end of file From 31de31d04579b41220f2819613d52081ec6f0ae4 Mon Sep 17 00:00:00 2001 From: Giuliano Ranauro Date: Fri, 13 Feb 2026 21:39:28 +0100 Subject: [PATCH 7/9] parser javascript light v1 --- src/deepbase/parsers/javascript.py | 84 ++++++++++++++++++++++++++++++ src/deepbase/parsers/registry.py | 13 +++-- tests/test_suite_python.py | 55 ++++++++++++++++++- 3 files changed, 148 insertions(+), 4 deletions(-) create mode 100644 src/deepbase/parsers/javascript.py diff --git a/src/deepbase/parsers/javascript.py b/src/deepbase/parsers/javascript.py new file mode 100644 index 0000000..3bd51c4 --- /dev/null +++ b/src/deepbase/parsers/javascript.py @@ -0,0 +1,84 @@ +# src/deepbase/parsers/javascript.py +import re +from .interface import LanguageParser + +class JavaScriptParser(LanguageParser): + """ + Parser per JavaScript, TypeScript e React Native (.js, .jsx, .ts, .tsx). + Usa regex per identificare firme di funzioni, classi, interfacce e componenti React. + """ + + def parse(self, content: str, file_path: str) -> str: + lines = [] + + # Regex patterns per catturare le definizioni + patterns = [ + # Class definition (es. export default class MyClass extends Component) + re.compile(r'^\s*(export\s+)?(default\s+)?(abstract\s+)?class\s+([a-zA-Z0-9_]+)(.*)?\{'), + + # Function definition standard (es. async function myFunc(a, b)) + re.compile(r'^\s*(export\s+)?(default\s+)?(async\s+)?function\s+([a-zA-Z0-9_]+)\s*\(.*'), + + # Arrow Function / Variable Assignments (es. const MyComponent = (props) => {) + # Cattura costanti che sembrano funzioni o componenti React + re.compile(r'^\s*(export\s+)?(const|let|var)\s+([a-zA-Z0-9_]+)\s*=\s*(async\s*)?(\(.*\)|[^=]+)\s*=>.*'), + + # TypeScript Interfaces & Types + re.compile(r'^\s*(export\s+)?(interface|type)\s+([a-zA-Z0-9_]+).*'), + + # React Hooks (opzionale: spesso sono implementation details, + # ma custom hooks 'useSomething' top-level potrebbero essere rilevanti. + # Per ora li ignoriamo per risparmiare token, tenendo solo le definizioni) + ] + + # JSDoc pattern (multiline) + in_comment = False + + source_lines = content.splitlines() + + for i, line in enumerate(source_lines): + stripped = line.strip() + + # Gestione commenti JSDoc /** ... */ + if stripped.startswith("/**"): + in_comment = True + lines.append(stripped) + if stripped.endswith("*/"): + in_comment = False + continue + + if in_comment: + lines.append(stripped) + if stripped.endswith("*/"): + in_comment = False + continue + + # Ignora commenti single line o righe vuote + if not stripped or stripped.startswith("//"): + continue + + # Verifica se la riga matcha una definizione importante + is_match = False + for pattern in patterns: + # Usiamo match sulla riga pulita o search per flessibilità + if pattern.match(stripped): + # Pulizia fine riga: se finisce con '{', lo sostituiamo con '...' + clean_line = stripped + if clean_line.endswith("{"): + clean_line = clean_line[:-1].strip() + + # Aggiunge firma + ... + lines.append(f"{clean_line} {{ ... }}") + is_match = True + break + + # Fallback per decoratori (es. @Component in Angular o NestJS, usati anche in RN con mobx) + if not is_match and stripped.startswith("@"): + # Mantiene il decoratore se è seguito da una classe nella riga successiva (euristica semplice) + if i + 1 < len(source_lines) and "class " in source_lines[i+1]: + lines.append(stripped) + + if not lines: + return f"(No exported functions, classes or components found in {file_path})" + + return "\n".join(lines) \ No newline at end of file diff --git a/src/deepbase/parsers/registry.py b/src/deepbase/parsers/registry.py index 750a184..123136b 100644 --- a/src/deepbase/parsers/registry.py +++ b/src/deepbase/parsers/registry.py @@ -3,7 +3,8 @@ from typing import Dict, Set from .interface import LanguageParser from .python import PythonParser -from .document import MarkdownParser, LatexParser # <--- Importa i nuovi parser +from .javascript import JavaScriptParser # <--- NUOVO IMPORT +from .document import MarkdownParser, LatexParser from .fallback import FallbackParser class ParserRegistry: @@ -12,10 +13,16 @@ def __init__(self): self._fallback = FallbackParser() self._unsupported_extensions_encountered: Set[str] = set() - # Registrazione parser + # --- Python --- self.register_parser('.py', PythonParser()) - # Registrazione Documenti + # --- JavaScript / TypeScript / React Native --- + js_parser = JavaScriptParser() + # Copre tutto l'ecosistema React/Node/TS + for ext in ['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs']: + self.register_parser(ext, js_parser) + + # --- Documentazione --- md_parser = MarkdownParser() self.register_parser('.md', md_parser) self.register_parser('.markdown', md_parser) diff --git a/tests/test_suite_python.py b/tests/test_suite_python.py index ff864ff..8fef89f 100644 --- a/tests/test_suite_python.py +++ b/tests/test_suite_python.py @@ -209,4 +209,57 @@ def test_ignore_files(self, tmp_path): result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) content = output_file.read_text(encoding="utf-8") - assert "script.js" not in content \ No newline at end of file + assert "script.js" not in content + + + def test_javascript_react_parsing(self, tmp_path): + """Testa il parsing di file JS, TS e React (JSX/TSX).""" + self.create_dummy_project(tmp_path) + + # Crea un componente React Native finto + rn_file = tmp_path / "App.tsx" + rn_file.write_text(""" +import React, { useEffect } from 'react'; +import { View, Text } from 'react-native'; + +/** + * Componente principale + */ +export const App = (props: Props) => { + useEffect(() => { + console.log("Effect"); + }, []); + + const helper = () => true; + + return ( + + Hello + + ); +}; + +export default class ErrorBoundary extends React.Component { + render() { + return null; + } +} +""", encoding="utf-8") + + output_file = tmp_path / "context.md" + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) + + content = output_file.read_text(encoding="utf-8") + + # Verifica Componente Funzionale + assert "export const App = (props: Props) => { ... }" in content + + # Verifica Commento JSDoc + assert "Componente principale" in content + + # Verifica Classe + assert "export default class ErrorBoundary extends React.Component { ... }" in content + + # Verifica che il corpo (useEffect, JSX) sia nascosto + assert "console.log" not in content + assert "" not in content \ No newline at end of file From c91ab0d832c859cb627dd341ec82decc45d42c74 Mon Sep 17 00:00:00 2001 From: Giuliano Ranauro Date: Fri, 13 Feb 2026 21:55:02 +0100 Subject: [PATCH 8/9] v1.2 algoritmo js MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ✅ Fix riuscito: export default ora funziona | Metrica | V1 | V1.2 | Delta | | ---------- | -------- | -------- | -------- | | **Totale** | ~3,285 | ~2,991 | **-9%** | | Raw size | 12.83 KB | 11.68 KB | -1.15 KB | | Aspetto | V1 | V1.2 | | ------------------- | ------- | ----------------------- | | Pulizia logica | ✅ | ✅ | | Completezza export | ⚠️ 60% | ✅ **~75%** | | Validità sintattica | ✅ | ✅ | | Efficienza token | ✅ 3,285 | ✅ **2,991** (migliore!) | --- llm_context.md | 3121 ---------------------------- src/deepbase/parsers/javascript.py | 39 +- 2 files changed, 21 insertions(+), 3139 deletions(-) delete mode 100644 llm_context.md diff --git a/llm_context.md b/llm_context.md deleted file mode 100644 index 00dde88..0000000 --- a/llm_context.md +++ /dev/null @@ -1,3121 +0,0 @@ -# Project Context: DeepBase - -================================================================================ -### PROJECT STRUCTURE ### -================================================================================ - -> Total Size: 93.03 KB | Est. Tokens: ~23,817 -📁 DeepBase/ -├── 📄 .gitignore (3.4% | ~805t) -├── 📄 CHANGELOG.md (1.1% | ~255t) -├── 📄 README.md (3.8% | ~908t) -├── 📁 docs/ (2.1% | ~500t) -│ ├── 📄 index.md (2.0% | ~487t) -│ └── 📄 reference.md (0.1% | ~13t) -├── 📁 examples/ (26.2% | ~6.2k t) -│ └── 📄 deepbase_context.md (26.2% | ~6.2k t) -├── 📄 mkdocs.yml (1.0% | ~227t) -├── 📄 pyproject.toml (1.5% | ~363t) -├── 📁 src/ (52.0% | ~12.4k t) -│ ├── 📁 deepbase/ (52.0% | ~12.4k t) -│ │ ├── 📄 __init__.py -│ │ ├── 📄 database.py (9.1% | ~2.2k t) -│ │ ├── 📄 main.py (24.4% | ~5.8k t) -│ │ ├── 📁 parsers/ (12.5% | ~3.0k t) -│ │ │ ├── 📄 __init__.py (0.2% | ~53t) -│ │ │ ├── 📄 document.py (2.0% | ~485t) -│ │ │ ├── 📄 fallback.py (0.9% | ~226t) -│ │ │ ├── 📄 interface.py (0.4% | ~96t) -│ │ │ ├── 📄 python.py (6.9% | ~1.6k t) -│ │ │ └── 📄 registry.py (2.0% | ~477t) -│ │ └── 📄 toon.py (6.1% | ~1.4k t) -│ └── 📁 deepbase.egg-info/ -└── 📁 tests/ (9.0% | ~2.1k t) - ├── 📁 database/ - └── 📄 test_suite_python.py (9.0% | ~2.1k t) - - -================================================================================ -### FILE CONTENTS ### -================================================================================ - ---- START OF FILE: .gitignore --- - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -debug.log -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case you generate it automatically, you may want to ignore it. -# Pipfile.lock - -# poetry -# According to python-poetry/poetry#519, it is recommended to include poetry.lock in version control. -# This is especially if you are building a library. -# poetry.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env.bak -venv.bak - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - -# Abstra -# Abstra is an AI-powered process automation framework. -# Ignore directories containing user credentials, local state, and settings. -# Learn more at https://abstra.io/docs -.abstra/ - -# Visual Studio Code -# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore -# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore -# and can be added to the global gitignore or merged into this file. However, if you prefer, -# you could uncomment the following to ignore the entire vscode folder -# .vscode/ - -# Ruff stuff: -.ruff_cache/ - -# PyPI configuration file -.pypirc - -# Cursor -# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to -# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data -# refer to https://docs.cursor.com/context/ignore-files -.cursorignore -.cursorindexingignore - -# Marimo -marimo/_static/ -marimo/_lsp/ -__marimo__/ - - ---- END OF FILE: .gitignore --- ----------------------------------------- - ---- START OF FILE: CHANGELOG.md --- - -## [1.7.0] - 2024-02-12 - -### Added -- **Smart Token Estimation**: Added approximate token count (~4 chars/token) and file size percentage next to every file and folder in the tree view. -- **Recursive Directory Stats**: Parent folders now show the cumulative size and token count of their contents. -- **Enhanced Tree Visualization**: Replaced simple indentation with proper ASCII tree branches (`├──`, `└──`, `│`) for better readability. -- **CLI Links**: Added links to Documentation, Repository, and Issues in the `--help` output. - -### Changed -- **React/JS Optimization**: Automatically ignores `package-lock.json`, `yarn.lock`, `pnpm-lock.yaml`, and mobile build folders (`ios/`, `android/`) to save tokens. -- **Self-Exclusion Logic**: DeepBase now strictly ignores any existing file named `llm_context.md` (or the specified output name) in the target directory to prevent data duplication. - -### Fixed -- Fixed an issue where previous context files were included in the analysis, doubling the token count. - ---- END OF FILE: CHANGELOG.md --- ----------------------------------------- - ---- START OF FILE: README.md --- - -# DeepBase - -**DeepBase** is a command-line tool that analyzes a project directory, extracts the folder structure and the content of all significant code files, and consolidates them into a single text/markdown file. - -This unified "context" is perfect for providing to a Large Language Model (LLM) to enable it to deeply understand the entire codebase. - -## Features - -- **Project Structure**: Generates a tree view of the folder and file structure. -- **Smart Filtering**: Automatically ignores common unnecessary directories (e.g., `.git`, `venv`, `node_modules`). -- **Token Optimization (TOON)**: Capable of generating "Semantic Skeletons" (class definitions, function signatures, docstrings) instead of full code to save up to 90% of tokens. -- **Hybrid Focus Mode**: Combine lightweight context for the whole project with full content only for specific files or folders. -- **Configurable**: Customize ignored directories and included extensions via a `.deepbase.toml` file. -- **Unified Output**: Combines everything into a single file, easy to copy and paste. -- **PyPI Ready**: Easy to install via `pip`. - -## Installation - -You can install DeepBase directly from PyPI: - -```sh -pip install deepbase -``` - -## How to Use - -Once installed, use the `deepbase` command followed by the target (directory or file). - -### 1. Basic Project Analysis - -**Structure Only (Default)** -Quickly generate a tree view of your project folders and files. No code content is included. - -```sh -deepbase . -``` - -**Include All Content** -To generate the full context including the code of all significant files, use the `--all` (or `-a`) flag. -*Warning: use this only for small projects.* - -```sh -deepbase . --all -``` - -### 2. Smart Token Optimization (TOON) - -For large projects, sending all code to an LLM is expensive and inefficient. **TOON (Token Oriented Object Notation)** extracts only the semantic "skeleton" of your code (classes, signatures, docstrings), ignoring implementations. - -```sh -deepbase . --toon -# or -deepbase . -t -``` -*Result: LLMs understand your architecture using minimal tokens.* - -### 3. Hybrid Mode (Focus) - -This is the power user feature. You can provide the TOON skeleton for the entire project (background context) while focusing on specific files (full content). - -**Focus via CLI:** -Use `-f` or `--focus` with glob patterns (e.g., `*auth*`, `src/utils/*`). - -```sh -deepbase . --toon --focus "server/controllers/*" --focus "client/src/login.js" -``` - -**Focus via File:** -Instead of typing patterns every time, create a text file (e.g., `context_task.txt`) with the list of files/folders you are working on. - -*content of `context_task.txt`:* -```text -server/routes/auth.js -server/models/User.js -client/src/components/LoginForm.jsx -``` - -Run deepbase loading the file: -```sh -deepbase . --toon --focus-file context_task.txt -``` - -### 4. Single File Analysis - -DeepBase supports analyzing a single specific file. - -**Structure Only (Default)** -Extracts only the outline/headers. Useful for large documentation files. - -```sh -deepbase README.md -``` - -**Structure + Content** -Appends the full content after the structure. - -```sh -deepbase README.md --all -``` - -### Configuration (.deepbase.toml) - -You can customize behavior by creating a `.deepbase.toml` file in your project root: - -```toml -ignore_dirs = ["my_assets", "experimental"] -significant_extensions = [".cfg", "Makefile", ".tsx"] -``` - -## Development Workflow - -If you want to contribute or test the tool locally: - -```sh -# Install in editable mode -pip install -e ".[dev]" - -# Run tests -pytest -``` - -## License - -This project is released under the GPL 3 license. See the `LICENSE` file for details. -``` - ---- END OF FILE: README.md --- ----------------------------------------- - ---- START OF FILE: mkdocs.yml --- - -site_name: DeepBase -site_description: A CLI tool to consolidate project context for LLMs. -site_url: https://follen99.github.io/deepbase/ # Aggiorna con il tuo username -repo_url: https://github.com/follen99/deepbase -repo_name: follen99/deepbase - -theme: - name: material - palette: - # Light mode - - scheme: default - primary: teal - accent: purple - toggle: - icon: material/brightness-7 - name: Switch to dark mode - # Dark mode - - scheme: slate - primary: teal - accent: lime - toggle: - icon: material/brightness-4 - name: Switch to light mode - features: - - content.code.copy - - navigation.expand - - navigation.top - - search.suggest - -plugins: - - search - - mkdocstrings: - handlers: - python: - paths: [src] # Dice al plugin dove trovare il codice sorgente - -nav: - - Home: index.md - - API Reference: reference.md - ---- END OF FILE: mkdocs.yml --- ----------------------------------------- - ---- START OF FILE: pyproject.toml --- - -# pyproject.toml - -[build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" - -[project] -name = "deepbase" -# Increment the version to reflect changes -version = "1.8.0" -authors = [ - { name="Giuliano Ranauro", email="ranaurogln@email.com" }, -] -description = "A CLI utility to consolidate project context for LLMs." -readme = "README.md" -license = { file="LICENSE" } -requires-python = ">=3.8" -classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", - "Operating System :: OS Independent", - "Topic :: Software Development", - "Topic :: Utilities", -] -keywords = ["llm", "context", "developer-tool", "ai", "code-analysis", "deepbase"] - -# Main project dependencies -dependencies = [ - "typer[all]", # For a modern and robust CLI - "rich", # For colored output and progress bars - "tomli", # To read .toml configuration files - "chardet" # To reliably detect file encoding -] - -[project.urls] -"Homepage" = "https://github.com/follen99/deepbase" -"Bug Tracker" = "https://github.com/follen99/deepbase/issues" - -# Update the script to point to the Typer app object -[project.scripts] -deepbase = "deepbase.main:app" - -# Optional dependencies for development (e.g., testing) -[project.optional-dependencies] -dev = [ - "pytest", -] - -docs = [ - "mkdocs>=1.5.0", - "mkdocs-material>=9.5.0", - "mkdocstrings[python]>=0.24.0", -] - ---- END OF FILE: pyproject.toml --- ----------------------------------------- - ---- START OF FILE: examples/deepbase_context.md --- - -# Project Context: DeepBase - -================================================================================ -### PROJECT STRUCTURE ### -================================================================================ - -Project Structure in: /home/follen/Documents/uni-git/DeepBase - -📂 ./ - 📄 .gitignore - 📄 README.md - 📄 pyproject.toml - 📂 src/ - 📂 deepbase/ - 📄 __init__.py - 📄 main.py - 📂 deepbase.egg-info/ - 📂 examples/ - 📄 deepbase_context.md - 📂 tests/ - 📄 test_main.py - - -================================================================================ -### FILE CONTENTS ### -================================================================================ - ---- START OF FILE: .gitignore --- - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -debug.log -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case you generate it automatically, you may want to ignore it. -# Pipfile.lock - -# poetry -# According to python-poetry/poetry#519, it is recommended to include poetry.lock in version control. -# This is especially if you are building a library. -# poetry.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env.bak -venv.bak - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - -# Abstra -# Abstra is an AI-powered process automation framework. -# Ignore directories containing user credentials, local state, and settings. -# Learn more at https://abstra.io/docs -.abstra/ - -# Visual Studio Code -# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore -# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore -# and can be added to the global gitignore or merged into this file. However, if you prefer, -# you could uncomment the following to ignore the entire vscode folder -# .vscode/ - -# Ruff stuff: -.ruff_cache/ - -# PyPI configuration file -.pypirc - -# Cursor -# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to -# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data -# refer to https://docs.cursor.com/context/ignore-files -.cursorignore -.cursorindexingignore - -# Marimo -marimo/_static/ -marimo/_lsp/ -__marimo__/ - - ---- END OF FILE: .gitignore --- - ----------------------------------------- - ---- START OF FILE: README.md --- - -# DeepBase - -**DeepBase** is a command-line tool that analyzes a project directory, extracts the folder structure and the content of all significant code files, and consolidates them into a single text/markdown file. - -This unified "context" is perfect for providing to a Large Language Model (LLM) to enable it to deeply understand the entire codebase. - -## Features - -- **Project Structure**: Generates a tree view of the folder and file structure. -- **Smart Filtering**: Automatically ignores common unnecessary directories (e.g., `.git`, `venv`, `node_modules`). -- **Configurable**: Customize ignored directories and included extensions via a `.deepbase.toml` file. -- **Extension Selection**: Includes only files with relevant code or configuration extensions. -- **Unified Output**: Combines everything into a single file, easy to copy and paste. -- **PyPI Ready**: Easy to install via `pip`. - -## Installation - -You can install DeepBase directly from PyPI: - -```sh -pip install deepbase - -``` - -## How to Use - -Once installed, you will have the `deepbase` command available in your terminal. - -**Basic Usage:** - -Navigate to your project folder (or a parent folder) and run: - -```sh -deepbase . -``` -*The dot `.` indicates the current directory.* - -This command will create a file called `llm_context.md` in the current directory. - -**Specify Directory and Output File:** - -```sh -deepbase /path/to/your/project -o project_context.txt -``` - -### Advanced Configuration - -You can customize DeepBase's behavior by creating a `.deepbase.toml` file in the root of the project you are analyzing. - -**Example `.deepbase.toml`:** -```toml -# Add more directories to ignore. -# These will be added to the default ones. -ignore_dirs = [ - "my_assets_folder", - "experimental" -] - -# Add more extensions or filenames to include. -significant_extensions = [ - ".cfg", - "Makefile" -] -``` - -## License - -This project is released under the GPL 3 license. See the `LICENSE` file for details. - ---- END OF FILE: README.md --- - ----------------------------------------- - ---- START OF FILE: pyproject.toml --- - -# pyproject.toml - -[build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" - -[project] -name = "deepbase" -# Increment the version to reflect changes -version = "1.2.0" -authors = [ - { name="Your Name", email="your@email.com" }, -] -description = "A CLI utility to consolidate project context for LLMs." -readme = "README.md" -license = { file="LICENSE" } -requires-python = ">=3.8" -classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Topic :: Software Development", - "Topic :: Utilities", -] -keywords = ["llm", "context", "developer-tool", "ai", "code-analysis", "deepbase"] - -# Main project dependencies -dependencies = [ - "typer[all]", # For a modern and robust CLI - "rich", # For colored output and progress bars - "tomli", # To read .toml configuration files - "chardet" # To reliably detect file encoding -] - -[project.urls] -"Homepage" = "https://github.com/follen99/deepbase" -"Bug Tracker" = "https://github.com/follen99/deepbase/issues" - -# Update the script to point to the Typer app object -[project.scripts] -deepbase = "deepbase.main:app" - -# Optional dependencies for development (e.g., testing) -[project.optional-dependencies] -dev = [ - "pytest", -] - ---- END OF FILE: pyproject.toml --- - ----------------------------------------- - ---- START OF FILE: src/deepbase/__init__.py --- - - - ---- END OF FILE: src/deepbase/__init__.py --- - ----------------------------------------- - ---- START OF FILE: src/deepbase/main.py --- - -# src/deepbase/main.py - -import os -import typer -from rich.console import Console -from rich.progress import Progress -import tomli -import chardet -from typing import List, Dict, Any, Set - -# --- DEFAULT CONFIGURATION --- - -DEFAULT_CONFIG = { - "ignore_dirs": { - "__pycache__", ".git", ".idea", ".vscode", "venv", ".venv", "env", - ".env", "node_modules", "build", "dist", "target", "out", "bin", - "obj", "logs", "tmp", "eggs", ".eggs", ".pytest_cache", ".tox", - "site", - }, - "significant_extensions": { - ".py", ".java", ".js", ".ts", ".html", ".css", ".scss", ".sql", - ".md", ".json", ".xml", ".yml", ".yaml", ".sh", ".bat", "Dockerfile", - ".dockerignore", ".gitignore", "requirements.txt", "pom.xml", "gradlew", - "pyproject.toml", "setup.py", - } -} - -# --- TOOL INITIALIZATION --- - -app = typer.Typer( - name="deepbase", - help="Analyzes a project directory and creates a unified context document for an LLM.", - add_completion=False -) -console = Console() - - -def load_config(root_dir: str) -> Dict[str, Any]: - """Loads configuration from .deepbase.toml or uses the default.""" - config_path = os.path.join(root_dir, ".deepbase.toml") - config = DEFAULT_CONFIG.copy() - - if os.path.exists(config_path): - console.print(f"[bold cyan]Found configuration file: '.deepbase.toml'[/bold cyan]") - try: - with open(config_path, "rb") as f: - user_config = tomli.load(f) - - # Merge user config with defaults - config["ignore_dirs"].update(user_config.get("ignore_dirs", [])) - config["significant_extensions"].update(user_config.get("significant_extensions", [])) - console.print("[green]Custom configuration loaded successfully.[/green]") - - except tomli.TOMLDecodeError as e: - console.print(f"[bold red]Error parsing .deepbase.toml:[/bold red] {e}") - console.print("[yellow]Using default configuration.[/yellow]") - - return config - - -def is_significant_file(file_path: str, significant_extensions: Set[str]) -> bool: - """Checks if a file is significant based on the provided extensions.""" - file_name = os.path.basename(file_path) - if file_name in significant_extensions: - return True - _, ext = os.path.splitext(file_name) - return ext in significant_extensions - - -def generate_directory_tree(root_dir: str, config: Dict[str, Any]) -> str: - """Generates a text representation of the folder structure.""" - tree_str = f"Project Structure in: {os.path.abspath(root_dir)}\n\n" - ignore_dirs = config["ignore_dirs"] - significant_exts = config["significant_extensions"] - - for dirpath, dirnames, filenames in os.walk(root_dir, topdown=True): - dirnames[:] = [d for d in dirnames if d not in ignore_dirs and not d.startswith('.')] - - level = dirpath.replace(root_dir, '').count(os.sep) - indent = ' ' * 4 * level - - tree_str += f"{indent}📂 {os.path.basename(dirpath) or os.path.basename(os.path.abspath(root_dir))}/\n" - - sub_indent = ' ' * 4 * (level + 1) - - for f in sorted(filenames): - if is_significant_file(os.path.join(dirpath, f), significant_exts): - tree_str += f"{sub_indent}📄 {f}\n" - - return tree_str - - -def get_all_significant_files(root_dir: str, config: Dict[str, Any]) -> List[str]: - """Gets a list of all significant files to be included.""" - significant_files = [] - ignore_dirs = config["ignore_dirs"] - significant_exts = config["significant_extensions"] - - for dirpath, dirnames, filenames in os.walk(root_dir, topdown=True): - dirnames[:] = [d for d in dirnames if d not in ignore_dirs and not d.startswith('.')] - - for filename in sorted(filenames): - file_path = os.path.join(dirpath, filename) - if is_significant_file(file_path, significant_exts): - significant_files.append(file_path) - - return significant_files - - -@app.command() -def create( - directory: str = typer.Argument(..., help="The root directory of the project to scan."), - output: str = typer.Option("llm_context.md", "--output", "-o", help="The output file that will contain the context."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output, including ignored files.") -): - """ - Analyzes a project and creates a unified context file for an LLM. - """ - if not os.path.isdir(directory): - console.print(f"[bold red]Error:[/bold red] The specified directory does not exist: '{directory}'") - raise typer.Exit(code=1) - - console.print(f"[bold green]Starting scan of '{directory}'...[/bold green]") - - config = load_config(directory) - - try: - with open(output, "w", encoding="utf-8") as outfile: - # 1. Write the header - outfile.write(f"# Project Context: {os.path.basename(os.path.abspath(directory))}\n\n") - - # 2. Write the structure - outfile.write("="*80 + "\n### PROJECT STRUCTURE ###\n" + "="*80 + "\n\n") - directory_tree = generate_directory_tree(directory, config) - outfile.write(directory_tree) - outfile.write("\n\n") - - # 3. Write the file contents - outfile.write("="*80 + "\n### FILE CONTENTS ###\n" + "="*80 + "\n\n") - - significant_files = get_all_significant_files(directory, config) - - with Progress(console=console) as progress: - task = progress.add_task("[cyan]Analyzing files...", total=len(significant_files)) - - for file_path in significant_files: - relative_path = os.path.relpath(file_path, directory).replace('\\', '/') - progress.update(task, advance=1, description=f"[cyan]Analyzing: {relative_path}[/cyan]") - - outfile.write(f"--- START OF FILE: {relative_path} ---\n\n") - try: - with open(file_path, "rb") as fb: - raw_data = fb.read() - - # Detect encoding - detection = chardet.detect(raw_data) - encoding = detection['encoding'] if detection['encoding'] else 'utf-8' - - # Read and write content with robust error handling - content = raw_data.decode(encoding, errors="replace") - outfile.write(content) - - except Exception as e: - outfile.write(f"!!! Error while reading file: {e} !!!\n") - - outfile.write(f"\n\n--- END OF FILE: {relative_path} ---\n\n") - outfile.write("-" * 40 + "\n\n") - - console.print(f"\n[bold green]✓ SUCCESS[/bold green]: Context successfully created in file: [cyan]'{output}'[/cyan]") - - except IOError as e: - console.print(f"\n[bold red]Error writing to output file:[/bold red] {e}") - raise typer.Exit(code=1) - except Exception as e: - console.print(f"\n[bold red]An unexpected error occurred:[/bold red] {e}") - raise typer.Exit(code=1) - - -if __name__ == "__main__": - app() - ---- END OF FILE: src/deepbase/main.py --- - ----------------------------------------- - ---- START OF FILE: examples/deepbase_context.md --- - -# Project Context: DeepBase - -================================================================================ -### PROJECT STRUCTURE ### -================================================================================ - -Project Structure in: /home/follen/Documents/uni-git/DeepBase - -📂 ./ - 📄 .gitignore - 📄 README.md - 📄 pyproject.toml - 📂 src/ - 📂 deepbase/ - 📄 __init__.py - 📄 main.py - 📂 deepbase.egg-info/ - 📂 examples/ - 📄 deepbase_context.md - 📂 tests/ - 📄 test_main.py - - -================================================================================ -### FILE CONTENTS ### -================================================================================ - ---- START OF FILE: .gitignore --- - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -debug.log -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case you generate it automatically, you may want to ignore it. -# Pipfile.lock - -# poetry -# According to python-poetry/poetry#519, it is recommended to include poetry.lock in version control. -# This is especially if you are building a library. -# poetry.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env.bak -venv.bak - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - -# Abstra -# Abstra is an AI-powered process automation framework. -# Ignore directories containing user credentials, local state, and settings. -# Learn more at https://abstra.io/docs -.abstra/ - -# Visual Studio Code -# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore -# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore -# and can be added to the global gitignore or merged into this file. However, if you prefer, -# you could uncomment the following to ignore the entire vscode folder -# .vscode/ - -# Ruff stuff: -.ruff_cache/ - -# PyPI configuration file -.pypirc - -# Cursor -# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to -# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data -# refer to https://docs.cursor.com/context/ignore-files -.cursorignore -.cursorindexingignore - -# Marimo -marimo/_static/ -marimo/_lsp/ -__marimo__/ - - ---- END OF FILE: .gitignore --- - ----------------------------------------- - ---- START OF FILE: README.md --- - -# DeepBase - -**DeepBase** is a command-line tool that analyzes a project directory, extracts the folder structure and the content of all significant code files, and consolidates them into a single text/markdown file. - -This unified "context" is perfect for providing to a Large Language Model (LLM) to enable it to deeply understand the entire codebase. - -## Features - -- **Project Structure**: Generates a tree view of the folder and file structure. -- **Smart Filtering**: Automatically ignores common unnecessary directories (e.g., `.git`, `venv`, `node_modules`). -- **Configurable**: Customize ignored directories and included extensions via a `.deepbase.toml` file. -- **Extension Selection**: Includes only files with relevant code or configuration extensions. -- **Unified Output**: Combines everything into a single file, easy to copy and paste. -- **PyPI Ready**: Easy to install via `pip`. - -## Installation - -You can install DeepBase directly from PyPI: - -```sh -pip install deepbase - -``` - -## How to Use - -Once installed, you will have the `deepbase` command available in your terminal. - -**Basic Usage:** - -Navigate to your project folder (or a parent folder) and run: - -```sh -deepbase . -``` -*The dot `.` indicates the current directory.* - -This command will create a file called `llm_context.md` in the current directory. - -**Specify Directory and Output File:** - -```sh -deepbase /path/to/your/project -o project_context.txt -``` - -### Advanced Configuration - -You can customize DeepBase's behavior by creating a `.deepbase.toml` file in the root of the project you are analyzing. - -**Example `.deepbase.toml`:** -```toml -# Add more directories to ignore. -# These will be added to the default ones. -ignore_dirs = [ - "my_assets_folder", - "experimental" -] - -# Add more extensions or filenames to include. -significant_extensions = [ - ".cfg", - "Makefile" -] -``` - -## License - -This project is released under the GPL 3 license. See the `LICENSE` file for details. - ---- END OF FILE: README.md --- - ----------------------------------------- - ---- START OF FILE: pyproject.toml --- - -# pyproject.toml - -[build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" - -[project] -name = "deepbase" -# Increment the version to reflect changes -version = "1.2.0" -authors = [ - { name="Your Name", email="your@email.com" }, -] -description = "A CLI utility to consolidate project context for LLMs." -readme = "README.md" -license = { file="LICENSE" } -requires-python = ">=3.8" -classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Topic :: Software Development", - "Topic :: Utilities", -] -keywords = ["llm", "context", "developer-tool", "ai", "code-analysis", "deepbase"] - -# Main project dependencies -dependencies = [ - "typer[all]", # For a modern and robust CLI - "rich", # For colored output and progress bars - "tomli", # To read .toml configuration files - "chardet" # To reliably detect file encoding -] - -[project.urls] -"Homepage" = "https://github.com/follen99/deepbase" -"Bug Tracker" = "https://github.com/follen99/deepbase/issues" - -# Update the script to point to the Typer app object -[project.scripts] -deepbase = "deepbase.main:app" - -# Optional dependencies for development (e.g., testing) -[project.optional-dependencies] -dev = [ - "pytest", -] - ---- END OF FILE: pyproject.toml --- - ----------------------------------------- - ---- START OF FILE: src/deepbase/__init__.py --- - - - ---- END OF FILE: src/deepbase/__init__.py --- - ----------------------------------------- - ---- START OF FILE: src/deepbase/main.py --- - - - ---- END OF FILE: examples/deepbase_context.md --- - ----------------------------------------- - ---- START OF FILE: tests/test_main.py --- - -# tests/test_main.py - -import os -from typer.testing import CliRunner -from deepbase.main import app - -# Runner instance to execute Typer app commands -runner = CliRunner() - -def test_create_context_successfully(tmp_path): - """ - Tests the creation of a context file in a successful scenario. - """ - # 1. Create a mock project structure - project_dir = tmp_path / "my_test_project" - project_dir.mkdir() - (project_dir / "main.py").write_text("print('hello world')") - (project_dir / "README.md").write_text("# My Project") - - # Create a directory to ignore - ignored_dir = project_dir / "venv" - ignored_dir.mkdir() - (ignored_dir / "ignored_file.py").write_text("ignore me") - - output_file = tmp_path / "context.md" - - # 2. Execute the CLI command with arguments in the correct order - result = runner.invoke(app, [str(project_dir), "--output", str(output_file)]) - - # 3. Verify the results - assert result.exit_code == 0 - assert "SUCCESS" in result.stdout - assert output_file.exists() - - content = output_file.read_text() - - # Check that significant files are included - assert "--- START OF FILE: main.py ---" in content - assert "print('hello world')" in content - assert "--- START OF FILE: README.md ---" in content - - # Check that ignored directory and files are not present - assert "venv" not in content - assert "ignored_file.py" not in content - -def test_directory_not_found(): - """ - Tests the behavior when the input directory does not exist. - """ - result = runner.invoke(app, ["non_existent_dir"]) - assert result.exit_code == 1 - assert "directory does not exist" in result.stdout - ---- END OF FILE: tests/test_main.py --- - ----------------------------------------- - - - ---- END OF FILE: examples/deepbase_context.md --- ----------------------------------------- - ---- START OF FILE: tests/test_suite_python.py --- - -# tests/test_suite_python.py - -import os -import typer -from typer.testing import CliRunner -from deepbase.main import main -import sqlite3 - -# Creiamo un'app Typer temporanea per il testing -test_app = typer.Typer() -test_app.command()(main) - -runner = CliRunner() - -class TestPythonSuite: - """ - Test suite dedicata all'analisi di progetti Python con DeepBase. - FIX: Specifica sempre l'output path esplicito per evitare FileNotFoundError. - FIX: Controlla il contenuto del file generato, non lo stdout, per la struttura. - """ - - def create_dummy_python_project(self, root): - """Helper per popolare una directory con file Python finti.""" - # 1. File principale - main_py = root / "main.py" - main_py.write_text(""" -import os - -def hello_world(): - print("Hello content") - return True - -class MyClass: - def method_one(self): - # This is a comment inside - return 1 -""", encoding="utf-8") - - # 2. Modulo utils - utils_dir = root / "utils" - utils_dir.mkdir() - (utils_dir / "helper.py").write_text("def help_me():\n pass", encoding="utf-8") - - # 3. File da ignorare (segreto) - (root / "secrets.py").write_text("API_KEY = '123'", encoding="utf-8") - - # 4. Cartella da ignorare (es. cache) - cache_dir = root / ".mypy_cache" - cache_dir.mkdir() - (cache_dir / "data.json").write_text("{}", encoding="utf-8") - - def test_basic_structure(self, tmp_path): - """Testa che il comando base generi la struttura nel file.""" - self.create_dummy_python_project(tmp_path) - - output_file = tmp_path / "llm_context.md" - - # Passiamo esplicitamente l'output file nel tmp_path - result = runner.invoke(test_app, [str(tmp_path), "-o", str(output_file)]) - - assert result.exit_code == 0 - assert output_file.exists() - - content = output_file.read_text(encoding="utf-8") - - # Verifica presenza nell'albero (DENTRO IL FILE, non nello stdout) - assert "main.py" in content - assert "utils/" in content - - # Verifica che il CONTENUTO del codice NON ci sia - assert "def hello_world" not in content - assert "import os" not in content - - def test_flag_all_content(self, tmp_path): - """Testa --all: deve includere tutto il codice.""" - self.create_dummy_python_project(tmp_path) - - output_file = tmp_path / "llm_context.md" - result = runner.invoke(test_app, [str(tmp_path), "--all", "-o", str(output_file)]) - - assert result.exit_code == 0 - content = output_file.read_text(encoding="utf-8") - - # Deve contenere il corpo delle funzioni - assert "print(\"Hello content\")" in content - assert "class MyClass:" in content - - def test_flag_light_mode(self, tmp_path): - """Testa --light: deve includere firme ma NON il corpo.""" - self.create_dummy_python_project(tmp_path) - - output_file = tmp_path / "llm_context.md" - result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) - - assert result.exit_code == 0 - content = output_file.read_text(encoding="utf-8") - - # Deve contenere la notice Light Mode - assert "[LIGHT MODE]" in content - - # Deve contenere le firme (via AST parsing) - # Nota: controlliamo stringhe parziali per evitare problemi di formattazione spazi - assert "def hello_world" in content - assert "class MyClass:" in content - - # NON deve contenere il corpo del codice - assert "print(\"Hello content\")" not in content - assert "return 1" not in content - - def test_focus_mode_hybrid(self, tmp_path): - """Testa --focus combined (ibrido).""" - self.create_dummy_python_project(tmp_path) - - output_file = tmp_path / "llm_context.md" - # Focus solo su main.py - result = runner.invoke(test_app, [str(tmp_path), "--focus", "main.py", "-o", str(output_file)]) - - assert result.exit_code == 0 - content = output_file.read_text(encoding="utf-8") - - # main.py deve essere FULL - assert "--- START OF FILE: main.py ---" in content - assert "print(\"Hello content\")" in content - - # utils/helper.py NON era in focus, quindi non dovrebbe esserci il contenuto - assert "--- START OF FILE: utils/helper.py ---" not in content - - def test_focus_with_light_background(self, tmp_path): - """Testa --light insieme a --focus.""" - self.create_dummy_python_project(tmp_path) - - output_file = tmp_path / "llm_context.md" - # Focus su main.py, ma background --light - result = runner.invoke(test_app, [str(tmp_path), "--light", "--focus", "main.py", "-o", str(output_file)]) - - content = output_file.read_text(encoding="utf-8") - - # main.py FULL - assert "print(\"Hello content\")" in content - - # utils/helper.py LIGHT (deve esserci la firma) - assert "def help_me" in content - - def test_toml_configuration(self, tmp_path): - """Testa che .deepbase.toml venga letto e rispettato.""" - self.create_dummy_python_project(tmp_path) - - # Crea configurazione per ignorare "secrets.py" - toml_file = tmp_path / ".deepbase.toml" - toml_file.write_text('ignore_files = ["secrets.py"]', encoding="utf-8") - - output_file = tmp_path / "llm_context.md" - result = runner.invoke(test_app, [str(tmp_path), "--all", "-o", str(output_file)]) - - assert result.exit_code == 0 - content = output_file.read_text(encoding="utf-8") - - # secrets.py NON deve apparire - assert "secrets.py" not in content - assert "API_KEY" not in content - - def test_custom_output_path(self, tmp_path): - """Testa l'opzione -o per il file di output.""" - self.create_dummy_python_project(tmp_path) - - custom_out = tmp_path / "custom_analysis.txt" - result = runner.invoke(test_app, [str(tmp_path), "-o", str(custom_out)]) - - assert result.exit_code == 0 - assert custom_out.exists() - - def test_error_handling_invalid_path(self): - """Testa che il programma gestisca percorsi inesistenti.""" - result = runner.invoke(test_app, ["/percorso/inesistente/assoluto"]) - assert result.exit_code == 1 - assert "Target not found" in result.stdout - - def test_database_handling(self, tmp_path): - """Testa il supporto per database SQLite (schema extraction e light mode).""" - import sqlite3 # Import necessario qui o in cima al file - - # Creiamo una cartella e un DB reale - project_dir = tmp_path / "db_project" - project_dir.mkdir() - db_path = project_dir / "test_db.sqlite" - - conn = sqlite3.connect(db_path) - cursor = conn.cursor() - cursor.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, username TEXT NOT NULL)") - cursor.execute("CREATE TABLE posts (id INTEGER PRIMARY KEY, user_id INTEGER, content TEXT)") - conn.commit() - conn.close() - - output_file = project_dir / "context.md" - - # 1. Test Full Mode (--all) -> Deve mostrare schema dettagliato - result = runner.invoke(test_app, [str(project_dir), "--all", "-o", str(output_file)]) - assert result.exit_code == 0 - content = output_file.read_text(encoding="utf-8") - - # Verifica che il DB sia stato processato - assert "test_db.sqlite" in content - - # Verifica il contenuto generato da generate_database_context_full - # Nota: "DATABASE SCHEMA" appare solo in single-file mode, qui cerchiamo il contenuto reale - assert "Table: `users`" in content - # Verifica parziale di una colonna per assicurarsi che lo schema sia stato letto - assert "username" in content - assert "TEXT" in content - - # 2. Test Light Mode (--light) -> Deve mostrare schema compatto (TOON) - result = runner.invoke(test_app, [str(project_dir), "--light", "-o", str(output_file)]) - assert result.exit_code == 0 - content = output_file.read_text(encoding="utf-8") - - # Verifica firma compatta (TOON) - # Cerca la definizione della tabella users e la colonna id - assert "users" in content - # Verifica formato TOON: nome:tipo - assert "id:INTEGER" in content - ---- END OF FILE: tests/test_suite_python.py --- ----------------------------------------- - ---- START OF FILE: src/deepbase/__init__.py --- - - - ---- END OF FILE: src/deepbase/__init__.py --- ----------------------------------------- - ---- START OF FILE: src/deepbase/database.py --- - -# src/deepbase/database.py -""" -Database context extraction module for DeepBase. -Handles SQLite databases to provide structured context about schema and tables. -""" - -import sqlite3 -import os -from typing import Dict, List, Any, Optional -from dataclasses import dataclass - - -@dataclass -class ColumnInfo: - name: str - data_type: str - nullable: bool - default: Optional[str] - primary_key: bool - - -@dataclass -class TableInfo: - name: str - columns: List[ColumnInfo] - foreign_keys: List[Dict[str, str]] - indexes: List[Dict[str, Any]] - row_count: int - - -@dataclass -class DatabaseSchema: - tables: List[TableInfo] - total_size_bytes: int - total_tables: int - total_rows: int - - -def get_database_schema(db_path: str) -> DatabaseSchema: - """ - Extract complete schema information from SQLite database. - """ - conn = sqlite3.connect(db_path) - cursor = conn.cursor() - - # Get database size - total_size = os.path.getsize(db_path) - - # Get all tables (excluding sqlite internal tables) - cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'") - table_names = [row[0] for row in cursor.fetchall()] - - tables = [] - total_rows = 0 - - for table_name in table_names: - # Get row count - cursor.execute(f"SELECT COUNT(*) FROM `{table_name}`") - row_count = cursor.fetchone()[0] - total_rows += row_count - - # Get column info using PRAGMA - cursor.execute(f"PRAGMA table_info(`{table_name}`)") - columns = [] - for row in cursor.fetchall(): - col = ColumnInfo( - name=row[1], - data_type=row[2], - nullable=not row[3], # notnull column: 0=true, 1=false - default=row[4], - primary_key=bool(row[5]) - ) - columns.append(col) - - # Get foreign keys - cursor.execute(f"PRAGMA foreign_key_list(`{table_name}`)") - foreign_keys = [] - for row in cursor.fetchall(): - fk = { - "id": row[0], - "seq": row[1], - "table": row[2], - "from": row[3], - "to": row[4], - "on_update": row[5], - "on_delete": row[6] - } - foreign_keys.append(fk) - - # Get indexes - cursor.execute(f"PRAGMA index_list(`{table_name}`)") - indexes = [] - for row in cursor.fetchall(): - index_name = row[1] - cursor.execute(f"PRAGMA index_info(`{index_name}`)") - index_columns = [r[2] for r in cursor.fetchall()] - indexes.append({ - "name": index_name, - "unique": row[2], - "columns": index_columns - }) - - table_info = TableInfo( - name=table_name, - columns=columns, - foreign_keys=foreign_keys, - indexes=indexes, - row_count=row_count - ) - tables.append(table_info) - - conn.close() - - return DatabaseSchema( - tables=tables, - total_size_bytes=total_size, - total_tables=len(tables), - total_rows=total_rows - ) - - -def generate_database_context_full(schema: DatabaseSchema, db_name: str) -> str: - """ - Generate full detailed context for --all mode. - Includes complete schema, relationships, and sample data hints. - """ - lines = [ - f"# Database: {db_name}", - f"## Overview", - f"- Total Tables: {schema.total_tables}", - f"- Total Rows: {schema.total_rows:,}", - f"- File Size: {schema.total_size_bytes:,} bytes ({schema.total_size_bytes / 1024:.2f} KB)", - "", - "## Schema Details", - "" - ] - - for table in schema.tables: - lines.extend([ - f"### Table: `{table.name}`", - f"- Rows: {table.row_count:,}", - "" - ]) - - # Columns - lines.append("#### Columns:") - lines.append("| Column | Type | Nullable | Default | PK |") - lines.append("|--------|------|----------|---------|-----|") - for col in table.columns: - pk_mark = "✓" if col.primary_key else "" - null_mark = "✓" if col.nullable else "NOT NULL" - default_val = col.default if col.default else "-" - lines.append(f"| `{col.name}` | {col.data_type} | {null_mark} | {default_val} | {pk_mark} |") - lines.append("") - - # Foreign Keys - if table.foreign_keys: - lines.append("#### Foreign Keys:") - for fk in table.foreign_keys: - lines.append(f"- `{fk['from']}` → `{fk['table']}`.`{fk['to']}` (ON UPDATE: {fk['on_update']}, ON DELETE: {fk['on_delete']})") - lines.append("") - - # Indexes - if table.indexes: - lines.append("#### Indexes:") - for idx in table.indexes: - unique_str = "UNIQUE " if idx['unique'] else "" - lines.append(f"- {unique_str}`{idx['name']}` on ({', '.join(f'`{c}`' for c in idx['columns'])})") - lines.append("") - - lines.append("---") - lines.append("") - - return "\n".join(lines) - - -def generate_database_context_toon(schema: DatabaseSchema, db_name: str) -> str: - """ - Generate minimal TOON-style context (skeleton only). - Best for large databases where token efficiency matters. - """ - lines = [ - f"DB: {db_name}", - f"Tables: {schema.total_tables} | Rows: {schema.total_rows:,}", - "" - ] - - for table in schema.tables: - # Compact representation: Table(columns) [FKs] - col_defs = [] - for col in table.columns: - flags = [] - if col.primary_key: - flags.append("PK") - if not col.nullable: - flags.append("NN") - flag_str = f"[{','.join(flags)}]" if flags else "" - col_defs.append(f"{col.name}:{col.data_type}{flag_str}") - - fk_refs = [] - for fk in table.foreign_keys: - fk_refs.append(f"{fk['from']}→{fk['table']}.{fk['to']}") - - fk_str = f" | FK: {', '.join(fk_refs)}" if fk_refs else "" - lines.append(f"T: {table.name}({', '.join(col_defs)}){fk_str}") - - return "\n".join(lines) - - -def generate_database_context_hybrid(schema: DatabaseSchema, db_name: str, focused_tables: List[str]) -> str: - """ - Generate hybrid context: TOON for all, full detail for focused tables. - """ - lines = [ - f"# Database: {db_name}", - f"## Overview", - f"- Total Tables: {schema.total_tables}", - f"- Total Rows: {schema.total_rows:,}", - "", - "## Schema (TOON + Focus)", - "" - ] - - for table in schema.tables: - is_focused = table.name in focused_tables or any(f in table.name for f in focused_tables) - - if is_focused: - # Full detail for focused tables - lines.extend([ - f"### [FOCUSED] Table: `{table.name}` ⭐", - f"- Rows: {table.row_count:,}", - "" - ]) - - lines.append("#### Columns:") - lines.append("| Column | Type | Nullable | Default | PK |") - lines.append("|--------|------|----------|---------|-----|") - for col in table.columns: - pk_mark = "✓" if col.primary_key else "" - null_mark = "✓" if col.nullable else "NOT NULL" - default_val = col.default if col.default else "-" - lines.append(f"| `{col.name}` | {col.data_type} | {null_mark} | {default_val} | {pk_mark} |") - lines.append("") - - if table.foreign_keys: - lines.append("#### Foreign Keys:") - for fk in table.foreign_keys: - lines.append(f"- `{fk['from']}` → `{fk['table']}`.`{fk['to']}`") - lines.append("") - else: - # TOON style for non-focused - col_names = [f"{col.name}:{col.data_type}" + ("(PK)" if col.primary_key else "") - for col in table.columns] - lines.append(f"- `{table.name}`: {', '.join(col_names)}") - - return "\n".join(lines) - - -def is_sqlite_database(file_path: str) -> bool: - """ - Check if file is a valid SQLite database by reading magic bytes. - """ - try: - with open(file_path, 'rb') as f: - header = f.read(16) - return header[:16] == b'SQLite format 3\x00' - except: - return False - ---- END OF FILE: src/deepbase/database.py --- ----------------------------------------- - ---- START OF FILE: src/deepbase/main.py --- - -# src/deepbase/main.py - -import os -import typer -import fnmatch -import math -from rich.console import Console -from rich.progress import Progress -import tomli -import chardet -from importlib.metadata import version as get_package_version, PackageNotFoundError -from typing import List, Dict, Any, Set, Optional, Tuple - -from deepbase.toon import generate_toon_representation, generate_light_representation, generate_database_focused -from deepbase.parsers import get_document_structure -from deepbase.database import is_sqlite_database, get_database_schema, generate_database_context_full - -from rich.table import Table -from rich.panel import Panel - -# --- CONFIGURAZIONI --- - -DEFAULT_CONFIG = { - "ignore_dirs": { - "__pycache__", ".git", ".idea", ".vscode", "venv", ".venv", "env", - ".env", "node_modules", "build", "dist", "target", "out", "bin", - "obj", "logs", "tmp", "eggs", ".eggs", ".pytest_cache", ".tox", - "site", "*.egg-info", "coverage", ".next", ".nuxt", ".output", - "ios", "android" - }, - "ignore_files": { - "package-lock.json", "yarn.lock", "pnpm-lock.yaml", "bun.lockb", - "poetry.lock", "Pipfile.lock", "composer.lock", ".DS_Store", "Thumbs.db" - }, - "significant_extensions": { - ".py", ".java", ".js", ".jsx", ".ts", ".tsx", ".html", ".css", ".scss", ".sql", - ".md", ".json", ".xml", ".yml", ".yaml", ".sh", ".bat", "Dockerfile", - ".dockerignore", ".gitignore", "requirements.txt", "pom.xml", "gradlew", - "pyproject.toml", "setup.py", "package.json", "tsconfig.json", - ".tex", ".bib", ".sty", ".cls", - ".db", ".sqlite", ".sqlite3", ".db3" - } -} - -LIGHT_MODE_NOTICE = """> **[LIGHT MODE]** Questo file √® stato generato in modalit√† risparmio token: vengono incluse solo le firme dei metodi/funzioni e i commenti iniziali dei file. Il corpo del codice √® omesso. Se hai bisogno di approfondire un file, una classe o un metodo specifico, chiedi all'utente di fornire la porzione di codice completa. -""" - -console = Console() - -# --- UTILS --- - -def load_config(root_dir: str) -> Dict[str, Any]: - config_path = os.path.join(root_dir, ".deepbase.toml") - config = DEFAULT_CONFIG.copy() - config["ignore_dirs"] = set(config["ignore_dirs"]) - config["ignore_files"] = set(config["ignore_files"]) - config["significant_extensions"] = set(config["significant_extensions"]) - - if os.path.exists(config_path): - try: - with open(config_path, "rb") as f: - user_config = tomli.load(f) - config["ignore_dirs"].update(user_config.get("ignore_dirs", [])) - config["ignore_files"].update(user_config.get("ignore_files", [])) - config["significant_extensions"].update(user_config.get("significant_extensions", [])) - except tomli.TOMLDecodeError: - pass - return config - - -def estimate_tokens(size_bytes: int) -> str: - if size_bytes == 0: return "0t" - tokens = math.ceil(size_bytes / 4) - if tokens < 1000: - return f"~{tokens}t" - elif tokens < 1000000: - return f"~{tokens/1000:.1f}k t" - else: - return f"~{tokens/1000000:.1f}M t" - - -def estimate_tokens_for_content(text: str) -> int: - return math.ceil(len(text.encode("utf-8")) / 4) - -def calculate_light_tokens(file_path: str, content: str) -> int: - from deepbase.toon import generate_light_representation - light_repr = generate_light_representation(file_path, content) - return estimate_tokens_for_content(light_repr) - -def is_significant_file(file_path: str, config: Dict[str, Any], output_file_abs: str = None) -> bool: - file_name = os.path.basename(file_path) - - if output_file_abs and os.path.abspath(file_path) == output_file_abs: - return False - - if output_file_abs and file_name == os.path.basename(output_file_abs): - return False - - if file_name in config["ignore_files"]: - return False - - significant_extensions = config["significant_extensions"] - - if file_name in significant_extensions: - return True - - _, ext = os.path.splitext(file_path) - if ext in significant_extensions: - return True - - if is_sqlite_database(file_path): - return True - - return False - - -def calculate_project_stats(root_dir: str, config: Dict[str, Any], output_file_abs: str, light_mode: bool = False) -> int: - total_size = 0 - ignore_dirs = config["ignore_dirs"] - for dirpath, dirnames, filenames in os.walk(root_dir, topdown=True): - dirnames[:] = [d for d in dirnames if d not in ignore_dirs and not d.startswith('.')] - for f in filenames: - fpath = os.path.join(dirpath, f) - if is_significant_file(fpath, config, output_file_abs): - try: - if light_mode and not is_sqlite_database(fpath): - content = read_file_content(fpath) - light_repr = generate_light_representation(fpath, content) - total_size += len(light_repr.encode("utf-8")) - else: - total_size += os.path.getsize(fpath) - except OSError: - pass - return total_size - - -# --- ALBERO DELLE DIRECTORY --- - -def _generate_tree_recursive( - current_path: str, - prefix: str, - config: Dict[str, Any], - total_project_size: int, - output_file_abs: str, - light_mode: bool = False -) -> Tuple[str, int]: - output_str = "" - subtree_size = 0 - - try: - items = sorted(os.listdir(current_path)) - except PermissionError: - return "", 0 - - filtered_items = [] - for item in items: - full_path = os.path.join(current_path, item) - is_dir = os.path.isdir(full_path) - - if is_dir: - if item not in config["ignore_dirs"] and not item.startswith('.'): - filtered_items.append((item, True)) - else: - if is_significant_file(full_path, config, output_file_abs): - filtered_items.append((item, False)) - - for i, (name, is_dir) in enumerate(filtered_items): - is_last = (i == len(filtered_items) - 1) - full_path = os.path.join(current_path, name) - connector = "‚îî‚îÄ‚îÄ " if is_last else "‚îú‚îÄ‚îÄ " - - if is_dir: - extension = " " if is_last else "‚îÇ " - sub_tree_str, sub_dir_size = _generate_tree_recursive( - full_path, - prefix + extension, - config, - total_project_size, - output_file_abs - ) - - subtree_size += sub_dir_size - - folder_stats = "" - if total_project_size > 0 and sub_dir_size > 0: - percent = (sub_dir_size / total_project_size) * 100 - token_est = estimate_tokens(sub_dir_size) - folder_stats = f" ({percent:.1f}% | {token_est})" - - output_str += f"{prefix}{connector}üìÅ {name}/{folder_stats}\n" - output_str += sub_tree_str - - else: - icon = "üóÑÔ∏è " if is_sqlite_database(full_path) else "üìÑ " - try: - raw_size = os.path.getsize(full_path) - if light_mode and not is_sqlite_database(full_path): - content = read_file_content(full_path) - light_repr = generate_light_representation(full_path, content) - size = len(light_repr.encode("utf-8")) - else: - size = raw_size - subtree_size += size - - # [FIX] Ripristinate le righe mancanti per stampare il file nell'albero! - file_stats = "" - if total_project_size > 0 and size > 0: - percent = (size / total_project_size) * 100 - token_est = estimate_tokens(size) - file_stats = f" ({percent:.1f}% | {token_est})" - - output_str += f"{prefix}{connector}{icon}{name}{file_stats}\n" - - except OSError: - pass - - return output_str, subtree_size - - -def generate_directory_tree(root_dir: str, config: Dict[str, Any], output_file_abs: str, light_mode: bool = False) -> Tuple[str, int, int]: - abs_root = os.path.abspath(root_dir) - total_size = calculate_project_stats(root_dir, config, output_file_abs, light_mode) - tree_body, _ = _generate_tree_recursive(root_dir, "", config, total_size, output_file_abs, light_mode) - header = f"üìÅ {os.path.basename(abs_root) or '.'}/\n" - total_tokens_est = math.ceil(total_size / 4) - return header + tree_body, total_size, total_tokens_est - - -# --- CORE --- - -def get_all_significant_files(root_dir: str, config: Dict[str, Any], output_file_abs: str) -> List[str]: - significant_files = [] - ignore_dirs = config["ignore_dirs"] - for dirpath, dirnames, filenames in os.walk(root_dir, topdown=True): - dirnames[:] = [d for d in dirnames if d not in ignore_dirs and not d.startswith('.')] - for filename in sorted(filenames): - file_path = os.path.join(dirpath, filename) - if is_significant_file(file_path, config, output_file_abs): - significant_files.append(file_path) - return significant_files - - -def read_file_content(file_path: str) -> str: - if is_sqlite_database(file_path): - try: - schema = get_database_schema(file_path) - return generate_database_context_full(schema, os.path.basename(file_path)) - except Exception as e: - return f"!!! Error reading database: {e} !!!" - try: - with open(file_path, "rb") as fb: - raw_data = fb.read() - detection = chardet.detect(raw_data) - encoding = detection['encoding'] if detection['encoding'] else 'utf-8' - return raw_data.decode(encoding, errors="replace") - except Exception as e: - return f"!!! Error reading file: {e} !!!" - - -def matches_focus(file_path: str, root_dir: str, focus_patterns: List[str]) -> bool: - if not focus_patterns: return False - rel_path = os.path.relpath(file_path, root_dir) - rel_path_fwd = rel_path.replace(os.sep, '/') - for pattern in focus_patterns: - clean_pattern = pattern.replace(os.sep, '/') - if fnmatch.fnmatch(rel_path_fwd, clean_pattern): return True - if clean_pattern in rel_path_fwd: return True - return False - - -def extract_focused_tables(file_path: str, focus_patterns: List[str]) -> List[str]: - if not is_sqlite_database(file_path): return [] - db_name = os.path.basename(file_path) - focused_tables = [] - for pattern in focus_patterns: - if '/' in pattern: - db_pattern, table_name = pattern.split('/', 1) - if fnmatch.fnmatch(db_name, db_pattern): focused_tables.append(table_name) - return focused_tables - - -def load_focus_patterns_from_file(file_path: str) -> List[str]: - patterns = [] - if os.path.exists(file_path): - try: - with open(file_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - for line in lines: - line = line.strip() - if line and not line.startswith("#"): patterns.append(line) - except Exception as e: - console.print(f"[bold yellow]Warning:[/bold yellow] Could not read focus file '{file_path}': {e}") - else: - console.print(f"[bold yellow]Warning:[/bold yellow] Focus file '{file_path}' not found.") - return patterns - - -def version_callback(value: bool): - if value: - try: - v = get_package_version("deepbase") - console.print(f"DeepBase version: [bold cyan]{v}[/bold cyan]") - except PackageNotFoundError: - console.print("DeepBase version: [yellow]unknown (editable/dev mode)[/yellow]") - raise typer.Exit() - - -# --- LOGICA PRINCIPALE (SENZA CLASSE TYPER) --- - -def main( - target: str = typer.Argument(None, help="The file or directory to scan."), - help: bool = typer.Option(False, "--help", "-h", is_eager=True, help="Show this help message and exit."), - version: Optional[bool] = typer.Option(None, "--version", "-v", callback=version_callback, is_eager=True, help="Show version and exit."), - output: str = typer.Option("llm_context.md", "--output", "-o", help="The output file."), - verbose: bool = typer.Option(False, "--verbose", "-V", help="Show detailed output."), - include_all: bool = typer.Option(False, "--all", "-a", help="Include full content of ALL files."), - light_mode: bool = typer.Option(False, "--light", "-l", help="Token-saving mode (signatures only)."), - focus: Optional[List[str]] = typer.Option(None, "--focus", "-f", help="Pattern to focus on (repeatable)."), - focus_file: Optional[str] = typer.Option(None, "--focus-file", "-ff", help="Path to focus patterns file.") -): - """ - Analyzes a directory OR a single file. - Default: structure tree only. - """ - # 1. Custom HELP Logic - if help or target is None: - console.print(Panel.fit( - "[bold cyan]DeepBase[/bold cyan] ‚Äî Consolidate project context for LLMs\n\n" - "[bold]Usage:[/bold] [green]deepbase[/green] [OPTIONS] [TARGET]\n\n" - "[bold]Arguments:[/bold]\n" - " [cyan]TARGET[/cyan] The file or directory to scan [dim][default: current dir][/dim]\n", - title="DeepBase v1.7.0", border_style="cyan" - )) - - # Options Table - options_table = Table(show_header=False, box=None, padding=(0, 2)) - options_table.add_column(style="cyan", no_wrap=True) - options_table.add_column(style="green", no_wrap=True) - options_table.add_column() - - options = [ - ("-v, --version", "", "Show version and exit"), - ("-o, --output", "TEXT", "Output file [dim][default: llm_context.md][/dim]"), - ("-V, --verbose", "", "Show detailed output"), - ("-a, --all", "", "Include full content of ALL files"), - ("-l, --light", "", "Token-saving mode (signatures only)"), - ("-f, --focus", "TEXT", "Pattern to focus on (repeatable)"), - ("-ff, --focus-file", "TEXT", "Path to focus patterns file"), - ("-h, --help", "", "Show this message and exit"), - ] - for opt, meta, desc in options: - options_table.add_row(opt, meta, desc) - - console.print(Panel(options_table, title="Options", border_style="green", title_align="left")) - - config_content = """Create a [cyan].deepbase.toml[/cyan] in your project root: - -[dim]# Ignore additional directories[/dim] -[yellow]ignore_dirs = ["my_assets", "experimental"][/yellow] - -[dim]# Ignore specific files[/dim] -[yellow]ignore_files = ["*.log", "secrets.env"][/yellow] - -[dim]# Add extra file extensions[/dim] -[yellow]significant_extensions = [".cfg", "Makefile", ".tsx"][/yellow]""" - - console.print(Panel( - config_content, - title="Configuration (.deepbase.toml)", - border_style="yellow", - title_align="left" - )) - - links_table = Table(show_header=False, box=None, padding=(0, 2)) - links_table.add_column(style="bold") - links_table.add_column(style="blue") - - links_table.add_row("Documentation:", "https://follen99.github.io/DeepBase/") - links_table.add_row("Repository:", "https://github.com/follen99/DeepBase") - links_table.add_row("Issues:", "https://github.com/follen99/DeepBase/issues") - links_table.add_row("PyPI:", "https://pypi.org/project/deepbase/") - - console.print(Panel(links_table, title="Links", border_style="blue", title_align="left")) - - raise typer.Exit() - - # 2. Main Logic Start - if not os.path.exists(target): - console.print(f"[bold red]Error:[/bold red] Target not found: '{target}'") - raise typer.Exit(code=1) - - abs_output_path = os.path.abspath(output) - - active_focus_patterns = [] - if focus: active_focus_patterns.extend(focus) - if focus_file: - file_patterns = load_focus_patterns_from_file(focus_file) - if file_patterns: active_focus_patterns.extend(file_patterns) - active_focus_patterns = list(set(active_focus_patterns)) - - mode_label = "" - if light_mode: - mode_label = " [bold yellow](LIGHT ‚Äî signatures only)[/bold yellow]" - elif include_all: - mode_label = " [bold cyan](ALL ‚Äî full content)[/bold cyan]" - - console.print(f"[bold green]Analyzing '{target}'...[/bold green]{mode_label}") - - if light_mode: - def fmt_header(title): return f"### {title}\n\n" - def fmt_file_start(path, icon=""): return f"> FILE: {icon}{path}\n" - def fmt_file_end(path): return "\n" - def fmt_separator(): return "" - else: - def fmt_header(title): return f"{'='*80}\n### {title} ###\n{'='*80}\n\n" - def fmt_file_start(path, icon=""): return f"--- START OF FILE: {icon}{path} ---\n\n" - def fmt_file_end(path): return f"\n\n--- END OF FILE: {path} ---\n" - def fmt_separator(): return "-" * 40 + "\n\n" - - try: - with open(output, "w", encoding="utf-8") as outfile: - # CASO 1: Singolo file - if os.path.isfile(target): - filename = os.path.basename(target) - is_db = is_sqlite_database(target) - outfile.write(f"# Analysis: {filename}\n\n") - if light_mode: - outfile.write(LIGHT_MODE_NOTICE + "\n") - - if is_db: - schema = get_database_schema(target) - focused_tables = extract_focused_tables(target, active_focus_patterns) - is_focused = bool(focused_tables) or (active_focus_patterns and any( - fnmatch.fnmatch(filename, p) or p in filename for p in active_focus_patterns - )) - outfile.write(fmt_header("DATABASE SCHEMA")) - if light_mode and not is_focused: - outfile.write(generate_light_representation(target, "")) - elif focused_tables: - outfile.write(generate_database_focused(target, focused_tables)) - else: - outfile.write(generate_database_context_full(schema, filename)) - else: - content = read_file_content(target) - structure = get_document_structure(target, content) - outfile.write(fmt_header("STRUCTURE")) - outfile.write(structure or "N/A") - outfile.write("\n\n") - outfile.write(fmt_header("CONTENT")) - outfile.write(fmt_file_start(filename)) - if light_mode: - outfile.write(generate_light_representation(target, content)) - else: - outfile.write(content) - outfile.write(fmt_file_end(filename)) - - # CASO 2: Directory - elif os.path.isdir(target): - config = load_config(target) - outfile.write(f"# Project Context: {os.path.basename(os.path.abspath(target))}\n\n") - if light_mode: - outfile.write(LIGHT_MODE_NOTICE + "\n") - outfile.write(fmt_header("PROJECT STRUCTURE")) - - tree_str, total_bytes, total_tokens = generate_directory_tree(target, config, abs_output_path, light_mode=light_mode) - - if light_mode: - outfile.write(f"> Total Size (raw): {total_bytes/1024:.2f} KB | Est. Tokens (light): ~{total_tokens:,}\n") - else: - outfile.write(f"> Total Size: {total_bytes/1024:.2f} KB | Est. Tokens: ~{total_tokens:,}\n") - - outfile.write(tree_str) - outfile.write("\n\n") - - if include_all or light_mode or active_focus_patterns: - section_title = "FILE CONTENTS" - if light_mode: section_title += " (LIGHT ‚Äî signatures only)" - outfile.write(fmt_header(section_title)) - files = get_all_significant_files(target, config, abs_output_path) - - with Progress(console=console) as progress: - task = progress.add_task("[cyan]Processing...", total=len(files)) - for fpath in files: - rel_path = os.path.relpath(fpath, target).replace('\\', '/') - is_db = is_sqlite_database(fpath) - is_in_focus = active_focus_patterns and matches_focus(fpath, target, active_focus_patterns) - focused_tables = [] - if is_db: - focused_tables = extract_focused_tables(fpath, active_focus_patterns) - if focused_tables: is_in_focus = True - - should_write_full = include_all or is_in_focus - should_write_light = light_mode and not should_write_full - - if not should_write_full and not should_write_light: - progress.update(task, advance=1) - continue - - progress.update(task, advance=1, description=f"[cyan]{rel_path}[/cyan]") - marker = " [FOCUSED]" if (is_in_focus and light_mode) else "" - icon = "üóÑÔ∏è " if is_db else "" - outfile.write(fmt_file_start(rel_path + marker, icon)) - - if is_db: - if should_write_full: - if focused_tables: - outfile.write(generate_database_focused(fpath, focused_tables)) - else: - schema = get_database_schema(fpath) - outfile.write(generate_database_context_full(schema, os.path.basename(fpath))) - else: - outfile.write(generate_light_representation(fpath, "")) - else: - content = read_file_content(fpath) - if should_write_full: - outfile.write(content) - elif should_write_light: - light_output = generate_light_representation(fpath, content) - outfile.write(light_output) - - outfile.write(fmt_file_end(rel_path)) - outfile.write(fmt_separator()) - else: - console.print("[dim]Directory tree generated. Use --light, --all, or --focus for content.[/dim]") - - console.print(f"\n[bold green]‚úî SUCCESS[/bold green]: Context created in [cyan]'{output}'[/cyan]") - - except Exception as e: - console.print(f"\n[bold red]Error:[/bold red] {e}") - raise typer.Exit(code=1) - -# Entry point che usa typer.run per gestire il comando come SINGOLO -def app(): - typer.run(main) - -if __name__ == "__main__": - app() - ---- END OF FILE: src/deepbase/main.py --- ----------------------------------------- - ---- START OF FILE: src/deepbase/toon.py --- - -# src/deepbase/toon.py - -import os -import ast -import json -import re - -# Import database handling -from deepbase.database import ( - get_database_schema, - generate_database_context_toon, - generate_database_context_hybrid, - is_sqlite_database -) - -# Import new parser registry -from deepbase.parsers.registry import registry - -# Manteniamo ToonVisitor originale per la retrocompatibilità (se usato altrove) -# o per la funzione generate_toon_representation "standard" (non light). -class ToonVisitor(ast.NodeVisitor): - def __init__(self): - self.output = [] - self.indent_level = 0 - - def _log(self, text): - indent = " " * self.indent_level - self.output.append(f"{indent}{text}") - - def visit_ClassDef(self, node): - bases = [b.id for b in node.bases if isinstance(b, ast.Name)] - base_str = f"({', '.join(bases)})" if bases else "" - self._log(f"C: {node.name}{base_str}") - - self.indent_level += 1 - docstring = ast.get_docstring(node) - if docstring: - short_doc = docstring.split('\n')[0].strip() - self._log(f"\"\"\"{short_doc}...\"\"\"") - - self.generic_visit(node) - self.indent_level -= 1 - - def visit_FunctionDef(self, node): - self._handle_function(node) - - def visit_AsyncFunctionDef(self, node): - self._handle_function(node, is_async=True) - - def _handle_function(self, node, is_async=False): - args = [arg.arg for arg in node.args.args] - args_str = ", ".join(args) - prefix = "async " if is_async else "" - self._log(f"{prefix}F: {node.name}({args_str})") - - docstring = ast.get_docstring(node) - if docstring: - self.indent_level += 1 - short_doc = docstring.split('\n')[0].strip() - self._log(f"\"\"\"{short_doc}...\"\"\"") - self.indent_level -= 1 - - def generic_visit(self, node): - for child in ast.iter_child_nodes(node): - if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): - self.visit(child) - -# --- Helper Legacy per TOON non-light (struttura scheletrica) --- -# (Qui potresti voler spostare anche questi nei parser in futuro, -# ma per ora ci concentriamo sulla modalità --light) - -def _handle_markdown(content: str) -> str: - lines = [l.strip() for l in content.splitlines() if l.strip().startswith("#")] - return "\n".join(lines) or "(Markdown file with no headers)" - -def _handle_database_toon(file_path: str) -> str: - if is_sqlite_database(file_path): - try: - schema = get_database_schema(file_path) - return generate_database_context_toon(schema, os.path.basename(file_path)) - except Exception as e: - return f"(DB Error: {e})" - return "(Not a valid SQLite database)" - -# --------------------------------------------------------------------------- -# Funzione pubblica principale — LIGHT (solo firme) -# --------------------------------------------------------------------------- - -def generate_light_representation(file_path: str, content: str) -> str: - """ - Genera una rappresentazione LIGHT usando il nuovo sistema di plugin/parser. - """ - # 1. Gestione Database (caso speciale, non basato su contenuto testo) - if is_sqlite_database(file_path): - return _handle_database_toon(file_path) - - # 2. Usa il registro per trovare il parser corretto - return registry.parse_file(file_path, content) - -def get_light_mode_warnings() -> str: - """ - Restituisce i warning accumulati durante l'esecuzione (es. linguaggi non supportati). - Da chiamare in main.py se si vuole stampare un header. - """ - return registry.get_unsupported_warning() - -# --------------------------------------------------------------------------- -# Funzione pubblica principale — TOON (skeleton legacy) -# --------------------------------------------------------------------------- - -def generate_toon_representation(file_path: str, content: str) -> str: - """ - Genera una rappresentazione TOON (Token Oriented - Skeleton) - Mantiene la logica originale per ora, o delega a Light se preferisci unificare. - """ - _, ext = os.path.splitext(file_path) - ext = ext.lower() - - if is_sqlite_database(file_path): - return _handle_database_toon(file_path) - - if ext == ".py": - try: - tree = ast.parse(content) - visitor = ToonVisitor() - visitor.visit(tree) - return "\n".join(visitor.output) - except SyntaxError: - return f"(Syntax Error parsing {os.path.basename(file_path)})" - - elif ext in [".md", ".markdown"]: - return _handle_markdown(content) - - # Per semplicità, per ora il Toon standard per altri file - # può usare il fallback del nuovo sistema o la vecchia logica. - # Usiamo il fallback del registry per coerenza: - return registry.parse_file(file_path, content) - -# --------------------------------------------------------------------------- -# Helper per database in focus mode -# --------------------------------------------------------------------------- - -def generate_database_focused(file_path: str, focused_tables: list = None) -> str: - from deepbase.database import generate_database_context_full, generate_database_context_hybrid - if not is_sqlite_database(file_path): - return "(Not a valid SQLite database)" - try: - schema = get_database_schema(file_path) - db_name = os.path.basename(file_path) - if focused_tables: - return generate_database_context_hybrid(schema, db_name, focused_tables) - else: - return generate_database_context_full(schema, db_name) - except Exception as e: - return f"(Error processing database: {e})" - ---- END OF FILE: src/deepbase/toon.py --- ----------------------------------------- - ---- START OF FILE: src/deepbase/parsers/__init__.py --- - -# src/deepbase/parsers/__init__.py -from .document import get_document_structure -from .registry import registry - -# Espone anche le classi se necessario in futuro -__all__ = ['get_document_structure', 'registry'] - ---- END OF FILE: src/deepbase/parsers/__init__.py --- ----------------------------------------- - ---- START OF FILE: src/deepbase/parsers/document.py --- - -# src/deepbase/parsers/document.py -import re -import os -from .interface import LanguageParser - -class MarkdownParser(LanguageParser): - def parse(self, content: str, file_path: str) -> str: - lines = [] - for line in content.splitlines(): - if line.strip().startswith("#"): - lines.append(line.strip()) - if not lines: - return "(Markdown file with no headers)" - return "\n".join(lines) - -class LatexParser(LanguageParser): - def parse(self, content: str, file_path: str) -> str: - keep_patterns = [ - r'^\s*\\documentclass', - r'^\s*\\usepackage', - r'^\s*\\input', - r'^\s*\\include', - r'^\s*\\(part|chapter|section|subsection|subsubsection)', - r'^\s*\\begin', - r'^\s*\\end', - r'^\s*\\title', - r'^\s*\\author', - r'^\s*\\date' - ] - combined_pattern = re.compile('|'.join(keep_patterns)) - lines = [] - for line in content.splitlines(): - # Rimuovi commenti inline parziali se necessario, qui semplifichiamo - line_clean = line.split('%')[0].rstrip() - if combined_pattern.match(line_clean): - lines.append(line_clean) - if not lines: - return "(LaTeX content empty or purely textual)" - return "\n".join(lines) - -# Istanziamo i parser per uso interno -_md_parser = MarkdownParser() -_tex_parser = LatexParser() - -def get_document_structure(file_path: str, content: str): - """ - Funzione di compatibilità per main.py. - Restituisce la struttura se è un documento supportato, altrimenti None. - """ - _, ext = os.path.splitext(file_path) - ext = ext.lower() - - if ext in ['.md', '.markdown']: - return _md_parser.parse(content, file_path) - elif ext in ['.tex', '.sty', '.cls']: - return _tex_parser.parse(content, file_path) - - return None - ---- END OF FILE: src/deepbase/parsers/document.py --- ----------------------------------------- - ---- START OF FILE: src/deepbase/parsers/fallback.py --- - -# src/deepbase/parsers/fallback.py -from .interface import LanguageParser - -class FallbackParser(LanguageParser): - """ - Parser generico per file non supportati specificamente. - Tenta di restituire una versione minimizzata o troncata. - """ - def parse(self, content: str, file_path: str) -> str: - lines = [] - # Rimuove righe vuote e commenti base - for line in content.splitlines(): - clean = line.strip() - if clean and not clean.startswith("#"): - lines.append(clean) - - if not lines: - return "(Empty or comments-only file)" - - # Se il file è molto lungo, troncalo per il fallback - if len(lines) > 20: - preview = "\n".join(lines[:20]) - return f"{preview}\n... ({len(lines)-20} more lines hidden - Light Mode Fallback)" - - return "\n".join(lines) - ---- END OF FILE: src/deepbase/parsers/fallback.py --- ----------------------------------------- - ---- START OF FILE: src/deepbase/parsers/interface.py --- - -# src/deepbase/parsers/interface.py -from abc import ABC, abstractmethod - -class LanguageParser(ABC): - """ - Interfaccia base per i parser di linguaggio. - """ - - @abstractmethod - def parse(self, content: str, file_path: str) -> str: - """ - Parsa il contenuto del file e restituisce una rappresentazione 'light' (firme, struttura). - """ - pass - ---- END OF FILE: src/deepbase/parsers/interface.py --- ----------------------------------------- - ---- START OF FILE: src/deepbase/parsers/python.py --- - -# src/deepbase/parsers/python.py -import ast -import os -from .interface import LanguageParser - -def _extract_module_comments(source: str) -> str: - """ - Estrae i commenti # e la docstring di modulo dalle prime righe del sorgente. - """ - lines = [] - in_docstring = False - docstring_char = None - source_lines = source.splitlines() - - for line in source_lines: - stripped = line.strip() - - # Riga vuota: la includiamo solo se siamo già dentro i commenti iniziali - if not stripped: - if lines: - lines.append("") - continue - - # Commenti # semplici - if stripped.startswith("#") and not in_docstring: - lines.append(line.rstrip()) - continue - - # Inizio docstring di modulo (""" o ''') - if not in_docstring and (stripped.startswith('"""') or stripped.startswith("'''")): - docstring_char = stripped[:3] - in_docstring = True - lines.append(line.rstrip()) - # Docstring su singola riga - rest = stripped[3:] - if rest.endswith(docstring_char) and len(rest) >= 3: - in_docstring = False - continue - - if in_docstring: - lines.append(line.rstrip()) - if stripped.endswith(docstring_char): - in_docstring = False - continue - - # Qualsiasi altra cosa: fine dell'header - break - - # Rimuovi trailing blank lines - while lines and not lines[-1].strip(): - lines.pop() - - return "\n".join(lines) - - -class LightVisitor(ast.NodeVisitor): - """ - Visita l'AST e produce le firme dei metodi/funzioni Python. - """ - - def __init__(self): - self.output = [] - self.indent_level = 0 - - def _log(self, text): - indent = " " * self.indent_level - self.output.append(f"{indent}{text}") - - def visit_ClassDef(self, node): - self._log(f"class {node.name}:") - self.indent_level += 1 - - # Docstring della classe (prima riga) - docstring = ast.get_docstring(node) - if docstring: - first_line = docstring.split('\n')[0].strip() - self._log(f'"""{first_line}"""') - - self.generic_visit(node) - self.indent_level -= 1 - - def visit_FunctionDef(self, node): - self._emit_signature(node, is_async=False) - - def visit_AsyncFunctionDef(self, node): - self._emit_signature(node, is_async=True) - - def _emit_signature(self, node, is_async: bool): - prefix = "async " if is_async else "" - - # --- Argomenti con annotazioni di tipo --- - args_parts = [] - all_args = node.args.args - defaults = node.args.defaults - defaults_offset = len(all_args) - len(defaults) - - for i, arg in enumerate(all_args): - arg_str = arg.arg - if arg.annotation: - try: - arg_str += f": {ast.unparse(arg.annotation)}" - except Exception: - # Fallback per vecchie versioni python o AST complessi - pass - default_idx = i - defaults_offset - if default_idx >= 0: - try: - default_val = ast.unparse(defaults[default_idx]) - arg_str += f" = {default_val}" - except Exception: - arg_str += " = ..." - args_parts.append(arg_str) - - # *args - if node.args.vararg: - va = node.args.vararg - va_str = f"*{va.arg}" - if va.annotation: - try: - va_str += f": {ast.unparse(va.annotation)}" - except Exception: - pass - args_parts.append(va_str) - - # keyword-only args - kwonly_defaults = { - i: node.args.kw_defaults[i] - for i in range(len(node.args.kwonlyargs)) - if node.args.kw_defaults[i] is not None - } - for i, kwarg in enumerate(node.args.kwonlyargs): - kw_str = kwarg.arg - if kwarg.annotation: - try: - kw_str += f": {ast.unparse(kwarg.annotation)}" - except Exception: - pass - if i in kwonly_defaults: - try: - kw_str += f" = {ast.unparse(kwonly_defaults[i])}" - except Exception: - kw_str += " = ..." - args_parts.append(kw_str) - - # **kwargs - if node.args.kwarg: - kwa = node.args.kwarg - kwa_str = f"**{kwa.arg}" - if kwa.annotation: - try: - kwa_str += f": {ast.unparse(kwa.annotation)}" - except Exception: - pass - args_parts.append(kwa_str) - - args_str = ", ".join(args_parts) - - # --- Tipo di ritorno --- - ret_anno = "" - if node.returns: - try: - ret_anno = f" -> {ast.unparse(node.returns)}" - except Exception: - pass - - self._log(f"{prefix}def {node.name}({args_str}){ret_anno}: ...") - - # Docstring della funzione (prima riga) - docstring = ast.get_docstring(node) - if docstring: - first_line = docstring.split('\n')[0].strip() - self.indent_level += 1 - self._log(f'"""{first_line}"""') - self.indent_level -= 1 - - def generic_visit(self, node): - for child in ast.iter_child_nodes(node): - if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): - self.visit(child) - - -class PythonParser(LanguageParser): - def parse(self, content: str, file_path: str) -> str: - filename = os.path.basename(file_path) - try: - tree = ast.parse(content) - visitor = LightVisitor() - visitor.visit(tree) - signatures = "\n".join(visitor.output) - - # Prepend commenti/docstring iniziali del modulo - module_header = _extract_module_comments(content) - - parts = [] - if module_header: - parts.append(module_header) - if signatures: - parts.append(signatures) - - result = "\n\n".join(parts) - return result.strip() or f"(No functions or classes found in {filename})" - except SyntaxError: - return f"(Syntax Error parsing {filename})" - except Exception as e: - return f"(Error parsing Python file: {e})" - ---- END OF FILE: src/deepbase/parsers/python.py --- ----------------------------------------- - ---- START OF FILE: src/deepbase/parsers/registry.py --- - -# src/deepbase/parsers/registry.py -import os -from typing import Dict, Set -from .interface import LanguageParser -from .python import PythonParser -from .document import MarkdownParser, LatexParser # <--- Importa i nuovi parser -from .fallback import FallbackParser - -class ParserRegistry: - def __init__(self): - self._parsers: Dict[str, LanguageParser] = {} - self._fallback = FallbackParser() - self._unsupported_extensions_encountered: Set[str] = set() - - # Registrazione parser - self.register_parser('.py', PythonParser()) - - # Registrazione Documenti - md_parser = MarkdownParser() - self.register_parser('.md', md_parser) - self.register_parser('.markdown', md_parser) - - tex_parser = LatexParser() - for ext in ['.tex', '.sty', '.cls']: - self.register_parser(ext, tex_parser) - - def register_parser(self, extension: str, parser: LanguageParser): - self._parsers[extension] = parser - - def get_parser(self, file_path: str) -> LanguageParser: - _, ext = os.path.splitext(file_path) - ext = ext.lower() - - if ext in self._parsers: - return self._parsers[ext] - - if ext: - self._unsupported_extensions_encountered.add(ext) - - return self._fallback - - def parse_file(self, file_path: str, content: str) -> str: - parser = self.get_parser(file_path) - return parser.parse(content, file_path) - - def get_unsupported_warning(self) -> str: - if not self._unsupported_extensions_encountered: - return "" - ext_list = ", ".join(sorted(self._unsupported_extensions_encountered)) - return ( - f"> [WARNING] Light Mode support is currently limited for: {ext_list}. " - "Using generic fallback for these files.\n" - ) - -registry = ParserRegistry() - ---- END OF FILE: src/deepbase/parsers/registry.py --- ----------------------------------------- - ---- START OF FILE: docs/index.md --- - -# DeepBase - -**DeepBase** is a command-line tool that analyzes a project directory, extracts the folder structure and the content of all significant code files, and consolidates them into a single text/markdown file. - -This unified "context" is perfect for providing to a Large Language Model (LLM) to enable it to deeply understand the entire codebase. - -## Features - -- **Project Structure**: Generates a tree view of the folder and file structure. -- **Smart Filtering**: Automatically ignores common unnecessary directories (e.g., `.git`, `venv`, `node_modules`). -- **Configurable**: Customize ignored directories and included extensions via a `.deepbase.toml` file. -- **Extension Selection**: Includes only files with relevant code or configuration extensions. -- **Unified Output**: Combines everything into a single file, easy to copy and paste. -- **PyPI Ready**: Easy to install via `pip`. - -## Installation - -You can install DeepBase directly from PyPI: - -```sh -pip install deepbase - -``` - -## How to Use - -Once installed, you will have the `deepbase` command available in your terminal. - -**Basic Usage:** - -Navigate to your project folder (or a parent folder) and run: - -```sh -deepbase . -``` -*The dot `.` indicates the current directory.* - -This command will create a file called `llm_context.md` in the current directory. - -**Specify Directory and Output File:** - -```sh -deepbase /path/to/your/project -o project_context.txt -``` - -### Advanced Configuration - -You can customize DeepBase's behavior by creating a `.deepbase.toml` file in the root of the project you are analyzing. - -**Example `.deepbase.toml`:** -```toml -# Add more directories to ignore. -# These will be added to the default ones. -ignore_dirs = [ - "my_assets_folder", - "experimental" -] - -# Add more extensions or filenames to include. -significant_extensions = [ - ".cfg", - "Makefile" -] -``` - -## License - -This project is released under the GPL 3 license. See the `LICENSE` file for details. - ---- END OF FILE: docs/index.md --- ----------------------------------------- - ---- START OF FILE: docs/reference.md --- - -# API Reference - -## Main Module - -::: deepbase.main - ---- END OF FILE: docs/reference.md --- ----------------------------------------- - diff --git a/src/deepbase/parsers/javascript.py b/src/deepbase/parsers/javascript.py index 3bd51c4..a24b6ef 100644 --- a/src/deepbase/parsers/javascript.py +++ b/src/deepbase/parsers/javascript.py @@ -5,41 +5,40 @@ class JavaScriptParser(LanguageParser): """ Parser per JavaScript, TypeScript e React Native (.js, .jsx, .ts, .tsx). - Usa regex per identificare firme di funzioni, classi, interfacce e componenti React. + Versione 1.1: Logica Regex base + Supporto Export Default. """ def parse(self, content: str, file_path: str) -> str: lines = [] - # Regex patterns per catturare le definizioni + # Regex patterns per catturare le definizioni strutturali (classi, funzioni, var, tipi) patterns = [ - # Class definition (es. export default class MyClass extends Component) + # Class definition re.compile(r'^\s*(export\s+)?(default\s+)?(abstract\s+)?class\s+([a-zA-Z0-9_]+)(.*)?\{'), - # Function definition standard (es. async function myFunc(a, b)) + # Function definition standard re.compile(r'^\s*(export\s+)?(default\s+)?(async\s+)?function\s+([a-zA-Z0-9_]+)\s*\(.*'), - # Arrow Function / Variable Assignments (es. const MyComponent = (props) => {) - # Cattura costanti che sembrano funzioni o componenti React + # Arrow Function / Variable Assignments re.compile(r'^\s*(export\s+)?(const|let|var)\s+([a-zA-Z0-9_]+)\s*=\s*(async\s*)?(\(.*\)|[^=]+)\s*=>.*'), # TypeScript Interfaces & Types re.compile(r'^\s*(export\s+)?(interface|type)\s+([a-zA-Z0-9_]+).*'), - - # React Hooks (opzionale: spesso sono implementation details, - # ma custom hooks 'useSomething' top-level potrebbero essere rilevanti. - # Per ora li ignoriamo per risparmiare token, tenendo solo le definizioni) ] - # JSDoc pattern (multiline) + # --- NEW: Regex specifica per Export Default diretto (V2 Feature) --- + # Cattura: export default router; | export default MyComponent; + # Il (?!...) assicura che non catturi "class" o "function" che sono gestiti meglio dai pattern sopra. + re_export_default = re.compile(r'^\s*export\s+default\s+(?!class|function)([a-zA-Z0-9_]+);?') + + # JSDoc pattern in_comment = False - source_lines = content.splitlines() for i, line in enumerate(source_lines): stripped = line.strip() - # Gestione commenti JSDoc /** ... */ + # Gestione commenti JSDoc if stripped.startswith("/**"): in_comment = True lines.append(stripped) @@ -57,24 +56,28 @@ def parse(self, content: str, file_path: str) -> str: if not stripped or stripped.startswith("//"): continue - # Verifica se la riga matcha una definizione importante + # --- NEW: Controllo Export Default --- + # Se è un export default semplice, lo aggiungiamo così com'è (senza { ... }) + if re_export_default.match(stripped): + lines.append(stripped) + continue + + # Verifica patterns standard is_match = False for pattern in patterns: - # Usiamo match sulla riga pulita o search per flessibilità if pattern.match(stripped): # Pulizia fine riga: se finisce con '{', lo sostituiamo con '...' clean_line = stripped if clean_line.endswith("{"): clean_line = clean_line[:-1].strip() - # Aggiunge firma + ... + # Aggiunge firma + { ... } per indicare struttura compressa lines.append(f"{clean_line} {{ ... }}") is_match = True break - # Fallback per decoratori (es. @Component in Angular o NestJS, usati anche in RN con mobx) + # Fallback per decoratori if not is_match and stripped.startswith("@"): - # Mantiene il decoratore se è seguito da una classe nella riga successiva (euristica semplice) if i + 1 < len(source_lines) and "class " in source_lines[i+1]: lines.append(stripped) From e0c98ea4a7914a23e58ea625a352df084268dfcd Mon Sep 17 00:00:00 2001 From: Giuliano Ranauro Date: Fri, 13 Feb 2026 22:06:17 +0100 Subject: [PATCH 9/9] Update .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 6e0ff69..0bf55e9 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,6 @@ cython_debug/ marimo/_static/ marimo/_lsp/ __marimo__/ + +# output llm context +llm_context.md \ No newline at end of file