diff --git a/.gitignore b/.gitignore index 6e0ff69..0bf55e9 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,6 @@ cython_debug/ marimo/_static/ marimo/_lsp/ __marimo__/ + +# output llm context +llm_context.md \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8d1ea3d..6e2bf23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta" [project] name = "deepbase" # Increment the version to reflect changes -version = "1.7.0" +version = "1.8.0" authors = [ { name="Giuliano Ranauro", email="ranaurogln@email.com" }, ] diff --git a/src/deepbase/main.py b/src/deepbase/main.py index 4d6c84d..c68a236 100644 --- a/src/deepbase/main.py +++ b/src/deepbase/main.py @@ -11,10 +11,13 @@ from importlib.metadata import version as get_package_version, PackageNotFoundError from typing import List, Dict, Any, Set, Optional, Tuple -from deepbase.toon import generate_toon_representation, generate_database_focused +from deepbase.toon import generate_toon_representation, generate_light_representation, generate_database_focused from deepbase.parsers import get_document_structure from deepbase.database import is_sqlite_database, get_database_schema, generate_database_context_full +from rich.table import Table +from rich.panel import Panel + # --- CONFIGURAZIONI --- DEFAULT_CONFIG = { @@ -39,20 +42,9 @@ } } -EPILOG_TEXT = """ -[bold]Documentation:[/] https://follen99.github.io/DeepBase/ -[bold]Repository:[/] https://github.com/follen99/DeepBase -[bold]Issues:[/] https://github.com/follen99/DeepBase/issues -[bold]PyPI:[/] https://pypi.org/project/deepbase/ - -[italic]DeepBase scans your project and consolidates it for LLM context analysis.[/italic] +LIGHT_MODE_NOTICE = """> **[LIGHT MODE]** Questo file è stato generato in modalità risparmio token: vengono incluse solo le firme dei metodi/funzioni e i commenti iniziali dei file. Il corpo del codice è omesso. 
Se hai bisogno di approfondire un file, una classe o un metodo specifico, chiedi all'utente di fornire la porzione di codice completa. """ -app = typer.Typer( - name="deepbase", - add_completion=False, - rich_markup_mode="rich" -) console = Console() # --- UTILS --- @@ -75,6 +67,7 @@ def load_config(root_dir: str) -> Dict[str, Any]: pass return config + def estimate_tokens(size_bytes: int) -> str: if size_bytes == 0: return "0t" tokens = math.ceil(size_bytes / 4) @@ -85,38 +78,43 @@ def estimate_tokens(size_bytes: int) -> str: else: return f"~{tokens/1000000:.1f}M t" + +def estimate_tokens_for_content(text: str) -> int: + return math.ceil(len(text.encode("utf-8")) / 4) + +def calculate_light_tokens(file_path: str, content: str) -> int: + from deepbase.toon import generate_light_representation + light_repr = generate_light_representation(file_path, content) + return estimate_tokens_for_content(light_repr) + def is_significant_file(file_path: str, config: Dict[str, Any], output_file_abs: str = None) -> bool: file_name = os.path.basename(file_path) - - # Check 1: Esclusione del file di output corrente (tramite path assoluto) + if output_file_abs and os.path.abspath(file_path) == output_file_abs: return False - # Check 2 (NUOVO): Esclusione per nome. - # Se nella cartella target esiste un file che si chiama come il file di output - # (es: "llm_context.md"), lo ignoriamo a prescindere dal path. 
if output_file_abs and file_name == os.path.basename(output_file_abs): return False - # Check 3: Esclusione file spazzatura (lockfiles, etc definita in config) if file_name in config["ignore_files"]: return False significant_extensions = config["significant_extensions"] - - if file_name in significant_extensions: + + if file_name in significant_extensions: return True - + _, ext = os.path.splitext(file_path) if ext in significant_extensions: return True if is_sqlite_database(file_path): return True - + return False -def calculate_project_stats(root_dir: str, config: Dict[str, Any], output_file_abs: str) -> int: + +def calculate_project_stats(root_dir: str, config: Dict[str, Any], output_file_abs: str, light_mode: bool = False) -> int: total_size = 0 ignore_dirs = config["ignore_dirs"] for dirpath, dirnames, filenames in os.walk(root_dir, topdown=True): @@ -125,11 +123,17 @@ def calculate_project_stats(root_dir: str, config: Dict[str, Any], output_file_a fpath = os.path.join(dirpath, f) if is_significant_file(fpath, config, output_file_abs): try: - total_size += os.path.getsize(fpath) + if light_mode and not is_sqlite_database(fpath): + content = read_file_content(fpath) + light_repr = generate_light_representation(fpath, content) + total_size += len(light_repr.encode("utf-8")) + else: + total_size += os.path.getsize(fpath) except OSError: pass return total_size + # --- ALBERO DELLE DIRECTORY --- def _generate_tree_recursive( @@ -137,14 +141,12 @@ def _generate_tree_recursive( prefix: str, config: Dict[str, Any], total_project_size: int, - output_file_abs: str + output_file_abs: str, + light_mode: bool = False ) -> Tuple[str, int]: - """ - Ritorna una tupla: (stringa_visuale_albero, dimensione_totale_bytes_subtree). 
- """ - output_str = "" subtree_size = 0 - + try: items = sorted(os.listdir(current_path)) except PermissionError: @@ -154,7 +156,7 @@ def _generate_tree_recursive( for item in items: full_path = os.path.join(current_path, item) is_dir = os.path.isdir(full_path) - + if is_dir: if item not in config["ignore_dirs"] and not item.startswith('.'): filtered_items.append((item, True)) @@ -166,21 +168,19 @@ def _generate_tree_recursive( is_last = (i == len(filtered_items) - 1) full_path = os.path.join(current_path, name) connector = "└── " if is_last else "├── " - + if is_dir: - # Ricorsione: ottengo prima il contenuto e la dimensione della sottocartella extension = " " if is_last else "│ " sub_tree_str, sub_dir_size = _generate_tree_recursive( - full_path, - prefix + extension, - config, + full_path, + prefix + extension, + config, total_project_size, - output_file_abs + output_file_abs, light_mode=light_mode ) - + subtree_size += sub_dir_size - - # Genero stats cartella + folder_stats = "" if total_project_size > 0 and sub_dir_size > 0: percent = (sub_dir_size / total_project_size) * 100 @@ -189,35 +189,43 @@ def _generate_tree_recursive( output_str += f"{prefix}{connector}📁 {name}/{folder_stats}\n" output_str += sub_tree_str - + else: - icon = "üóÑÔ∏è " if is_sqlite_database(full_path) else "📄 " + icon = "🗄️ " if is_sqlite_database(full_path) else "📄 " try: - size = os.path.getsize(full_path) + raw_size = os.path.getsize(full_path) + if light_mode and not is_sqlite_database(full_path): + content = read_file_content(full_path) + light_repr = generate_light_representation(full_path, content) + size = len(light_repr.encode("utf-8")) + else: + size = raw_size subtree_size += size - + + # [FIX] Restored the missing lines that print the file in the tree; sizes use the light representation when light_mode is set. 
file_stats = "" if total_project_size > 0 and size > 0: percent = (size / total_project_size) * 100 token_est = estimate_tokens(size) file_stats = f" ({percent:.1f}% | {token_est})" - + output_str += f"{prefix}{connector}{icon}{name}{file_stats}\n" + except OSError: pass return output_str, subtree_size -def generate_directory_tree(root_dir: str, config: Dict[str, Any], output_file_abs: str) -> Tuple[str, int, int]: + +def generate_directory_tree(root_dir: str, config: Dict[str, Any], output_file_abs: str, light_mode: bool = False) -> Tuple[str, int, int]: abs_root = os.path.abspath(root_dir) - total_size = calculate_project_stats(root_dir, config, output_file_abs) - - tree_body, _ = _generate_tree_recursive(root_dir, "", config, total_size, output_file_abs) - + total_size = calculate_project_stats(root_dir, config, output_file_abs, light_mode) + tree_body, _ = _generate_tree_recursive(root_dir, "", config, total_size, output_file_abs, light_mode) header = f"📁 {os.path.basename(abs_root) or '.'}/\n" total_tokens_est = math.ceil(total_size / 4) return header + tree_body, total_size, total_tokens_est + # --- CORE --- def get_all_significant_files(root_dir: str, config: Dict[str, Any], output_file_abs: str) -> List[str]: @@ -231,6 +239,7 @@ def get_all_significant_files(root_dir: str, config: Dict[str, Any], output_file significant_files.append(file_path) return significant_files + def read_file_content(file_path: str) -> str: if is_sqlite_database(file_path): try: @@ -247,19 +256,18 @@ def read_file_content(file_path: str) -> str: except Exception as e: return f"!!! Error reading file: {e} !!!" 
+ def matches_focus(file_path: str, root_dir: str, focus_patterns: List[str]) -> bool: if not focus_patterns: return False rel_path = os.path.relpath(file_path, root_dir) rel_path_fwd = rel_path.replace(os.sep, '/') for pattern in focus_patterns: - if '.db/' in pattern or '.sqlite/' in pattern: - parts = pattern.split('/') - if len(parts) >= 2 and any(ext in parts[0] for ext in ['.db', '.sqlite']): pass clean_pattern = pattern.replace(os.sep, '/') if fnmatch.fnmatch(rel_path_fwd, clean_pattern): return True if clean_pattern in rel_path_fwd: return True return False + def extract_focused_tables(file_path: str, focus_patterns: List[str]) -> List[str]: if not is_sqlite_database(file_path): return [] db_name = os.path.basename(file_path) @@ -270,6 +278,7 @@ def extract_focused_tables(file_path: str, focus_patterns: List[str]) -> List[st if fnmatch.fnmatch(db_name, db_pattern): focused_tables.append(table_name) return focused_tables + def load_focus_patterns_from_file(file_path: str) -> List[str]: patterns = [] if os.path.exists(file_path): @@ -285,6 +294,7 @@ def load_focus_patterns_from_file(file_path: str) -> List[str]: console.print(f"[bold yellow]Warning:[/bold yellow] Focus file '{file_path}' not found.") return patterns + def version_callback(value: bool): if value: try: @@ -294,32 +304,93 @@ def version_callback(value: bool): console.print("DeepBase version: [yellow]unknown (editable/dev mode)[/yellow]") raise typer.Exit() -@app.command(epilog=EPILOG_TEXT, rich_help_panel="Main Commands") -def create( + +# --- LOGICA PRINCIPALE (SENZA CLASSE TYPER) --- + +def main( target: str = typer.Argument(None, help="The file or directory to scan."), + help: bool = typer.Option(False, "--help", "-h", is_eager=True, help="Show this help message and exit."), version: Optional[bool] = typer.Option(None, "--version", "-v", callback=version_callback, is_eager=True, help="Show version and exit."), output: str = typer.Option("llm_context.md", "--output", "-o", help="The output 
file."), verbose: bool = typer.Option(False, "--verbose", "-V", help="Show detailed output."), include_all: bool = typer.Option(False, "--all", "-a", help="Include full content of ALL files."), - toon_mode: bool = typer.Option(False, "--toon", "-t", help="Use 'Skeleton' mode for non-focused files."), - focus: Optional[List[str]] = typer.Option(None, "--focus", "-f", help="Pattern to focus on."), + light_mode: bool = typer.Option(False, "--light", "-l", help="Token-saving mode (signatures only)."), + focus: Optional[List[str]] = typer.Option(None, "--focus", "-f", help="Pattern to focus on (repeatable)."), focus_file: Optional[str] = typer.Option(None, "--focus-file", "-ff", help="Path to focus patterns file.") ): """ Analyzes a directory OR a single file. - Hybrid workflow with Context Skeleton + Focused Content. + Default: structure tree only. """ - if target is None: - console.print("[red]Error: Missing argument 'TARGET'.[/red]") - console.print(f"\n[bold]Usage:[/bold] deepbase [OPTIONS] [TARGET]\n\nTry [cyan]deepbase --help[/cyan] for more info.") - raise typer.Exit(code=1) + # 1. 
Custom HELP Logic + if help or target is None: + console.print(Panel.fit( + "[bold cyan]DeepBase[/bold cyan] — Consolidate project context for LLMs\n\n" + "[bold]Usage:[/bold] [green]deepbase[/green] [OPTIONS] [TARGET]\n\n" + "[bold]Arguments:[/bold]\n" + " [cyan]TARGET[/cyan] The file or directory to scan [dim](required)[/dim]\n", + title="DeepBase v1.8.0", border_style="cyan" + )) + + # Options table (literal square brackets are escaped so Rich does not parse them as markup tags) + options_table = Table(show_header=False, box=None, padding=(0, 2)) + options_table.add_column(style="cyan", no_wrap=True) + options_table.add_column(style="green", no_wrap=True) + options_table.add_column() + + options = [ + ("-v, --version", "", "Show version and exit"), + ("-o, --output", "TEXT", "Output file [dim]\\[default: llm_context.md][/dim]"), + ("-V, --verbose", "", "Show detailed output"), + ("-a, --all", "", "Include full content of ALL files"), + ("-l, --light", "", "Token-saving mode (signatures only)"), + ("-f, --focus", "TEXT", "Pattern to focus on (repeatable)"), + ("-ff, --focus-file", "TEXT", "Path to focus patterns file"), + ("-h, --help", "", "Show this message and exit"), + ] + for opt, meta, desc in options: + options_table.add_row(opt, meta, desc) + + console.print(Panel(options_table, title="Options", border_style="green", title_align="left")) + + config_content = """Create a [cyan].deepbase.toml[/cyan] in your project root: + +[dim]# Ignore additional directories[/dim] +[yellow]ignore_dirs = ["my_assets", "experimental"][/yellow] + +[dim]# Ignore specific files[/dim] +[yellow]ignore_files = ["*.log", "secrets.env"][/yellow] + +[dim]# Add extra file extensions[/dim] +[yellow]significant_extensions = [".cfg", "Makefile", ".tsx"][/yellow]""" + + console.print(Panel( + config_content, + title="Configuration (.deepbase.toml)", + border_style="yellow", + title_align="left" + )) + + links_table = Table(show_header=False, box=None, padding=(0, 2)) + links_table.add_column(style="bold") + links_table.add_column(style="blue") + + 
links_table.add_row("Documentation:", "https://follen99.github.io/DeepBase/") + links_table.add_row("Repository:", "https://github.com/follen99/DeepBase") + links_table.add_row("Issues:", "https://github.com/follen99/DeepBase/issues") + links_table.add_row("PyPI:", "https://pypi.org/project/deepbase/") + + console.print(Panel(links_table, title="Links", border_style="blue", title_align="left")) + + raise typer.Exit() + # 2. Main Logic Start if not os.path.exists(target): console.print(f"[bold red]Error:[/bold red] Target not found: '{target}'") raise typer.Exit(code=1) abs_output_path = os.path.abspath(output) - + active_focus_patterns = [] if focus: active_focus_patterns.extend(focus) if focus_file: @@ -327,13 +398,19 @@ def create( if file_patterns: active_focus_patterns.extend(file_patterns) active_focus_patterns = list(set(active_focus_patterns)) - console.print(f"[bold green]Analyzing '{target}'...[/bold green]") + mode_label = "" + if light_mode: + mode_label = " [bold yellow](LIGHT — signatures only)[/bold yellow]" + elif include_all: + mode_label = " [bold cyan](ALL — full content)[/bold cyan]" + + console.print(f"[bold green]Analyzing '{target}'...[/bold green]{mode_label}") - if toon_mode: + if light_mode: def fmt_header(title): return f"### {title}\n\n" def fmt_file_start(path, icon=""): return f"> FILE: {icon}{path}\n" def fmt_file_end(path): return "\n" - def fmt_separator(): return "" + def fmt_separator(): return "" else: def fmt_header(title): return f"{'='*80}\n### {title} ###\n{'='*80}\n\n" def fmt_file_start(path, icon=""): return f"--- START OF FILE: {icon}{path} ---\n\n" @@ -342,10 +419,14 @@ def fmt_separator(): return "-" * 40 + "\n\n" try: with open(output, "w", encoding="utf-8") as outfile: + # CASO 1: Singolo file if os.path.isfile(target): filename = os.path.basename(target) is_db = is_sqlite_database(target) outfile.write(f"# Analysis: {filename}\n\n") + if light_mode: + outfile.write(LIGHT_MODE_NOTICE + "\n") + if is_db: schema = 
get_database_schema(target) focused_tables = extract_focused_tables(target, active_focus_patterns) @@ -353,10 +434,10 @@ def fmt_separator(): return "-" * 40 + "\n\n" fnmatch.fnmatch(filename, p) or p in filename for p in active_focus_patterns )) outfile.write(fmt_header("DATABASE SCHEMA")) - if toon_mode and not is_focused: - outfile.write(generate_toon_representation(target, "")) + if light_mode and not is_focused: + outfile.write(generate_light_representation(target, "")) elif focused_tables: - outfile.write(generate_database_focused(target, focused_tables)) + outfile.write(generate_database_focused(target, focused_tables)) else: outfile.write(generate_database_context_full(schema, filename)) else: @@ -367,26 +448,36 @@ def fmt_separator(): return "-" * 40 + "\n\n" outfile.write("\n\n") outfile.write(fmt_header("CONTENT")) outfile.write(fmt_file_start(filename)) - if toon_mode: outfile.write(generate_toon_representation(target, content)) - else: outfile.write(content) + if light_mode: + outfile.write(generate_light_representation(target, content)) + else: + outfile.write(content) outfile.write(fmt_file_end(filename)) + # CASE 2: Directory (in light mode total_bytes is the size of the light representation, not the raw size on disk) elif os.path.isdir(target): config = load_config(target) outfile.write(f"# Project Context: {os.path.basename(os.path.abspath(target))}\n\n") + if light_mode: + outfile.write(LIGHT_MODE_NOTICE + "\n") outfile.write(fmt_header("PROJECT STRUCTURE")) + + tree_str, total_bytes, total_tokens = generate_directory_tree(target, config, abs_output_path, light_mode=light_mode) + + if light_mode: + outfile.write(f"> Total Size (light): {total_bytes/1024:.2f} KB | Est. Tokens (light): ~{total_tokens:,}\n") + else: + outfile.write(f"> Total Size: {total_bytes/1024:.2f} KB | Est. Tokens: ~{total_tokens:,}\n") - tree_str, total_bytes, total_tokens = generate_directory_tree(target, config, abs_output_path) - outfile.write(f"> Total Size: {total_bytes/1024:.2f} KB | Est. 
Tokens: ~{total_tokens:,}\n") outfile.write(tree_str) outfile.write("\n\n") - if include_all or toon_mode or active_focus_patterns: + if include_all or light_mode or active_focus_patterns: section_title = "FILE CONTENTS" - if toon_mode: section_title += " (TOON)" + if light_mode: section_title += " (LIGHT — signatures only)" outfile.write(fmt_header(section_title)) files = get_all_significant_files(target, config, abs_output_path) - + with Progress(console=console) as progress: task = progress.add_task("[cyan]Processing...", total=len(files)) for fpath in files: @@ -397,38 +488,50 @@ def fmt_separator(): return "-" * 40 + "\n\n" if is_db: focused_tables = extract_focused_tables(fpath, active_focus_patterns) if focused_tables: is_in_focus = True + should_write_full = include_all or is_in_focus - should_write_toon = toon_mode and not should_write_full - if not should_write_full and not should_write_toon: + should_write_light = light_mode and not should_write_full + + if not should_write_full and not should_write_light: progress.update(task, advance=1) continue progress.update(task, advance=1, description=f"[cyan]{rel_path}[/cyan]") - marker = " [FOCUSED]" if (is_in_focus and toon_mode) else "" - icon = "üóÑÔ∏è " if is_db else "" + marker = " [FOCUSED]" if (is_in_focus and light_mode) else "" + icon = "🗄️ " if is_db else "" outfile.write(fmt_file_start(rel_path + marker, icon)) - + if is_db: if should_write_full: - if focused_tables: outfile.write(generate_database_focused(fpath, focused_tables)) + if focused_tables: + outfile.write(generate_database_focused(fpath, focused_tables)) else: schema = get_database_schema(fpath) outfile.write(generate_database_context_full(schema, os.path.basename(fpath))) - else: outfile.write(generate_toon_representation(fpath, "")) + else: + outfile.write(generate_light_representation(fpath, "")) else: content = read_file_content(fpath) - if should_write_full: outfile.write(content) - elif should_write_toon: 
outfile.write(generate_toon_representation(fpath, content)) + if should_write_full: + outfile.write(content) + elif should_write_light: + light_output = generate_light_representation(fpath, content) + outfile.write(light_output) + outfile.write(fmt_file_end(rel_path)) outfile.write(fmt_separator()) else: - console.print("[dim]Directory tree generated. Use --toon, --all, or --focus for content.[/dim]") + console.print("[dim]Directory tree generated. Use --light, --all, or --focus for content.[/dim]") - console.print(f"\n[bold green]✓ SUCCESS[/bold green]: Context created in [cyan]'{output}'[/cyan]") + console.print(f"\n[bold green]✔ SUCCESS[/bold green]: Context created in [cyan]'{output}'[/cyan]") except Exception as e: console.print(f"\n[bold red]Error:[/bold red] {e}") raise typer.Exit(code=1) +# Entry point che usa typer.run per gestire il comando come SINGOLO +def app(): + typer.run(main) + if __name__ == "__main__": app() \ No newline at end of file diff --git a/src/deepbase/parsers.py b/src/deepbase/parsers.py deleted file mode 100644 index 832ef24..0000000 --- a/src/deepbase/parsers.py +++ /dev/null @@ -1,85 +0,0 @@ -# src/deepbase/parser.py (AGGIORNAMENTO) - -import os -import re -from typing import Optional - -# Import database functions -from deepbase.database import ( - get_database_schema, - generate_database_context_full, - generate_database_context_toon, - is_sqlite_database -) - - -def extract_markdown_structure(content: str) -> str: - """Estrae solo le intestazioni (headers) da un contenuto Markdown.""" - lines = [] - # Regex per catturare le righe che iniziano con # - header_pattern = re.compile(r'^\s*(#{1,6})\s+(.*)') - - for line in content.splitlines(): - if header_pattern.match(line): - lines.append(line.strip()) - - if not lines: - return "(Nessuna struttura Markdown rilevata)" - return "\n".join(lines) - - -def extract_latex_structure(content: str) -> str: - """Estrae comandi strutturali LaTeX (part, chapter, section, etc).""" - lines = [] - 
# Regex per catturare comandi strutturali standard di LaTeX - # Supporta \section{Title} e \section*{Title} - tex_pattern = re.compile(r'^\s*\\(part|chapter|section|subsection|subsubsection|paragraph|subparagraph)\*?\{(.+?)\}') - - # Catturiamo anche documentclass e begin/end document per contesto - context_pattern = re.compile(r'^\s*\\(documentclass|begin|end)\{.+?\}') - - for line in content.splitlines(): - if tex_pattern.match(line) or context_pattern.match(line): - lines.append(line.strip()) - - if not lines: - return "(Nessuna struttura LaTeX rilevata)" - return "\n".join(lines) - - -def get_document_structure(file_path: str, content: str) -> Optional[str]: - """Funzione dispatcher che decide quale parser usare.""" - _, ext = os.path.splitext(file_path) - ext = ext.lower() - - # --- DATABASE HANDLER --- - if ext in ['.db', '.sqlite', '.sqlite3', '.db3'] or is_sqlite_database(file_path): - if is_sqlite_database(file_path): - try: - schema = get_database_schema(file_path) - return generate_database_context_full(schema, os.path.basename(file_path)) - except Exception as e: - return f"(Error reading database: {e})" - - if ext in ['.md', '.markdown', '.mdown', '.mkd']: - return extract_markdown_structure(content) - - # --- LATEX HANDLER --- - elif ext in ['.tex']: - return extract_latex_structure(content) - - return None - - -def get_database_toon(file_path: str) -> Optional[str]: - """ - Generate TOON representation for database files. - Called by toon.py when processing databases. 
- """ - if is_sqlite_database(file_path): - try: - schema = get_database_schema(file_path) - return generate_database_context_toon(schema, os.path.basename(file_path)) - except Exception as e: - return f"(DB Error: {e})" - return None \ No newline at end of file diff --git a/src/deepbase/parsers/__init__.py b/src/deepbase/parsers/__init__.py new file mode 100644 index 0000000..2f42997 --- /dev/null +++ b/src/deepbase/parsers/__init__.py @@ -0,0 +1,6 @@ +# src/deepbase/parsers/__init__.py +from .document import get_document_structure +from .registry import registry + +# Also exposes the classes if they are needed in the future +__all__ = ['get_document_structure', 'registry'] \ No newline at end of file diff --git a/src/deepbase/parsers/document.py b/src/deepbase/parsers/document.py new file mode 100644 index 0000000..715798a --- /dev/null +++ b/src/deepbase/parsers/document.py @@ -0,0 +1,58 @@ +# src/deepbase/parsers/document.py +import re +import os +from .interface import LanguageParser + +class MarkdownParser(LanguageParser): + def parse(self, content: str, file_path: str) -> str: + lines = [] + for line in content.splitlines(): + if re.match(r'#{1,6}(\s|$)', line.strip()): + lines.append(line.strip()) + if not lines: + return "(Markdown file with no headers)" + return "\n".join(lines) + +class LatexParser(LanguageParser): + def parse(self, content: str, file_path: str) -> str: + keep_patterns = [ + r'^\s*\\documentclass', + r'^\s*\\usepackage', + r'^\s*\\input', + r'^\s*\\include', + r'^\s*\\(part|chapter|section|subsection|subsubsection)', + r'^\s*\\begin', + r'^\s*\\end', + r'^\s*\\title', + r'^\s*\\author', + r'^\s*\\date' + ] + combined_pattern = re.compile('|'.join(keep_patterns)) + lines = [] + for line in content.splitlines(): + # Strip a trailing inline comment; an escaped percent sign (\%) is literal text, not a comment start + line_clean = re.split(r'(?<!\\)%', line, maxsplit=1)[0].rstrip() + if combined_pattern.match(line_clean): + lines.append(line_clean) + if not lines: + return "(LaTeX content empty or purely textual)" + 
return "\n".join(lines) + +# Istanziamo i parser per uso interno +_md_parser = MarkdownParser() +_tex_parser = LatexParser() + +def get_document_structure(file_path: str, content: str): + """ + Funzione di compatibilità per main.py. + Restituisce la struttura se è un documento supportato, altrimenti None. + """ + _, ext = os.path.splitext(file_path) + ext = ext.lower() + + if ext in ['.md', '.markdown']: + return _md_parser.parse(content, file_path) + elif ext in ['.tex', '.sty', '.cls']: + return _tex_parser.parse(content, file_path) + + return None \ No newline at end of file diff --git a/src/deepbase/parsers/fallback.py b/src/deepbase/parsers/fallback.py new file mode 100644 index 0000000..ae58c4f --- /dev/null +++ b/src/deepbase/parsers/fallback.py @@ -0,0 +1,25 @@ +# src/deepbase/parsers/fallback.py +from .interface import LanguageParser + +class FallbackParser(LanguageParser): + """ + Parser generico per file non supportati specificamente. + Tenta di restituire una versione minimizzata o troncata. + """ + def parse(self, content: str, file_path: str) -> str: + lines = [] + # Rimuove righe vuote e commenti base + for line in content.splitlines(): + clean = line.strip() + if clean and not clean.startswith("#"): + lines.append(clean) + + if not lines: + return "(Empty or comments-only file)" + + # Se il file è molto lungo, troncalo per il fallback + if len(lines) > 20: + preview = "\n".join(lines[:20]) + return f"{preview}\n... ({len(lines)-20} more lines hidden - Light Mode Fallback)" + + return "\n".join(lines) \ No newline at end of file diff --git a/src/deepbase/parsers/interface.py b/src/deepbase/parsers/interface.py new file mode 100644 index 0000000..71406c7 --- /dev/null +++ b/src/deepbase/parsers/interface.py @@ -0,0 +1,14 @@ +# src/deepbase/parsers/interface.py +from abc import ABC, abstractmethod + +class LanguageParser(ABC): + """ + Interfaccia base per i parser di linguaggio. 
+ """ + + @abstractmethod + def parse(self, content: str, file_path: str) -> str: + """ + Parsa il contenuto del file e restituisce una rappresentazione 'light' (firme, struttura). + """ + pass \ No newline at end of file diff --git a/src/deepbase/parsers/javascript.py b/src/deepbase/parsers/javascript.py new file mode 100644 index 0000000..a24b6ef --- /dev/null +++ b/src/deepbase/parsers/javascript.py @@ -0,0 +1,87 @@ +# src/deepbase/parsers/javascript.py +import re +from .interface import LanguageParser + +class JavaScriptParser(LanguageParser): + """ + Parser per JavaScript, TypeScript e React Native (.js, .jsx, .ts, .tsx). + Versione 1.1: Logica Regex base + Supporto Export Default. + """ + + def parse(self, content: str, file_path: str) -> str: + lines = [] + + # Regex patterns per catturare le definizioni strutturali (classi, funzioni, var, tipi) + patterns = [ + # Class definition + re.compile(r'^\s*(export\s+)?(default\s+)?(abstract\s+)?class\s+([a-zA-Z0-9_]+)(.*)?\{'), + + # Function definition standard + re.compile(r'^\s*(export\s+)?(default\s+)?(async\s+)?function\s+([a-zA-Z0-9_]+)\s*\(.*'), + + # Arrow Function / Variable Assignments + re.compile(r'^\s*(export\s+)?(const|let|var)\s+([a-zA-Z0-9_]+)\s*=\s*(async\s*)?(\(.*\)|[^=]+)\s*=>.*'), + + # TypeScript Interfaces & Types + re.compile(r'^\s*(export\s+)?(interface|type)\s+([a-zA-Z0-9_]+).*'), + ] + + # --- NEW: Regex specifica per Export Default diretto (V2 Feature) --- + # Cattura: export default router; | export default MyComponent; + # Il (?!...) assicura che non catturi "class" o "function" che sono gestiti meglio dai pattern sopra. 
+ re_export_default = re.compile(r'^\s*export\s+default\s+(?!class|function)([a-zA-Z0-9_]+);?') + + # JSDoc pattern + in_comment = False + source_lines = content.splitlines() + + for i, line in enumerate(source_lines): + stripped = line.strip() + + # Gestione commenti JSDoc + if stripped.startswith("/**"): + in_comment = True + lines.append(stripped) + if stripped.endswith("*/"): + in_comment = False + continue + + if in_comment: + lines.append(stripped) + if stripped.endswith("*/"): + in_comment = False + continue + + # Ignora commenti single line o righe vuote + if not stripped or stripped.startswith("//"): + continue + + # --- NEW: Controllo Export Default --- + # Se è un export default semplice, lo aggiungiamo così com'è (senza { ... }) + if re_export_default.match(stripped): + lines.append(stripped) + continue + + # Verifica patterns standard + is_match = False + for pattern in patterns: + if pattern.match(stripped): + # Pulizia fine riga: se finisce con '{', lo sostituiamo con '...' + clean_line = stripped + if clean_line.endswith("{"): + clean_line = clean_line[:-1].strip() + + # Aggiunge firma + { ... } per indicare struttura compressa + lines.append(f"{clean_line} {{ ... }}") + is_match = True + break + + # Fallback per decoratori + if not is_match and stripped.startswith("@"): + if i + 1 < len(source_lines) and "class " in source_lines[i+1]: + lines.append(stripped) + + if not lines: + return f"(No exported functions, classes or components found in {file_path})" + + return "\n".join(lines) \ No newline at end of file diff --git a/src/deepbase/parsers/python.py b/src/deepbase/parsers/python.py new file mode 100644 index 0000000..2df8c48 --- /dev/null +++ b/src/deepbase/parsers/python.py @@ -0,0 +1,205 @@ +# src/deepbase/parsers/python.py +import ast +import os +from .interface import LanguageParser + +def _extract_module_comments(source: str) -> str: + """ + Estrae i commenti # e la docstring di modulo dalle prime righe del sorgente. 
+ """ + lines = [] + in_docstring = False + docstring_char = None + source_lines = source.splitlines() + + for line in source_lines: + stripped = line.strip() + + # Riga vuota: la includiamo solo se siamo già dentro i commenti iniziali + if not stripped: + if lines: + lines.append("") + continue + + # Commenti # semplici + if stripped.startswith("#") and not in_docstring: + lines.append(line.rstrip()) + continue + + # Inizio docstring di modulo (""" o ''') + if not in_docstring and (stripped.startswith('"""') or stripped.startswith("'''")): + docstring_char = stripped[:3] + in_docstring = True + lines.append(line.rstrip()) + # Docstring su singola riga + rest = stripped[3:] + if rest.endswith(docstring_char) and len(rest) >= 3: + in_docstring = False + continue + + if in_docstring: + lines.append(line.rstrip()) + if stripped.endswith(docstring_char): + in_docstring = False + continue + + # Qualsiasi altra cosa: fine dell'header + break + + # Rimuovi trailing blank lines + while lines and not lines[-1].strip(): + lines.pop() + + return "\n".join(lines) + + +class LightVisitor(ast.NodeVisitor): + """ + Visita l'AST e produce le firme dei metodi/funzioni Python. 
+ """ + + def __init__(self): + self.output = [] + self.indent_level = 0 + + def _log(self, text): + indent = " " * self.indent_level + self.output.append(f"{indent}{text}") + + def visit_ClassDef(self, node): + self._log(f"class {node.name}:") + self.indent_level += 1 + + # Class docstring (first line only) + docstring = ast.get_docstring(node) + if docstring: + first_line = docstring.split('\n')[0].strip() + self._log(f'"""{first_line}"""') + + self.generic_visit(node) + self.indent_level -= 1 + + def visit_FunctionDef(self, node): + self._emit_signature(node, is_async=False) + + def visit_AsyncFunctionDef(self, node): + self._emit_signature(node, is_async=True) + + def _emit_signature(self, node, is_async: bool): + prefix = "async " if is_async else "" + + # --- Positional parameters with type annotations (positional-only ones included; defaults align to the tail of this combined list) --- + args_parts = [] + all_args = node.args.posonlyargs + node.args.args + defaults = node.args.defaults + defaults_offset = len(all_args) - len(defaults) + + for i, arg in enumerate(all_args): + arg_str = arg.arg + if arg.annotation: + try: + arg_str += f": {ast.unparse(arg.annotation)}" + except Exception: + # Fallback for old Python versions or complex AST nodes + pass + default_idx = i - defaults_offset + if default_idx >= 0: + try: + default_val = ast.unparse(defaults[default_idx]) + arg_str += f" = {default_val}" + except Exception: + arg_str += " = ..." 
+ args_parts.append(arg_str) + + # *args + if node.args.vararg: + va = node.args.vararg + va_str = f"*{va.arg}" + if va.annotation: + try: + va_str += f": {ast.unparse(va.annotation)}" + except Exception: + pass + args_parts.append(va_str) + + # keyword-only args + kwonly_defaults = { + i: node.args.kw_defaults[i] + for i in range(len(node.args.kwonlyargs)) + if node.args.kw_defaults[i] is not None + } + for i, kwarg in enumerate(node.args.kwonlyargs): + kw_str = kwarg.arg + if kwarg.annotation: + try: + kw_str += f": {ast.unparse(kwarg.annotation)}" + except Exception: + pass + if i in kwonly_defaults: + try: + kw_str += f" = {ast.unparse(kwonly_defaults[i])}" + except Exception: + kw_str += " = ..." + args_parts.append(kw_str) + + # **kwargs + if node.args.kwarg: + kwa = node.args.kwarg + kwa_str = f"**{kwa.arg}" + if kwa.annotation: + try: + kwa_str += f": {ast.unparse(kwa.annotation)}" + except Exception: + pass + args_parts.append(kwa_str) + + args_str = ", ".join(args_parts) + + # --- Tipo di ritorno --- + ret_anno = "" + if node.returns: + try: + ret_anno = f" -> {ast.unparse(node.returns)}" + except Exception: + pass + + self._log(f"{prefix}def {node.name}({args_str}){ret_anno}: ...") + + # Docstring della funzione (prima riga) + docstring = ast.get_docstring(node) + if docstring: + first_line = docstring.split('\n')[0].strip() + self.indent_level += 1 + self._log(f'"""{first_line}"""') + self.indent_level -= 1 + + def generic_visit(self, node): + for child in ast.iter_child_nodes(node): + if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): + self.visit(child) + + +class PythonParser(LanguageParser): + def parse(self, content: str, file_path: str) -> str: + filename = os.path.basename(file_path) + try: + tree = ast.parse(content) + visitor = LightVisitor() + visitor.visit(tree) + signatures = "\n".join(visitor.output) + + # Prepend commenti/docstring iniziali del modulo + module_header = _extract_module_comments(content) + + parts = 
[] + if module_header: + parts.append(module_header) + if signatures: + parts.append(signatures) + + result = "\n\n".join(parts) + return result.strip() or f"(No functions or classes found in {filename})" + except SyntaxError: + return f"(Syntax Error parsing {filename})" + except Exception as e: + return f"(Error parsing Python file: {e})" \ No newline at end of file diff --git a/src/deepbase/parsers/registry.py b/src/deepbase/parsers/registry.py new file mode 100644 index 0000000..123136b --- /dev/null +++ b/src/deepbase/parsers/registry.py @@ -0,0 +1,62 @@ +# src/deepbase/parsers/registry.py +import os +from typing import Dict, Set +from .interface import LanguageParser +from .python import PythonParser +from .javascript import JavaScriptParser # <--- NUOVO IMPORT +from .document import MarkdownParser, LatexParser +from .fallback import FallbackParser + +class ParserRegistry: + def __init__(self): + self._parsers: Dict[str, LanguageParser] = {} + self._fallback = FallbackParser() + self._unsupported_extensions_encountered: Set[str] = set() + + # --- Python --- + self.register_parser('.py', PythonParser()) + + # --- JavaScript / TypeScript / React Native --- + js_parser = JavaScriptParser() + # Copre tutto l'ecosistema React/Node/TS + for ext in ['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs']: + self.register_parser(ext, js_parser) + + # --- Documentazione --- + md_parser = MarkdownParser() + self.register_parser('.md', md_parser) + self.register_parser('.markdown', md_parser) + + tex_parser = LatexParser() + for ext in ['.tex', '.sty', '.cls']: + self.register_parser(ext, tex_parser) + + def register_parser(self, extension: str, parser: LanguageParser): + self._parsers[extension] = parser + + def get_parser(self, file_path: str) -> LanguageParser: + _, ext = os.path.splitext(file_path) + ext = ext.lower() + + if ext in self._parsers: + return self._parsers[ext] + + if ext: + self._unsupported_extensions_encountered.add(ext) + + return self._fallback + + def 
parse_file(self, file_path: str, content: str) -> str: + parser = self.get_parser(file_path) + return parser.parse(content, file_path) + + def get_unsupported_warning(self) -> str: + if not self._unsupported_extensions_encountered: + return "" + ext_list = ", ".join(sorted(self._unsupported_extensions_encountered)) + return ( + f"> [WARNING] Light Mode support is currently limited for: {ext_list}. " + "Using generic fallback for these files.\n" + ) + +registry = ParserRegistry() \ No newline at end of file diff --git a/src/deepbase/toon.py b/src/deepbase/toon.py index f53f03a..919893a 100644 --- a/src/deepbase/toon.py +++ b/src/deepbase/toon.py @@ -1,19 +1,23 @@ -# src/deepbase/toon.py (AGGIORNAMENTO) +# src/deepbase/toon.py -import ast import os -import re +import ast import json +import re # Import database handling from deepbase.database import ( - get_database_schema, + get_database_schema, generate_database_context_toon, generate_database_context_hybrid, is_sqlite_database ) +# Import new parser registry +from deepbase.parsers.registry import registry +# Manteniamo ToonVisitor originale per la retrocompatibilità (se usato altrove) +# o per la funzione generate_toon_representation "standard" (non light). 
class ToonVisitor(ast.NodeVisitor): def __init__(self): self.output = [] @@ -27,13 +31,13 @@ def visit_ClassDef(self, node): bases = [b.id for b in node.bases if isinstance(b, ast.Name)] base_str = f"({', '.join(bases)})" if bases else "" self._log(f"C: {node.name}{base_str}") - + self.indent_level += 1 docstring = ast.get_docstring(node) if docstring: short_doc = docstring.split('\n')[0].strip() self._log(f"\"\"\"{short_doc}...\"\"\"") - + self.generic_visit(node) self.indent_level -= 1 @@ -47,18 +51,7 @@ def _handle_function(self, node, is_async=False): args = [arg.arg for arg in node.args.args] args_str = ", ".join(args) prefix = "async " if is_async else "" - - ret_anno = "" - if node.returns: - try: - if isinstance(node.returns, ast.Name): - ret_anno = f" -> {node.returns.id}" - elif isinstance(node.returns, ast.Constant): - ret_anno = f" -> {node.returns.value}" - except: - pass - - self._log(f"{prefix}F: {node.name}({args_str}){ret_anno}") + self._log(f"{prefix}F: {node.name}({args_str})") docstring = ast.get_docstring(node) if docstring: @@ -72,107 +65,15 @@ def generic_visit(self, node): if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): self.visit(child) - -# --- Gestori per file Non-Python --- +# --- Helper Legacy per TOON non-light (struttura scheletrica) --- +# (Qui potresti voler spostare anche questi nei parser in futuro, +# ma per ora ci concentriamo sulla modalità --light) def _handle_markdown(content: str) -> str: - """Estrae solo gli header Markdown.""" - lines = [] - for line in content.splitlines(): - if line.strip().startswith("#"): - lines.append(line.strip()) - if not lines: - return "(Markdown file with no headers)" - return "\n".join(lines) - - -def _handle_toml_ini(content: str) -> str: - """Estrae sezioni [Title] e chiavi, ignorando valori lunghi.""" - lines = [] - for line in content.splitlines(): - clean = line.strip() - if not clean or clean.startswith("#"): - continue - - # Mantiene le sezioni [Project] - if 
clean.startswith("[") and clean.endswith("]"): - lines.append(clean) - # Mantiene le chiavi (key = value), semplificando il valore - elif "=" in clean: - key = clean.split("=")[0].strip() - lines.append(f"{key} = ...") - return "\n".join(lines) - - -def _handle_json_structure(content: str) -> str: - """Prova a parsare JSON e restituire solo le chiavi di primo/secondo livello.""" - try: - data = json.loads(content) - if isinstance(data, dict): - lines = ["{"] - for k, v in data.items(): - if isinstance(v, dict): - lines.append(f" {k}: {{ ...keys: {list(v.keys())} }}") - elif isinstance(v, list): - lines.append(f" {k}: [ ...size: {len(v)} ]") - else: - lines.append(f" {k}: (value)") - lines.append("}") - return "\n".join(lines) - return "(JSON Array or Scalar)" - except: - return "(Invalid JSON content)" - - -def _handle_minified_config(content: str) -> str: - """Rimuove righe vuote e commenti (per .gitignore, requirements.txt).""" - lines = [] - for line in content.splitlines(): - clean = line.strip() - # Ignora righe vuote e commenti - if clean and not clean.startswith("#"): - lines.append(clean) - - if not lines: - return "(Empty or comments-only file)" - return "\n".join(lines) - - -def _handle_latex_structure(content: str) -> str: - """ - Minimizza il LaTeX mantenendo struttura, pacchetti e comandi chiave. - Rimuove il testo semplice. 
- """ - keep_patterns = [ - r'^\s*\\documentclass', # Tipo documento - r'^\s*\\usepackage', # Dipendenze - r'^\s*\\input', # Inclusioni file - r'^\s*\\include', # Inclusioni file - r'^\s*\\(part|chapter|section|subsection|subsubsection)', # Struttura - r'^\s*\\begin', # Inizio blocchi (figure, table, document) - r'^\s*\\end', # Fine blocchi - r'^\s*\\title', - r'^\s*\\author', - r'^\s*\\date' - ] - - combined_pattern = re.compile('|'.join(keep_patterns)) - lines = [] - - for line in content.splitlines(): - # Rimuove commenti - line = line.split('%')[0].rstrip() - if combined_pattern.match(line): - lines.append(line) - - if not lines: - return "(LaTeX content empty or purely textual)" - - return "\n".join(lines) - + lines = [l.strip() for l in content.splitlines() if l.strip().startswith("#")] + return "\n".join(lines) or "(Markdown file with no headers)" def _handle_database_toon(file_path: str) -> str: - """Handle database files in TOON mode.""" if is_sqlite_database(file_path): try: schema = get_database_schema(file_path) @@ -181,20 +82,43 @@ def _handle_database_toon(file_path: str) -> str: return f"(DB Error: {e})" return "(Not a valid SQLite database)" +# --------------------------------------------------------------------------- +# Funzione pubblica principale — LIGHT (solo firme) +# --------------------------------------------------------------------------- + +def generate_light_representation(file_path: str, content: str) -> str: + """ + Genera una rappresentazione LIGHT usando il nuovo sistema di plugin/parser. + """ + # 1. Gestione Database (caso speciale, non basato su contenuto testo) + if is_sqlite_database(file_path): + return _handle_database_toon(file_path) + + # 2. Usa il registro per trovare il parser corretto + return registry.parse_file(file_path, content) + +def get_light_mode_warnings() -> str: + """ + Restituisce i warning accumulati durante l'esecuzione (es. linguaggi non supportati). + Da chiamare in main.py se si vuole stampare un header. 
+ """ + return registry.get_unsupported_warning() + +# --------------------------------------------------------------------------- +# Funzione pubblica principale — TOON (skeleton legacy) +# --------------------------------------------------------------------------- def generate_toon_representation(file_path: str, content: str) -> str: """ - Genera una rappresentazione TOON (Token Oriented) in base al tipo di file. + Genera una rappresentazione TOON (Token Oriented - Skeleton) + Mantiene la logica originale per ora, o delega a Light se preferisci unificare. """ _, ext = os.path.splitext(file_path) - filename = os.path.basename(file_path) ext = ext.lower() - # 0. DATABASE (check prima per magic bytes, indipendentemente dall'estensione) if is_sqlite_database(file_path): return _handle_database_toon(file_path) - # 1. PYTHON if ext == ".py": try: tree = ast.parse(content) @@ -202,62 +126,27 @@ def generate_toon_representation(file_path: str, content: str) -> str: visitor.visit(tree) return "\n".join(visitor.output) except SyntaxError: - return f"(Syntax Error parsing {filename})" + return f"(Syntax Error parsing {os.path.basename(file_path)})" - # 2. MARKDOWN (Documentazione) elif ext in [".md", ".markdown"]: return _handle_markdown(content) - - # --- 2.5 LATEX --- - elif ext in [".tex", ".sty", ".cls"]: - return _handle_latex_structure(content) - - # 3. CONFIGURAZIONE STRUTTURATA (TOML, INI, CFG) - elif ext in [".toml", ".ini", ".cfg"]: - return _handle_toml_ini(content) - - # 4. DATI (JSON) - elif ext == ".json": - return _handle_json_structure(content) - - # 5. CONFIGURAZIONE A LISTA (.gitignore, requirements.txt, .env) - # Lista di file noti per essere liste di regole - elif ext in [".txt", ".dockerignore", ".gitignore"] or filename in [".gitignore", ".dockerignore", "Dockerfile", "Makefile"]: - return _handle_minified_config(content) - - # 6. 
YAML (Struttura semplice basata su indentazione) - elif ext in [".yml", ".yaml"]: - # Per YAML facciamo un filtro semplice regex per mostrare solo le chiavi - lines = [line for line in content.splitlines() if ":" in line and not line.strip().startswith("#")] - # Semplificazione brutale: mostra solo le chiavi - clean_lines = [] - for l in lines: - key = l.split(":")[0] - clean_lines.append(f"{key}:") - return "\n".join(clean_lines) - - # 7. DEFAULT: Fallback minificato (o troncato) - else: - # Se non conosciamo il file, mostriamo le prime 5 righe minificate come anteprima - minified = _handle_minified_config(content) - lines = minified.splitlines() - if len(lines) > 10: - return "\n".join(lines[:10]) + f"\n... ({len(lines)-10} more meaningful lines hidden)" - return minified + + # Per semplicità, per ora il Toon standard per altri file + # può usare il fallback del nuovo sistema o la vecchia logica. + # Usiamo il fallback del registry per coerenza: + return registry.parse_file(file_path, content) +# --------------------------------------------------------------------------- +# Helper per database in focus mode +# --------------------------------------------------------------------------- def generate_database_focused(file_path: str, focused_tables: list = None) -> str: - """ - Generate database context with specific tables in full detail. - Used when database is in focus mode. 
- """ + from deepbase.database import generate_database_context_full, generate_database_context_hybrid if not is_sqlite_database(file_path): return "(Not a valid SQLite database)" - try: schema = get_database_schema(file_path) db_name = os.path.basename(file_path) - if focused_tables: return generate_database_context_hybrid(schema, db_name, focused_tables) else: diff --git a/test.txt b/test.txt new file mode 100644 index 0000000..33e25f2 --- /dev/null +++ b/test.txt @@ -0,0 +1,42 @@ +============================= test session starts ============================== +platform linux -- Python 3.10.12, pytest-9.0.1, pluggy-1.6.0 +rootdir: /home/follen/Documents/git-local/DeepBase +configfile: pyproject.toml +collected 8 items + +tests/test_suite_python.py F....... [100%] + +=================================== FAILURES =================================== +_____________________ TestPythonSuite.test_basic_structure _____________________ + +self = +tmp_path = PosixPath('/tmp/pytest-of-follen/pytest-7/test_basic_structure0') + + def test_basic_structure(self, tmp_path): + """Testa che il comando base generi la struttura nel file.""" + self.create_dummy_python_project(tmp_path) + + output_file = tmp_path / "llm_context.md" + + # Passiamo esplicitamente l'output file nel tmp_path + result = runner.invoke(test_app, [str(tmp_path), "-o", str(output_file)]) + + assert result.exit_code == 0 + assert output_file.exists() + + content = output_file.read_text(encoding="utf-8") + + # Verifica presenza nell'albero (DENTRO IL FILE, non nello stdout) +> assert "main.py" in content +E AssertionError: assert 'main.py' in '# Project Context: test_basic_structure0\n\n=========================================================================...==============\n\n> Total Size: 0.20 KB | Est. 
Tokens: ~52\n📁 test_basic_structure0/\n└── 📁 utils/ (11.2% | ~6t)\n\n\n' + +tests/test_suite_python.py:66: AssertionError +=============================== warnings summary =============================== +.venv/lib/python3.10/site-packages/typer/main.py:306 + /home/follen/Documents/git-local/DeepBase/.venv/lib/python3.10/site-packages/typer/main.py:306: PytestCollectionWarning: cannot collect 'test_app' because it is not a function. + def __call__(self, *args: Any, **kwargs: Any) -> Any: + +-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html +=========================== short test summary info ============================ +FAILED tests/test_suite_python.py::TestPythonSuite::test_basic_structure - As... +==================== 1 failed, 7 passed, 1 warning in 0.17s ==================== diff --git a/tests/test_cli.py b/tests/test_cli.py deleted file mode 100644 index 8f2d194..0000000 --- a/tests/test_cli.py +++ /dev/null @@ -1,56 +0,0 @@ -# tests/test_cli.py - -import os -from typer.testing import CliRunner -from deepbase.main import app - -runner = CliRunner() - -# ... (gli altri test sulle directory rimangono uguali) ... - -def test_cli_single_file_default(tmp_path): - """ - Testa che di default (senza -a) venga generata SOLO la struttura. - """ - single_file = tmp_path / "README.md" - unique_content_string = "Questo è il contenuto univoco del file." 
- single_file.write_text(f"# Intro\n{unique_content_string}\n## Usage", encoding="utf-8") - - output_file = tmp_path / "structure_only.md" - - result = runner.invoke(app, [str(single_file), "-o", str(output_file)]) - - assert result.exit_code == 0 - content = output_file.read_text(encoding="utf-8") - - # DEVE contenere la struttura - assert "# Intro" in content - assert "## Usage" in content - - # NON DEVE contenere il corpo del testo (perché non abbiamo passato -a) - # Nota: la regex dei parser estrae solo le linee con #, quindi la stringa di testo puro - # non dovrebbe apparire nell'output se stiamo stampando solo la structure section. - assert unique_content_string not in content - -def test_cli_single_file_with_all(tmp_path): - """ - Testa che con il flag --all venga generato ANCHE il contenuto. - """ - single_file = tmp_path / "DOCS.md" - unique_content_string = "Dettagli molto importanti." - single_file.write_text(f"# Title\n{unique_content_string}", encoding="utf-8") - - output_file = tmp_path / "full_context.md" - - # Passiamo il flag --all - result = runner.invoke(app, [str(single_file), "--all", "-o", str(output_file)]) - - assert result.exit_code == 0 - content = output_file.read_text(encoding="utf-8") - - # DEVE contenere la struttura - assert "# Title" in content - - # DEVE contenere ANCHE il corpo del testo - assert "--- START OF FILE: DOCS.md ---" in content - assert unique_content_string in content \ No newline at end of file diff --git a/tests/test_parsers.py b/tests/test_parsers.py deleted file mode 100644 index 3123906..0000000 --- a/tests/test_parsers.py +++ /dev/null @@ -1,50 +0,0 @@ -import pytest -from deepbase.parsers import extract_markdown_structure, get_document_structure - -def test_extract_markdown_structure_simple(): - """Testa l'estrazione corretta di header semplici.""" - content = """ -# Titolo Principale -Testo normale che deve essere ignorato. - -## Sottosezione -Altro testo. 
- -### Livello 3 - """ - expected = "# Titolo Principale\n## Sottosezione\n### Livello 3" - result = extract_markdown_structure(content) - assert result.strip() == expected - -def test_extract_markdown_structure_no_headers(): - """Testa un file markdown senza intestazioni.""" - content = "Solo testo semplice.\nNessun titolo qui." - result = extract_markdown_structure(content) - # FIX: Aggiunta la parola 'Markdown' per matchare l'output aggiornato - assert "Nessuna struttura Markdown rilevata" in result - -def test_extract_markdown_structure_complex(): - """Testa che il codice e i commenti non vengano confusi per header.""" - content = """ -# Header Reale - # Questo è codice, non un header - ## Header Reale 2 -""" - result = extract_markdown_structure(content) - # L'header indentato (codice) non deve apparire, o deve essere gestito come testo - # La regex attuale richiede che # sia all'inizio della riga (con whitespace opzionali) - assert "# Header Reale" in result - assert "## Header Reale 2" in result - # Nota: Se la tua regex permette spazi prima del #, verifica il comportamento desiderato - -def test_dispatcher_extensions(): - """Testa che il dispatcher scelga il parser giusto in base all'estensione.""" - content = "# Test" - - # Markdown extensions - assert get_document_structure("file.md", content) == "# Test" - assert get_document_structure("file.markdown", content) == "# Test" - - # Unsupported extensions (dovrebbe ritornare None o messaggio default) - assert get_document_structure("file.txt", content) is None - assert get_document_structure("script.py", content) is None \ No newline at end of file diff --git a/tests/test_suite_python.py b/tests/test_suite_python.py new file mode 100644 index 0000000..8fef89f --- /dev/null +++ b/tests/test_suite_python.py @@ -0,0 +1,265 @@ +# tests/test_suite_python.py + +import os +import typer +from typer.testing import CliRunner +from deepbase.main import main +import sqlite3 + +# --- SETUP PER I TEST --- +test_app = 
typer.Typer() +test_app.command()(main) + +runner = CliRunner() + +class TestDeepBaseSuite: + """ + Test suite completa per DeepBase. + Copre Python, Markdown, LaTeX, Database e il meccanismo di Fallback. + """ + + def create_dummy_project(self, root): + """Helper per popolare una directory con vari tipi di file.""" + # 1. Python Complex + main_py = root / "main.py" + main_py.write_text("""\"\"\" +Module docstring here. +Should be preserved. +\"\"\" +import os + +# Initial comment +def simple_func(): + return True + +async def async_func(a: int, b: str = "default") -> bool: + \"\"\"Function docstring.\"\"\" + print("body hidden") + return False + +class MyClass: + \"\"\"Class docstring.\"\"\" + def method_one(self): + return 1 +""", encoding="utf-8") + + # 2. Markdown + readme = root / "README.md" + readme.write_text("""# Project Title +Description text that should be removed. +## Section 1 +More text. +### Subsection +""", encoding="utf-8") + + # 3. LaTeX + doc_tex = root / "document.tex" + doc_tex.write_text(r"""\documentclass{article} +\usepackage{graphicx} +\begin{document} +Text that should be removed in light mode. +\section{Introduction} +\subsection{Background} +\end{document} +""", encoding="utf-8") + + # 4. JavaScript (Unsupported / Fallback test) + script_js = root / "script.js" + script_js.write_text("""function hello() { + console.log("This is JS"); + return true; +} +""", encoding="utf-8") + + # 5. 
JSON (Legacy TOON support) + config_json = root / "config.json" + config_json.write_text('{"key": "value", "list": [1, 2, 3]}', encoding="utf-8") + + def test_python_light_advanced(self, tmp_path): + """Testa il nuovo parser Python con docstring, async e type hints.""" + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" + + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) + + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # Verifica Docstring di modulo (controlliamo le righe separate perché è multiline) + assert '"""' in content + assert "Module docstring here." in content + assert "Should be preserved." in content + + # Verifica Async e Type Hints + assert "async def async_func" in content + assert "b: str" in content + + # Verifica Docstring di funzione (prima riga) + assert '"""Function docstring."""' in content + + # Verifica che il corpo sia rimosso + assert 'print("body hidden")' not in content + + def test_markdown_parsing(self, tmp_path): + """Testa che il parser Markdown estragga solo gli header.""" + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" + + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) + content = output_file.read_text(encoding="utf-8") + + assert "# Project Title" in content + assert "## Section 1" in content + # Il testo descrittivo non deve esserci + assert "Description text that should be removed" not in content + + def test_latex_parsing(self, tmp_path): + """Testa che il parser LaTeX mantenga la struttura.""" + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" + + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) + content = output_file.read_text(encoding="utf-8") + + assert r"\documentclass{article}" in content + assert r"\section{Introduction}" in content + assert "Text that should be removed" not in content + + 
def test_fallback_and_warning(self, tmp_path): + """ + Testa il meccanismo di fallback per file non supportati (es. .js) + e verifica che venga generato il WARNING. + """ + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" + + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) + content = output_file.read_text(encoding="utf-8") + + # 1. Verifica che il contenuto JS sia presente (Fallback behavior) + assert "function hello()" in content + + # 2. Verifica la presenza del WARNING (nello stdout o nel file) + warning_msg = ".js" + assert (warning_msg in result.stdout) or (warning_msg in content) + + def test_json_legacy_support(self, tmp_path): + """Testa che i file JSON vengano ancora gestiti.""" + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" + + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) + content = output_file.read_text(encoding="utf-8") + + # Verifica struttura JSON + assert "key" in content + assert "list" in content + + def test_database_handling(self, tmp_path): + """Testa integrazione database SQLite.""" + project_dir = tmp_path / "db_project" + project_dir.mkdir() + db_path = project_dir / "test.sqlite" + + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + cursor.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, email TEXT)") + conn.commit() + conn.close() + + output_file = project_dir / "context.md" + result = runner.invoke(test_app, [str(project_dir), "--light", "-o", str(output_file)]) + + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + assert "users" in content + assert "email:TEXT" in content + + def test_focus_mode_hybrid(self, tmp_path): + """Testa --focus combined (ibrido) su file Python.""" + self.create_dummy_project(tmp_path) + output_file = tmp_path / "context.md" + + # Focus su main.py. 
SENZA --light o --all, il comportamento standard + # per i file NON in focus è di essere presenti SOLO nell'albero (tree). + result = runner.invoke(test_app, [str(tmp_path), "--focus", "main.py", "-o", str(output_file)]) + + assert result.exit_code == 0 + content = output_file.read_text(encoding="utf-8") + + # 1. main.py deve essere FULL (contiene il corpo) + assert 'print("body hidden")' in content + + # 2. README.md NON in focus. + # Verifica che sia presente nell'albero dei file + assert "README.md" in content + + # Ma NON deve esserci il suo contenuto (perché non abbiamo passato --light come background) + # Nota: se in futuro cambi il default, aggiorna questo test. + assert "# Project Title" not in content + + def test_ignore_files(self, tmp_path): + """Testa che .deepbase.toml venga rispettato.""" + self.create_dummy_project(tmp_path) + + (tmp_path / ".deepbase.toml").write_text('ignore_files = ["script.js"]', encoding="utf-8") + + output_file = tmp_path / "context.md" + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) + + content = output_file.read_text(encoding="utf-8") + assert "script.js" not in content + + + def test_javascript_react_parsing(self, tmp_path): + """Testa il parsing di file JS, TS e React (JSX/TSX).""" + self.create_dummy_project(tmp_path) + + # Crea un componente React Native finto + rn_file = tmp_path / "App.tsx" + rn_file.write_text(""" +import React, { useEffect } from 'react'; +import { View, Text } from 'react-native'; + +/** + * Componente principale + */ +export const App = (props: Props) => { + useEffect(() => { + console.log("Effect"); + }, []); + + const helper = () => true; + + return ( + + Hello + + ); +}; + +export default class ErrorBoundary extends React.Component { + render() { + return null; + } +} +""", encoding="utf-8") + + output_file = tmp_path / "context.md" + result = runner.invoke(test_app, [str(tmp_path), "--light", "-o", str(output_file)]) + + content = 
output_file.read_text(encoding="utf-8") + + # Verifica Componente Funzionale + assert "export const App = (props: Props) => { ... }" in content + + # Verifica Commento JSDoc + assert "Componente principale" in content + + # Verifica Classe + assert "export default class ErrorBoundary extends React.Component { ... }" in content + + # Verifica che il corpo (useEffect, JSX) sia nascosto + assert "console.log" not in content + assert "" not in content \ No newline at end of file