diff --git a/.gitignore b/.gitignore index 75426c02..9dae5e9e 100644 --- a/.gitignore +++ b/.gitignore @@ -164,6 +164,3 @@ cython_debug/ #.idea/ .vscode/settings.json history.txt - -# ignore default output directory -output/ \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index af0b080a..7dfe2bae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ fastapi[standard] uvicorn fastapi-analytics -click slowapi \ No newline at end of file diff --git a/src/cli.py b/src/cli.py deleted file mode 100644 index 5f589542..00000000 --- a/src/cli.py +++ /dev/null @@ -1,59 +0,0 @@ -"""Command line interface for the ingestion behavior.""" -import os -import click -from ingest import ingest_from_query - -@click.group() -def cli(): - pass - - -@click.command() -@click.option("--local_path", help="Parent directory for repository.") -@click.option("--sub_path", help="The sub-directory of the local repository.") -@click.option("--branch", default="main", help="The branch to use.") -@click.option("--user_name", help="The user name of the repository owner.") -@click.option("--repo_name", help="The logical name of the repository.") -@click.option("--slug", help="A slug for the repo.") -@click.option("--output", - default="output", - help="The directory containing the output artifacts.") -def ingest(local_path, sub_path, branch, user_name, repo_name, output, slug): - """Ingest a repository to create LLM-friendly ingestible data.""" - _do_ingest(local_path, sub_path, branch, user_name, repo_name, output, slug) - -def _do_ingest(local_path, sub_path, branch, user_name, repo_name, output, slug): - qry = { - "branch": branch, - "local_path": local_path, - "output": output, - "repo_name": repo_name, - "slug": slug, - "subpath": f"/{sub_path}", - "user_name": user_name, - } - rslt = ingest_from_query(qry) - # result is a tuple of three parts - # 1. The summary string - sum_fname = f"{qry['repo_name']}_summary.txt" - # 2. The tree structure - tree_fname = f"{qry['repo_name']}_tree.txt" - # 3. The file content - content_fname = f"{qry['repo_name']}_content.txt" - if not os.path.exists(output): - os.makedirs(output) - with open(f"{qry['output']}/{sum_fname}", "w") as f: - f.write(rslt[0]) - click.echo(f"Summary written to {qry['output']}/{sum_fname}") - with open(f"{qry['output']}/{tree_fname}", "w") as f: - f.write(rslt[1]) - click.echo(f"Tree written to {qry['output']}/{tree_fname}") - with open(f"{qry['output']}/{content_fname}", "w") as f: - f.write(rslt[2]) - click.echo(f"Content written to {qry['output']}/{content_fname}") - - -cli.add_command(ingest) - -if __name__ == '__main__': - cli() diff --git a/src/ingest.py b/src/ingest.py index bdace25d..da6ea803 100644 --- a/src/ingest.py +++ b/src/ingest.py @@ -156,9 +156,9 @@ def create_tree_structure(query: dict, node: Dict, prefix: str = "", is_last: bo def ingest_from_query(query: dict, ignore_patterns: List[str] = DEFAULT_IGNORE_PATTERNS, max_file_size: int = MAX_FILE_SIZE) -> Dict: """Main entry point for analyzing a codebase directory or single file.""" - path = f"{query['local_path'].strip()}{query['subpath'].strip()}" + path = f"{query['local_path']}{query['subpath']}" if not os.path.exists(path): - raise ValueError(f"{path} cannot be found, make sure the repository is public") + raise ValueError(f"{query['slug']} cannot be found, make sure the repository is public") if query.get('type') == 'blob': if not os.path.isfile(path):