From 880c57b2178bd9e1434ee188cfdbf7b4b567fb67 Mon Sep 17 00:00:00 2001 From: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com> Date: Fri, 10 Jan 2025 11:26:23 +0100 Subject: [PATCH 1/7] refactor: standardize terminology and documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Standardized capitalization of 'Git', 'GitHub', and 'URL' - Removed trailing slashes in links and added missing sentence periods in `README.md` - Adjusted docstrings to adhere to PEP 257 by using imperative tense - Standardized docstrings in `exceptions.py` - Replaced 'GitHub' with 'Git' when referring to broader context - Renamed templates: `github.jinja` → `git.jinja`, `github_form.jinja` → `git_form.jinja` - Renamed variables: `github_url` → `repo_url` --- Dockerfile | 2 +- README.md | 30 +++++++++---------- src/gitingest/__init__.py | 2 +- src/gitingest/exceptions.py | 4 +-- src/gitingest/query_ingestion.py | 4 ++- src/gitingest/repository_clone.py | 17 +++++------ src/gitingest/repository_ingest.py | 2 +- src/query_processor.py | 8 ++--- src/routers/dynamic.py | 16 +++++----- src/routers/index.py | 4 +-- src/templates/api.jinja | 2 +- src/templates/base.jinja | 4 +-- src/templates/components/footer.jinja | 2 +- .../{github_form.jinja => git_form.jinja} | 2 +- src/templates/{github.jinja => git.jinja} | 2 +- src/templates/index.jinja | 4 +-- 16 files changed, 53 insertions(+), 52 deletions(-) rename src/templates/components/{github_form.jinja => git_form.jinja} (98%) rename src/templates/{github.jinja => git.jinja} (97%) diff --git a/Dockerfile b/Dockerfile index 564a5abb..cb0eab80 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ FROM python:3.12-slim ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 -# Install git +# Install Git RUN apt-get update \ && apt-get install -y --no-install-recommends git curl\ && rm -rf /var/lib/apt/lists/* diff --git a/README.md b/README.md index d5fe3079..049f6402 100644 
--- a/README.md +++ b/README.md @@ -11,13 +11,13 @@ Turn any Git repository into a prompt-friendly text ingest for LLMs. -You can also replace `hub` with `ingest` in any GitHub URL to access the coresponding digest +You can also replace `hub` with `ingest` in any GitHub URL to access the corresponding digest. -[gitingest.com](https://gitingest.com/) · [Chrome Extension](https://chromewebstore.google.com/detail/adfjahbijlkjfoicpjkhjicpjpjfaood) · [Firefox Add-on](https://addons.mozilla.org/firefox/addon/gitingest/) +[gitingest.com](https://gitingest.com) · [Chrome Extension](https://chromewebstore.google.com/detail/adfjahbijlkjfoicpjkhjicpjpjfaood) · [Firefox Add-on](https://addons.mozilla.org/firefox/addon/gitingest) ## 🚀 Features -- **Easy code context**: Get a text digest from a git repository URL or a directory +- **Easy code context**: Get a text digest from a Git repository URL or a directory - **Smart Formatting**: Optimized output format for LLM prompts - **Statistics about**: - File and directory structure @@ -36,11 +36,12 @@ pip install gitingest Available in the Chrome Web Store -Get The Add-on for Firefox +Get The Add-on for Firefox Get from the Edge Add-ons The extension is open source at [lcandy2/gitingest-extension](https://github.com/lcandy2/gitingest-extension). + Issues and feature requests are welcome to the repo. ## 💡 Command line usage @@ -71,7 +72,7 @@ summary, tree, content = ingest("path/to/directory") summary, tree, content = ingest("https://github.com/cyclotruc/gitingest") ``` -By default, this won't write a file but can be enabled with the `output` argument +By default, this won't write a file but can be enabled with the `output` argument. ## 🌐 Self-host @@ -87,31 +88,30 @@ By default, this won't write a file but can be enabled with the `output` argumen docker run -d --name gitingest -p 8000:8000 gitingest ``` -The application will be available at `http://localhost:8000` +The application will be available at `http://localhost:8000`. 
If you are hosting it on a domain, you can specify the allowed hostnames via env variable `ALLOWED_HOSTS`. ```bash - #Default: "gitingest.com,*.gitingest.com,localhost, 127.0.0.1". + # Default: "gitingest.com, *.gitingest.com, localhost, 127.0.0.1". ALLOWED_HOSTS="example.com, localhost, 127.0.0.1" ``` ## 🛠️ Stack -- [Tailwind CSS](https://tailwindcss.com/) - Frontend +- [Tailwind CSS](https://tailwindcss.com) - Frontend - [FastAPI](https://github.com/fastapi/fastapi) - Backend framework -- [Jinja2](https://jinja.palletsprojects.com/) - HTML templating +- [Jinja2](https://jinja.palletsprojects.com) - HTML templating - [tiktoken](https://github.com/openai/tiktoken) - Token estimation -- [apianalytics.dev](https://www.apianalytics.dev/) - Simple Analytics +- [apianalytics.dev](https://www.apianalytics.dev) - Simple Analytics -### Looking for a javascript/node package? +### Looking for a JavaScript/Node package? Check out the NPM alternative 📦 Repomix: ## ✔️ Contributing to Gitingest -Gitingest aims to be friendly for first time contributors, with a simple python and html codebase. - If you need any help while working with the code, reach out to us on [discord](https://discord.com/invite/zerRaGK9EC) +Gitingest aims to be friendly for first-time contributors, with a simple Python and HTML codebase. If you need any help while working with the code, reach out to us on [Discord](https://discord.com/invite/zerRaGK9EC). ### Ways to help (non-technical) @@ -125,7 +125,7 @@ Gitingest aims to be friendly for first time contributors, with a simple python 2. Setup the dev environment (see Development section bellow) 3. Run unit tests with `pytest` 4. Commit your changes and run `pre-commit` -5. Open a pull request on Github for review and feedback +5. Open a pull request on GitHub for review and feedback 6. 
(Optionnal) Invite project maintainer to your branch for easier collaboration ## 🔧 Development @@ -161,7 +161,7 @@ Gitingest aims to be friendly for first time contributors, with a simple python pytest ``` -The application should be available at `http://localhost:8000` +The application should be available at `http://localhost:8000`. ### Working on the CLI diff --git a/src/gitingest/__init__.py b/src/gitingest/__init__.py index c592350b..692de607 100644 --- a/src/gitingest/__init__.py +++ b/src/gitingest/__init__.py @@ -1,4 +1,4 @@ -""" Gitingest: A package for ingesting data from git repositories. """ +""" Gitingest: A package for ingesting data from Git repositories. """ from gitingest.query_ingestion import run_ingest_query from gitingest.query_parser import parse_query diff --git a/src/gitingest/exceptions.py b/src/gitingest/exceptions.py index bfb3888b..8808cf77 100644 --- a/src/gitingest/exceptions.py +++ b/src/gitingest/exceptions.py @@ -23,7 +23,7 @@ def __init__(self, pattern: str) -> None: class AsyncTimeoutError(Exception): """ - Raised when an async operation exceeds its timeout limit. + Exception raised when an async operation exceeds its timeout limit. This exception is used by the `async_timeout` decorator to signal that the wrapped asynchronous function has exceeded the specified time limit for execution. 
@@ -38,7 +38,7 @@ def __init__(self, max_files: int) -> None: class MaxFileSizeReachedError(Exception): - """Raised when the maximum file size is reached.""" + """Exception raised when the maximum file size is reached.""" def __init__(self, max_size: int): super().__init__(f"Maximum file size limit ({max_size/1024/1024:.1f}MB) reached.") diff --git a/src/gitingest/query_ingestion.py b/src/gitingest/query_ingestion.py index c58ea810..3396ca6e 100644 --- a/src/gitingest/query_ingestion.py +++ b/src/gitingest/query_ingestion.py @@ -170,7 +170,9 @@ def _read_file_content(file_path: Path) -> str: def _sort_children(children: list[dict[str, Any]]) -> list[dict[str, Any]]: """ - Sort children nodes with: + Sort the children nodes of a directory according to a specific order. + + Order of sorting: 1. README.md first 2. Regular files (not starting with dot) 3. Hidden files (starting with dot) diff --git a/src/gitingest/repository_clone.py b/src/gitingest/repository_clone.py index 01ba3877..57374ada 100644 --- a/src/gitingest/repository_clone.py +++ b/src/gitingest/repository_clone.py @@ -37,7 +37,7 @@ class CloneConfig: @async_timeout(CLONE_TIMEOUT) async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: """ - Clones a repository to a local path based on the provided configuration. + Clone a repository to a local path based on the provided configuration. This function handles the process of cloning a Git repository to the local file system. It can clone a specific branch or commit if provided, and it raises exceptions if @@ -55,7 +55,7 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: Returns ------- tuple[bytes, bytes] - A tuple containing the stdout and stderr of the git commands executed. + A tuple containing the stdout and stderr of the Git commands executed. 
Raises ------ @@ -101,13 +101,12 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: async def _check_repo_exists(url: str) -> bool: """ - Check if a repository exists at the given URL using an HTTP HEAD request. + Check if a Git repository exists at the provided URL. Parameters ---------- url : str - The URL of the repository. - + The URL of the Git repository to check. Returns ------- bool @@ -130,22 +129,22 @@ async def _check_repo_exists(url: str) -> bool: async def _run_git_command(*args: str) -> tuple[bytes, bytes]: """ - Executes a git command asynchronously and captures its output. + Execute a Git command asynchronously and capture its output. Parameters ---------- *args : str - The git command and its arguments to execute. + The Git command and its arguments to execute. Returns ------- tuple[bytes, bytes] - A tuple containing the stdout and stderr of the git command. + A tuple containing the stdout and stderr of the Git command. Raises ------ RuntimeError - If the git command exits with a non-zero status. + If the Git command exits with a non-zero status. """ proc = await asyncio.create_subprocess_exec( *args, diff --git a/src/gitingest/repository_ingest.py b/src/gitingest/repository_ingest.py index e2cecaa3..a1149847 100644 --- a/src/gitingest/repository_ingest.py +++ b/src/gitingest/repository_ingest.py @@ -27,7 +27,7 @@ def ingest( Parameters ---------- source : str - The source to analyze, which can be a URL (for a GitHub repository) or a local directory path. + The source to analyze, which can be a URL (for a Git repository) or a local directory path. max_file_size : int Maximum allowed file size for file ingestion. Files larger than this size are ignored, by default 10*1024*1024 (10 MB). 
diff --git a/src/query_processor.py b/src/query_processor.py index f6c7df85..544a2eea 100644 --- a/src/query_processor.py +++ b/src/query_processor.py @@ -26,7 +26,7 @@ async def process_query( """ Process a query by parsing input, cloning a repository, and generating a summary. - Handle user input, process GitHub repository data, and prepare + Handle user input, process Git repository data, and prepare a response for rendering a template with the processed results or an error message. Parameters @@ -34,7 +34,7 @@ async def process_query( request : Request The HTTP request object. input_text : str - Input text provided by the user, typically a GitHub repository URL or slug. + Input text provided by the user, typically a Git repository URL or slug. slider_position : int Position of the slider, representing the maximum file size in the query. pattern_type : str @@ -63,13 +63,13 @@ async def process_query( else: raise ValueError(f"Invalid pattern type: {pattern_type}") - template = "index.jinja" if is_index else "github.jinja" + template = "index.jinja" if is_index else "git.jinja" template_response = partial(templates.TemplateResponse, name=template) max_file_size = log_slider_to_size(slider_position) context = { "request": request, - "github_url": input_text, + "repo_url": input_text, "examples": EXAMPLE_REPOS if is_index else [], "default_file_size": slider_position, "pattern_type": pattern_type, diff --git a/src/routers/dynamic.py b/src/routers/dynamic.py index add89c4f..0787fbfa 100644 --- a/src/routers/dynamic.py +++ b/src/routers/dynamic.py @@ -14,29 +14,29 @@ @router.get("/{full_path:path}") async def catch_all(request: Request, full_path: str) -> HTMLResponse: """ - Renders a page with a GitHub URL based on the provided path. + Render a page with a Git URL based on the provided path. - This endpoint catches all GET requests with a dynamic path, constructs a GitHub URL - using the `full_path` parameter, and renders the `github.jinja` template with that URL. 
+ This endpoint catches all GET requests with a dynamic path, constructs a Git URL + using the `full_path` parameter, and renders the `git.jinja` template with that URL. Parameters ---------- request : Request The incoming request object, which provides context for rendering the response. full_path : str - The full path extracted from the URL, which is used to build the GitHub URL. + The full path extracted from the URL, which is used to build the Git URL. Returns ------- HTMLResponse - An HTML response containing the rendered template, with the GitHub URL + An HTML response containing the rendered template, with the Git URL and other default parameters such as loading state and file size. """ return templates.TemplateResponse( - "github.jinja", + "git.jinja", { "request": request, - "github_url": f"https://github.com/{full_path}", + "repo_url": full_path, "loading": True, "default_file_size": 243, }, @@ -53,7 +53,7 @@ async def process_catch_all( pattern: str = Form(...), ) -> HTMLResponse: """ - Processes the form submission with user input for query parameters. + Process the form submission with user input for query parameters. This endpoint handles POST requests, processes the input parameters (e.g., text, file size, pattern), and calls the `process_query` function to handle the query logic, returning the result as an HTML response. diff --git a/src/routers/index.py b/src/routers/index.py index 70a3f6d2..b338c301 100644 --- a/src/routers/index.py +++ b/src/routers/index.py @@ -15,7 +15,7 @@ @router.get("/", response_class=HTMLResponse) async def home(request: Request) -> HTMLResponse: """ - Renders the home page with example repositories and default parameters. + Render the home page with example repositories and default parameters. This endpoint serves the home page of the application, rendering the `index.jinja` template and providing it with a list of example repositories and default file size values. 
@@ -51,7 +51,7 @@ async def index_post( pattern: str = Form(...), ) -> HTMLResponse: """ - Processes the form submission with user input for query parameters. + Process the form submission with user input for query parameters. This endpoint handles POST requests from the home page form. It processes the user-submitted input (e.g., text, file size, pattern type) and invokes the `process_query` function to handle diff --git a/src/templates/api.jinja b/src/templates/api.jinja index 85fa0c3b..9bad379a 100644 --- a/src/templates/api.jinja +++ b/src/templates/api.jinja @@ -26,7 +26,7 @@ open an issue on github + class="text-[#6e5000] hover:underline">Open an issue on GitHub to suggest features.

diff --git a/src/templates/base.jinja b/src/templates/base.jinja index 7c8359cf..a6e30bf5 100644 --- a/src/templates/base.jinja +++ b/src/templates/base.jinja @@ -6,7 +6,7 @@ + content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text."> @@ -28,7 +28,7 @@ + content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text."> diff --git a/src/templates/components/footer.jinja b/src/templates/components/footer.jinja index 61fadb28..1a8f3e6e 100644 --- a/src/templates/components/footer.jinja +++ b/src/templates/components/footer.jinja @@ -1,7 +1,7 @@