From 8959d188adf68b908f59e56ee0f40e556faf46c9 Mon Sep 17 00:00:00 2001 From: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com> Date: Sun, 19 Jan 2025 15:38:48 +0100 Subject: [PATCH 1/7] Refactor project into a dedicated 'server' module and update all references accordingly --- Dockerfile | 2 +- src/routers/__init__.py | 7 ------- src/server/__init__.py | 0 src/{ => server}/main.py | 10 +++++----- src/{ => server}/query_processor.py | 4 ++-- src/server/routers/__init__.py | 7 +++++++ src/{ => server}/routers/download.py | 0 src/{ => server}/routers/dynamic.py | 6 +++--- src/{ => server}/routers/index.py | 6 +++--- src/{ => server}/server_utils.py | 0 src/{ => server}/static/apple-touch-icon.png | Bin src/{ => server}/static/favicon-64.png | Bin src/{ => server}/static/favicon.ico | Bin src/{ => server}/static/favicon.svg | 0 src/{ => server}/static/js/utils.js | 0 src/{ => server}/static/og-image.png | Bin src/{ => server}/static/robots.txt | 0 src/{ => server}/templates/api.jinja | 0 src/{ => server}/templates/base.jinja | 12 ++++++------ src/{ => server}/templates/components/footer.jinja | 0 .../templates/components/git_form.jinja | 0 src/{ => server}/templates/components/navbar.jinja | 0 src/{ => server}/templates/components/result.jinja | 0 src/{ => server}/templates/git.jinja | 0 src/{ => server}/templates/index.jinja | 0 25 files changed, 27 insertions(+), 27 deletions(-) delete mode 100644 src/routers/__init__.py create mode 100644 src/server/__init__.py rename src/{ => server}/main.py (95%) rename src/{ => server}/query_processor.py (98%) create mode 100644 src/server/routers/__init__.py rename src/{ => server}/routers/download.py (100%) rename src/{ => server}/routers/dynamic.py (94%) rename src/{ => server}/routers/index.py (94%) rename src/{ => server}/server_utils.py (100%) rename src/{ => server}/static/apple-touch-icon.png (100%) rename src/{ => server}/static/favicon-64.png (100%) rename src/{ => server}/static/favicon.ico (100%) rename src/{ => server}/static/favicon.svg (100%) rename src/{ => server}/static/js/utils.js (100%) rename src/{ => server}/static/og-image.png (100%) rename src/{ => server}/static/robots.txt (100%) rename src/{ => server}/templates/api.jinja (100%) rename src/{ => server}/templates/base.jinja (83%) rename src/{ => server}/templates/components/footer.jinja (100%) rename src/{ => server}/templates/components/git_form.jinja (100%) rename src/{ => server}/templates/components/navbar.jinja (100%) rename src/{ => server}/templates/components/result.jinja (100%) rename src/{ => server}/templates/git.jinja (100%) rename src/{ => server}/templates/index.jinja (100%) diff --git a/Dockerfile b/Dockerfile index cb0eab80..63577a98 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,4 +41,4 @@ USER appuser EXPOSE 8000 -CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["python", "-m", "uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/src/routers/__init__.py b/src/routers/__init__.py deleted file mode 100644 index d8d24093..00000000 --- a/src/routers/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" This module contains the routers for the FastAPI application. """ - -from routers.download import router as download -from routers.dynamic import router as dynamic -from routers.index import router as index - -__all__ = ["download", "dynamic", "index"] diff --git a/src/server/__init__.py b/src/server/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/main.py b/src/server/main.py similarity index 95% rename from src/main.py rename to src/server/main.py index 556b3e1d..0554b1f3 100644 --- a/src/main.py +++ b/src/server/main.py @@ -18,8 +18,8 @@ from starlette.middleware.trustedhost import TrustedHostMiddleware from config import DELETE_REPO_AFTER, TMP_BASE_PATH -from routers import download, dynamic, index -from server_utils import limiter +from server.routers import download, dynamic, index +from server.server_utils import limiter # Load environment variables from .env file load_dotenv() @@ -156,7 +156,7 @@ async def rate_limit_exception_handler(request: Request, exc: Exception) -> Resp app.add_exception_handler(RateLimitExceeded, rate_limit_exception_handler) # Mount static files to serve CSS, JS, and other static assets -app.mount("/static", StaticFiles(directory="static"), name="static") +app.mount("/server/static", StaticFiles(directory="server/static"), name="static") # Set up API analytics middleware if an API key is provided if app_analytics_key := os.getenv("API_ANALYTICS_KEY"): @@ -175,7 +175,7 @@ async def rate_limit_exception_handler(request: Request, exc: Exception) -> Resp app.add_middleware(TrustedHostMiddleware, allowed_hosts=allowed_hosts) # Set up template rendering -templates = Jinja2Templates(directory="templates") +templates = Jinja2Templates(directory="server/templates") @app.get("/health") @@ -235,7 +235,7 @@ async def robots() -> FileResponse: FileResponse The `robots.txt` file located in the static directory. """ - return FileResponse("static/robots.txt") + return FileResponse("server/static/robots.txt") # Include routers for modular endpoints diff --git a/src/query_processor.py b/src/server/query_processor.py similarity index 98% rename from src/query_processor.py rename to src/server/query_processor.py index 62f1c83f..d8ec7da1 100644 --- a/src/query_processor.py +++ b/src/server/query_processor.py @@ -10,9 +10,9 @@ from gitingest.query_ingestion import run_ingest_query from gitingest.query_parser import ParsedQuery, parse_query from gitingest.repository_clone import CloneConfig, clone_repo -from server_utils import Colors, log_slider_to_size +from server.server_utils import Colors, log_slider_to_size -templates = Jinja2Templates(directory="templates") +templates = Jinja2Templates(directory="server/templates") async def process_query( diff --git a/src/server/routers/__init__.py b/src/server/routers/__init__.py new file mode 100644 index 00000000..ae6666b1 --- /dev/null +++ b/src/server/routers/__init__.py @@ -0,0 +1,7 @@ +""" This module contains the routers for the FastAPI application. """ + +from server.routers.download import router as download +from server.routers.dynamic import router as dynamic +from server.routers.index import router as index + +__all__ = ["download", "dynamic", "index"] diff --git a/src/routers/download.py b/src/server/routers/download.py similarity index 100% rename from src/routers/download.py rename to src/server/routers/download.py diff --git a/src/routers/dynamic.py b/src/server/routers/dynamic.py similarity index 94% rename from src/routers/dynamic.py rename to src/server/routers/dynamic.py index 0787fbfa..d08836a6 100644 --- a/src/routers/dynamic.py +++ b/src/server/routers/dynamic.py @@ -4,11 +4,11 @@ from fastapi.responses import HTMLResponse from fastapi.templating import Jinja2Templates -from query_processor import process_query -from server_utils import limiter +from server.query_processor import process_query +from server.server_utils import limiter router = APIRouter() -templates = Jinja2Templates(directory="templates") +templates = Jinja2Templates(directory="server/templates") @router.get("/{full_path:path}") diff --git a/src/routers/index.py b/src/server/routers/index.py similarity index 94% rename from src/routers/index.py rename to src/server/routers/index.py index b338c301..1ae33c85 100644 --- a/src/routers/index.py +++ b/src/server/routers/index.py @@ -5,11 +5,11 @@ from fastapi.templating import Jinja2Templates from config import EXAMPLE_REPOS -from query_processor import process_query -from server_utils import limiter +from server.query_processor import process_query +from server.server_utils import limiter router = APIRouter() -templates = Jinja2Templates(directory="templates") +templates = Jinja2Templates(directory="server/templates") @router.get("/", response_class=HTMLResponse) diff --git a/src/server_utils.py b/src/server/server_utils.py similarity index 100% rename from src/server_utils.py rename to src/server/server_utils.py diff --git a/src/static/apple-touch-icon.png b/src/server/static/apple-touch-icon.png similarity index 100% rename from src/static/apple-touch-icon.png rename to src/server/static/apple-touch-icon.png diff --git a/src/static/favicon-64.png b/src/server/static/favicon-64.png similarity index 100% rename from src/static/favicon-64.png rename to src/server/static/favicon-64.png diff --git a/src/static/favicon.ico b/src/server/static/favicon.ico similarity index 100% rename from src/static/favicon.ico rename to src/server/static/favicon.ico diff --git a/src/static/favicon.svg b/src/server/static/favicon.svg similarity index 100% rename from src/static/favicon.svg rename to src/server/static/favicon.svg diff --git a/src/static/js/utils.js b/src/server/static/js/utils.js similarity index 100% rename from src/static/js/utils.js rename to src/server/static/js/utils.js diff --git a/src/static/og-image.png b/src/server/static/og-image.png similarity index 100% rename from src/static/og-image.png rename to src/server/static/og-image.png diff --git a/src/static/robots.txt b/src/server/static/robots.txt similarity index 100% rename from src/static/robots.txt rename to src/server/static/robots.txt diff --git a/src/templates/api.jinja b/src/server/templates/api.jinja similarity index 100% rename from src/templates/api.jinja rename to src/server/templates/api.jinja diff --git a/src/templates/base.jinja b/src/server/templates/base.jinja similarity index 83% rename from src/templates/base.jinja rename to src/server/templates/base.jinja index a6e30bf5..99b99a97 100644 --- a/src/templates/base.jinja +++ b/src/server/templates/base.jinja @@ -3,7 +3,7 @@ - + @@ -11,14 +11,14 @@ content="Gitingest, AI tools, LLM integration, Ingest, Digest, Context, Prompt, Git workflow, codebase extraction, Git repository, Git automation, Summarize, prompt-friendly"> - + + href="/server/static/favicon-64.png"> + href="/server/static/apple-touch-icon.png"> @@ -31,12 +31,12 @@ content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text."> - + {% block title %}Gitingest{% endblock %} - + {% block extra_head %}{% endblock %} diff --git a/src/templates/components/footer.jinja b/src/server/templates/components/footer.jinja similarity index 100% rename from src/templates/components/footer.jinja rename to src/server/templates/components/footer.jinja diff --git a/src/templates/components/git_form.jinja b/src/server/templates/components/git_form.jinja similarity index 100% rename from src/templates/components/git_form.jinja rename to src/server/templates/components/git_form.jinja diff --git a/src/templates/components/navbar.jinja b/src/server/templates/components/navbar.jinja similarity index 100% rename from src/templates/components/navbar.jinja rename to src/server/templates/components/navbar.jinja diff --git a/src/templates/components/result.jinja b/src/server/templates/components/result.jinja similarity index 100% rename from src/templates/components/result.jinja rename to src/server/templates/components/result.jinja diff --git a/src/templates/git.jinja b/src/server/templates/git.jinja similarity index 100% rename from src/templates/git.jinja rename to src/server/templates/git.jinja diff --git a/src/templates/index.jinja b/src/server/templates/index.jinja similarity index 100% rename from src/templates/index.jinja rename to src/server/templates/index.jinja From d243db9107b5c8e51c28371168795f2188717394 Mon Sep 17 00:00:00 2001 From: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com> Date: Tue, 21 Jan 2025 07:20:20 +0100 Subject: [PATCH 2/7] move static to src/ --- src/server/main.py | 4 ++-- src/server/templates/base.jinja | 12 ++++++------ src/{server => }/static/apple-touch-icon.png | Bin src/{server => }/static/favicon-64.png | Bin src/{server => }/static/favicon.ico | Bin src/{server => }/static/favicon.svg | 0 src/{server => }/static/js/utils.js | 0 src/{server => }/static/og-image.png | Bin src/{server => }/static/robots.txt | 0 9 files changed, 8 insertions(+), 8 deletions(-) rename src/{server => }/static/apple-touch-icon.png (100%) rename src/{server => }/static/favicon-64.png (100%) rename src/{server => }/static/favicon.ico (100%) rename src/{server => }/static/favicon.svg (100%) rename src/{server => }/static/js/utils.js (100%) rename src/{server => }/static/og-image.png (100%) rename src/{server => }/static/robots.txt (100%) diff --git a/src/server/main.py b/src/server/main.py index 0554b1f3..e43773a6 100644 --- a/src/server/main.py +++ b/src/server/main.py @@ -156,7 +156,7 @@ async def rate_limit_exception_handler(request: Request, exc: Exception) -> Resp app.add_exception_handler(RateLimitExceeded, rate_limit_exception_handler) # Mount static files to serve CSS, JS, and other static assets -app.mount("/server/static", StaticFiles(directory="server/static"), name="static") +app.mount("/static", StaticFiles(directory="static"), name="static") # Set up API analytics middleware if an API key is provided if app_analytics_key := os.getenv("API_ANALYTICS_KEY"): @@ -235,7 +235,7 @@ async def robots() -> FileResponse: FileResponse The `robots.txt` file located in the static directory. """ - return FileResponse("server/static/robots.txt") + return FileResponse("static/robots.txt") # Include routers for modular endpoints diff --git a/src/server/templates/base.jinja b/src/server/templates/base.jinja index 99b99a97..a6e30bf5 100644 --- a/src/server/templates/base.jinja +++ b/src/server/templates/base.jinja @@ -3,7 +3,7 @@ - + @@ -11,14 +11,14 @@ content="Gitingest, AI tools, LLM integration, Ingest, Digest, Context, Prompt, Git workflow, codebase extraction, Git repository, Git automation, Summarize, prompt-friendly"> - + + href="/static/favicon-64.png"> + href="/static/apple-touch-icon.png"> @@ -31,12 +31,12 @@ content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text."> - + {% block title %}Gitingest{% endblock %} - + {% block extra_head %}{% endblock %} diff --git a/src/server/static/apple-touch-icon.png b/src/static/apple-touch-icon.png similarity index 100% rename from src/server/static/apple-touch-icon.png rename to src/static/apple-touch-icon.png diff --git a/src/server/static/favicon-64.png b/src/static/favicon-64.png similarity index 100% rename from src/server/static/favicon-64.png rename to src/static/favicon-64.png diff --git a/src/server/static/favicon.ico b/src/static/favicon.ico similarity index 100% rename from src/server/static/favicon.ico rename to src/static/favicon.ico diff --git a/src/server/static/favicon.svg b/src/static/favicon.svg similarity index 100% rename from src/server/static/favicon.svg rename to src/static/favicon.svg diff --git a/src/server/static/js/utils.js b/src/static/js/utils.js similarity index 100% rename from src/server/static/js/utils.js rename to src/static/js/utils.js diff --git a/src/server/static/og-image.png b/src/static/og-image.png similarity index 100% rename from src/server/static/og-image.png rename to src/static/og-image.png diff --git a/src/server/static/robots.txt b/src/static/robots.txt similarity index 100% rename from src/server/static/robots.txt rename to src/static/robots.txt From 1a1d587c6b910235b6bd522b4fcf23b685232238 Mon Sep 17 00:00:00 2001 From: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com> Date: Tue, 21 Jan 2025 07:22:59 +0100 Subject: [PATCH 3/7] merge --- CONTRIBUTING.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a393d1d9..9781d97a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -48,18 +48,18 @@ Thanks for your interest in contributing to Gitingest! 🚀 Gitingest aims to be pytest ``` -8. Run the app locally using Docker to test your changes (optional): +8. Navigate to src folder 1. Build the Docker image ``` bash - docker build -t gitingest . + cd src ``` - 2. Run the Docker container: + 2. Run the local web server: ``` bash - docker run -d --name gitingest -p 8000:8000 gitingest + uvicorn server.main:app ``` 3. Open your browser and navigate to `http://localhost:8000` to see the app running. From 9e48837f587b1555e6cb10f1f6dd0e8f808be63e Mon Sep 17 00:00:00 2001 From: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com> Date: Tue, 21 Jan 2025 12:48:53 +0100 Subject: [PATCH 4/7] fix config --- src/gitingest/cli.py | 2 +- src/gitingest/config.py | 10 ++++++++++ src/gitingest/query_ingestion.py | 2 +- src/gitingest/query_parser.py | 4 ++-- src/gitingest/repository_ingest.py | 2 +- src/server/main.py | 2 +- src/server/query_processor.py | 2 +- src/server/routers/download.py | 2 +- src/server/routers/index.py | 2 +- src/{config.py => server/server_config.py} | 7 +------ 10 files changed, 20 insertions(+), 15 deletions(-) create mode 100644 src/gitingest/config.py rename src/{config.py => server/server_config.py} (67%) diff --git a/src/gitingest/cli.py b/src/gitingest/cli.py index a21a4533..49a6c6c5 100644 --- a/src/gitingest/cli.py +++ b/src/gitingest/cli.py @@ -6,7 +6,7 @@ import click -from config import MAX_FILE_SIZE +from gitingest.config import MAX_FILE_SIZE from gitingest.repository_ingest import ingest diff --git a/src/gitingest/config.py b/src/gitingest/config.py new file mode 100644 index 00000000..88dd01d0 --- /dev/null +++ b/src/gitingest/config.py @@ -0,0 +1,10 @@ +""" Configuration file for the project. """ + +from pathlib import Path + +MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB +MAX_DIRECTORY_DEPTH = 20 # Maximum depth of directory traversal +MAX_FILES = 10_000 # Maximum number of files to process +MAX_TOTAL_SIZE_BYTES = 500 * 1024 * 1024 # 500 MB + +TMP_BASE_PATH = Path("/tmp/gitingest") diff --git a/src/gitingest/query_ingestion.py b/src/gitingest/query_ingestion.py index a6f94d23..a0bcfdf7 100644 --- a/src/gitingest/query_ingestion.py +++ b/src/gitingest/query_ingestion.py @@ -6,7 +6,7 @@ import tiktoken -from config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES +from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES from gitingest.exceptions import ( AlreadyVisitedError, InvalidNotebookError, diff --git a/src/gitingest/query_parser.py b/src/gitingest/query_parser.py index 435a7996..e7aff393 100644 --- a/src/gitingest/query_parser.py +++ b/src/gitingest/query_parser.py @@ -9,7 +9,7 @@ from pathlib import Path from urllib.parse import unquote, urlparse -from config import MAX_FILE_SIZE, TMP_BASE_PATH +from gitingest.config import MAX_FILE_SIZE, TMP_BASE_PATH from gitingest.exceptions import InvalidPatternError from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS from gitingest.repository_clone import _check_repo_exists, fetch_remote_branch_list @@ -163,7 +163,7 @@ async def _parse_repo_source(source: str) -> ParsedQuery: _id = str(uuid.uuid4()) slug = f"{user_name}-{repo_name}" - local_path = Path(TMP_BASE_PATH) / _id / slug + local_path = TMP_BASE_PATH / _id / slug url = f"https://{host}/{user_name}/{repo_name}" parsed = ParsedQuery( diff --git a/src/gitingest/repository_ingest.py b/src/gitingest/repository_ingest.py index 64b33ebb..f92c1c2d 100644 --- a/src/gitingest/repository_ingest.py +++ b/src/gitingest/repository_ingest.py @@ -4,7 +4,7 @@ import inspect import shutil -from config import TMP_BASE_PATH +from gitingest.config import TMP_BASE_PATH from gitingest.query_ingestion import run_ingest_query from gitingest.query_parser import ParsedQuery, parse_query from gitingest.repository_clone import CloneConfig, clone_repo diff --git a/src/server/main.py b/src/server/main.py index e43773a6..58bf2351 100644 --- a/src/server/main.py +++ b/src/server/main.py @@ -17,8 +17,8 @@ from slowapi.errors import RateLimitExceeded from starlette.middleware.trustedhost import TrustedHostMiddleware -from config import DELETE_REPO_AFTER, TMP_BASE_PATH from server.routers import download, dynamic, index +from server.server_config import DELETE_REPO_AFTER, TMP_BASE_PATH from server.server_utils import limiter # Load environment variables from .env file diff --git a/src/server/query_processor.py b/src/server/query_processor.py index d8ec7da1..b28ec047 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -6,10 +6,10 @@ from fastapi.templating import Jinja2Templates from starlette.templating import _TemplateResponse -from config import EXAMPLE_REPOS, MAX_DISPLAY_SIZE from gitingest.query_ingestion import run_ingest_query from gitingest.query_parser import ParsedQuery, parse_query from gitingest.repository_clone import CloneConfig, clone_repo +from server.server_config import EXAMPLE_REPOS, MAX_DISPLAY_SIZE from server.server_utils import Colors, log_slider_to_size templates = Jinja2Templates(directory="server/templates") diff --git a/src/server/routers/download.py b/src/server/routers/download.py index b4da647c..7afaaf38 100644 --- a/src/server/routers/download.py +++ b/src/server/routers/download.py @@ -3,7 +3,7 @@ from fastapi import APIRouter, HTTPException from fastapi.responses import Response -from config import TMP_BASE_PATH +from server.server_config import TMP_BASE_PATH router = APIRouter() diff --git a/src/server/routers/index.py b/src/server/routers/index.py index 1ae33c85..877d05fe 100644 --- a/src/server/routers/index.py +++ b/src/server/routers/index.py @@ -4,8 +4,8 @@ from fastapi.responses import HTMLResponse from fastapi.templating import Jinja2Templates -from config import EXAMPLE_REPOS from server.query_processor import process_query +from server.server_config import EXAMPLE_REPOS from server.server_utils import limiter router = APIRouter() diff --git a/src/config.py b/src/server/server_config.py similarity index 67% rename from src/config.py rename to src/server/server_config.py index 7365ab8b..6310667d 100644 --- a/src/config.py +++ b/src/server/server_config.py @@ -1,12 +1,7 @@ -""" Configuration file for the project. """ +""" Configuration for the server. """ from pathlib import Path -MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB -MAX_DIRECTORY_DEPTH = 20 # Maximum depth of directory traversal -MAX_FILES = 10_000 # Maximum number of files to process -MAX_TOTAL_SIZE_BYTES = 500 * 1024 * 1024 # 500 MB - MAX_DISPLAY_SIZE: int = 300_000 TMP_BASE_PATH = Path("/tmp/gitingest") DELETE_REPO_AFTER: int = 60 * 60 # In seconds From 693ff27f5c7606e9595708973843bb41219ebc20 Mon Sep 17 00:00:00 2001 From: Romain Courtois Date: Fri, 24 Jan 2025 05:54:57 +0000 Subject: [PATCH 5/7] move utils in server --- src/gitingest/config.py | 1 + src/server/main.py | 8 +- src/server/routers/download.py | 2 +- src/server/routers/dynamic.py | 2 +- src/server/server_config.py | 4 +- src/server/server_utils.py | 141 ++++++++++++++++++++++++++++++++- src/utils.py | 138 -------------------------------- tests/test_cli.py | 2 +- 8 files changed, 148 insertions(+), 150 deletions(-) delete mode 100644 src/utils.py diff --git a/src/gitingest/config.py b/src/gitingest/config.py index 88dd01d0..291942f3 100644 --- a/src/gitingest/config.py +++ b/src/gitingest/config.py @@ -7,4 +7,5 @@ MAX_FILES = 10_000 # Maximum number of files to process MAX_TOTAL_SIZE_BYTES = 500 * 1024 * 1024 # 500 MB +OUTPUT_FILE_PATH = "digest.txt" TMP_BASE_PATH = Path("/tmp/gitingest") diff --git a/src/server/main.py b/src/server/main.py index bbf1486f..fbfb9e94 100644 --- a/src/server/main.py +++ b/src/server/main.py @@ -10,9 +10,10 @@ from slowapi.errors import RateLimitExceeded from starlette.middleware.trustedhost import TrustedHostMiddleware +from gitingest.config import TMP_BASE_PATH from server.routers import download, dynamic, index -from server.server_config import DELETE_REPO_AFTER, TMP_BASE_PATH -from server.server_utils import limiter +from server.server_config import DELETE_REPO_AFTER, templates +from server.server_utils import lifespan, limiter, rate_limit_exception_handler # Load environment variables from .env file load_dotenv() @@ -43,9 +44,6 @@ # Add middleware to enforce allowed hosts app.add_middleware(TrustedHostMiddleware, allowed_hosts=allowed_hosts) -# Set up template rendering -templates = Jinja2Templates(directory="server/templates") - @app.get("/health") async def health_check() -> dict[str, str]: diff --git a/src/server/routers/download.py b/src/server/routers/download.py index 7afaaf38..b868444d 100644 --- a/src/server/routers/download.py +++ b/src/server/routers/download.py @@ -3,7 +3,7 @@ from fastapi import APIRouter, HTTPException from fastapi.responses import Response -from server.server_config import TMP_BASE_PATH +from gitingest.config import TMP_BASE_PATH router = APIRouter() diff --git a/src/server/routers/dynamic.py b/src/server/routers/dynamic.py index c4dd560d..74febf8d 100644 --- a/src/server/routers/dynamic.py +++ b/src/server/routers/dynamic.py @@ -4,10 +4,10 @@ from fastapi.responses import HTMLResponse from server.query_processor import process_query +from server.server_config import templates from server.server_utils import limiter router = APIRouter() -templates = Jinja2Templates(directory="server/templates") @router.get("/{full_path:path}") diff --git a/src/server/server_config.py b/src/server/server_config.py index da9ab217..8c34c399 100644 --- a/src/server/server_config.py +++ b/src/server/server_config.py @@ -7,8 +7,6 @@ MAX_DISPLAY_SIZE: int = 300_000 DELETE_REPO_AFTER: int = 60 * 60 # In seconds -OUTPUT_FILE_PATH = "digest.txt" -TMP_BASE_PATH = Path("/tmp/gitingest") EXAMPLE_REPOS: list[dict[str, str]] = [ {"name": "Gitingest", "url": "https://github.com/cyclotruc/gitingest"}, @@ -18,4 +16,4 @@ {"name": "ApiAnalytics", "url": "https://github.com/tom-draper/api-analytics"}, ] -templates = Jinja2Templates(directory="templates") +templates = Jinja2Templates(directory="server/templates") diff --git a/src/server/server_utils.py b/src/server/server_utils.py index 432bbb2a..97b49751 100644 --- a/src/server/server_utils.py +++ b/src/server/server_utils.py @@ -1,14 +1,153 @@ """ Utility functions for the server. """ +import asyncio import math +import shutil +import time +from contextlib import asynccontextmanager +from pathlib import Path -from slowapi import Limiter +from fastapi import FastAPI, Request +from fastapi.responses import Response +from slowapi import Limiter, _rate_limit_exceeded_handler +from slowapi.errors import RateLimitExceeded from slowapi.util import get_remote_address +from gitingest.config import TMP_BASE_PATH +from server.server_config import DELETE_REPO_AFTER + # Initialize a rate limiter limiter = Limiter(key_func=get_remote_address) + + + + +async def rate_limit_exception_handler(request: Request, exc: Exception) -> Response: + """ + Custom exception handler for rate-limiting errors. + + Parameters + ---------- + request : Request + The incoming HTTP request. + exc : Exception + The exception raised, expected to be RateLimitExceeded. + + Returns + ------- + Response + A response indicating that the rate limit has been exceeded. + + Raises + ------ + exc + If the exception is not a RateLimitExceeded error, it is re-raised. + """ + if isinstance(exc, RateLimitExceeded): + # Delegate to the default rate limit handler + return _rate_limit_exceeded_handler(request, exc) + # Re-raise other exceptions + raise exc + + +@asynccontextmanager +async def lifespan(_: FastAPI): + """ + Lifecycle manager for handling startup and shutdown events for the FastAPI application. + + Parameters + ---------- + _ : FastAPI + The FastAPI application instance (unused). + + Yields + ------- + None + Yields control back to the FastAPI application while the background task runs. + """ + task = asyncio.create_task(_remove_old_repositories()) + + yield + # Cancel the background task on shutdown + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + +async def _remove_old_repositories(): + """ + Periodically remove old repository folders. + + Background task that runs periodically to clean up old repository directories. + + This task: + - Scans the TMP_BASE_PATH directory every 60 seconds + - Removes directories older than DELETE_REPO_AFTER seconds + - Before deletion, logs repository URLs to history.txt if a matching .txt file exists + - Handles errors gracefully if deletion fails + + The repository URL is extracted from the first .txt file in each directory, + assuming the filename format: "owner-repository.txt" + """ + while True: + try: + if not TMP_BASE_PATH.exists(): + await asyncio.sleep(60) + continue + + current_time = time.time() + + for folder in TMP_BASE_PATH.iterdir(): + if folder.is_dir(): + continue + + # Skip if folder is not old enough + if current_time - folder.stat().st_ctime <= DELETE_REPO_AFTER: + continue + + await _process_folder(folder) + + except Exception as e: + print(f"Error in _remove_old_repositories: {e}") + + await asyncio.sleep(60) + + +async def _process_folder(folder: Path) -> None: + """ + Process a single folder for deletion and logging. + + Parameters + ---------- + folder : Path + The path to the folder to be processed. + """ + # Try to log repository URL before deletion + try: + txt_files = [f for f in folder.iterdir() if f.suffix == ".txt"] + + # Extract owner and repository name from the filename + if txt_files and "-" in (filename := txt_files[0].stem): + owner, repo = filename.split("-", 1) + repo_url = f"{owner}/{repo}" + + with open("history.txt", mode="a", encoding="utf-8") as history: + history.write(f"{repo_url}\n") + + except Exception as e: + print(f"Error logging repository URL for {folder}: {e}") + + # Delete the folder + try: + shutil.rmtree(folder) + except Exception as e: + print(f"Error deleting {folder}: {e}") + + def log_slider_to_size(position: int) -> int: """ Convert a slider position to a file size in bytes using a logarithmic scale. diff --git a/src/utils.py b/src/utils.py deleted file mode 100644 index 7c968dc0..00000000 --- a/src/utils.py +++ /dev/null @@ -1,138 +0,0 @@ -""" Utility functions for the FastAPI server. """ - -import asyncio -import shutil -import time -from contextlib import asynccontextmanager -from pathlib import Path - -from fastapi import FastAPI, Request -from fastapi.responses import Response -from slowapi import _rate_limit_exceeded_handler -from slowapi.errors import RateLimitExceeded - -from config import DELETE_REPO_AFTER, TMP_BASE_PATH - - -async def rate_limit_exception_handler(request: Request, exc: Exception) -> Response: - """ - Custom exception handler for rate-limiting errors. - - Parameters - ---------- - request : Request - The incoming HTTP request. - exc : Exception - The exception raised, expected to be RateLimitExceeded. - - Returns - ------- - Response - A response indicating that the rate limit has been exceeded. - - Raises - ------ - exc - If the exception is not a RateLimitExceeded error, it is re-raised. - """ - if isinstance(exc, RateLimitExceeded): - # Delegate to the default rate limit handler - return _rate_limit_exceeded_handler(request, exc) - # Re-raise other exceptions - raise exc - - -@asynccontextmanager -async def lifespan(_: FastAPI): - """ - Lifecycle manager for handling startup and shutdown events for the FastAPI application. - - Parameters - ---------- - _ : FastAPI - The FastAPI application instance (unused). - - Yields - ------- - None - Yields control back to the FastAPI application while the background task runs. - """ - task = asyncio.create_task(_remove_old_repositories()) - - yield - # Cancel the background task on shutdown - task.cancel() - try: - await task - except asyncio.CancelledError: - pass - - -async def _remove_old_repositories(): - """ - Periodically remove old repository folders. - - Background task that runs periodically to clean up old repository directories. - - This task: - - Scans the TMP_BASE_PATH directory every 60 seconds - - Removes directories older than DELETE_REPO_AFTER seconds - - Before deletion, logs repository URLs to history.txt if a matching .txt file exists - - Handles errors gracefully if deletion fails - - The repository URL is extracted from the first .txt file in each directory, - assuming the filename format: "owner-repository.txt" - """ - while True: - try: - if not TMP_BASE_PATH.exists(): - await asyncio.sleep(60) - continue - - current_time = time.time() - - for folder in TMP_BASE_PATH.iterdir(): - if folder.is_dir(): - continue - - # Skip if folder is not old enough - if current_time - folder.stat().st_ctime <= DELETE_REPO_AFTER: - continue - - await _process_folder(folder) - - except Exception as e: - print(f"Error in _remove_old_repositories: {e}") - - await asyncio.sleep(60) - - -async def _process_folder(folder: Path) -> None: - """ - Process a single folder for deletion and logging. - - Parameters - ---------- - folder : Path - The path to the folder to be processed. - """ - # Try to log repository URL before deletion - try: - txt_files = [f for f in folder.iterdir() if f.suffix == ".txt"] - - # Extract owner and repository name from the filename - if txt_files and "-" in (filename := txt_files[0].stem): - owner, repo = filename.split("-", 1) - repo_url = f"{owner}/{repo}" - - with open("history.txt", mode="a", encoding="utf-8") as history: - history.write(f"{repo_url}\n") - - except Exception as e: - print(f"Error logging repository URL for {folder}: {e}") - - # Delete the folder - try: - shutil.rmtree(folder) - except Exception as e: - print(f"Error deleting {folder}: {e}") diff --git a/tests/test_cli.py b/tests/test_cli.py index 86bd2271..0b652390 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,8 +4,8 @@ from click.testing import CliRunner -from config import MAX_FILE_SIZE, OUTPUT_FILE_PATH from gitingest.cli import main +from gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_PATH def test_cli_with_default_options(): From 4c26757408f5b2a6fdfa0287be61bcabb0f5c400 Mon Sep 17 00:00:00 2001 From: Romain Courtois Date: Fri, 24 Jan 2025 06:06:26 +0000 Subject: [PATCH 6/7] refactor: remove unnecessary blank lines in query_processor, index, and server_utils --- src/server/query_processor.py | 1 - src/server/routers/index.py | 1 - src/server/server_utils.py | 4 ---- 3 files changed, 6 deletions(-) diff --git a/src/server/query_processor.py b/src/server/query_processor.py index 21eb991d..69fcfc58 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -5,7 +5,6 @@ from fastapi import Request from starlette.templating import _TemplateResponse - from gitingest.query_ingestion import run_ingest_query from gitingest.query_parser import ParsedQuery, parse_query from gitingest.repository_clone import CloneConfig, clone_repo diff --git a/src/server/routers/index.py b/src/server/routers/index.py index ae2c6916..5b08a244 100644 --- a/src/server/routers/index.py +++ b/src/server/routers/index.py @@ -10,7 +10,6 @@ router = APIRouter() - @router.get("/", response_class=HTMLResponse) async def home(request: Request) -> HTMLResponse: """ diff --git a/src/server/server_utils.py b/src/server/server_utils.py index 97b49751..a316346e 100644 --- a/src/server/server_utils.py +++ b/src/server/server_utils.py @@ -20,10 +20,6 @@ limiter = Limiter(key_func=get_remote_address) - - - - async def rate_limit_exception_handler(request: Request, exc: Exception) -> Response: """ Custom exception handler for rate-limiting errors. From e3be340315fad0b1a2887299e90fc638529c0f49 Mon Sep 17 00:00:00 2001 From: Romain Courtois Date: Fri, 24 Jan 2025 06:06:56 +0000 Subject: [PATCH 7/7] refactor: remove unused imports in main.py and server_config.py --- src/server/main.py | 3 +-- src/server/server_config.py | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/server/main.py b/src/server/main.py index fbfb9e94..50fabc92 100644 --- a/src/server/main.py +++ b/src/server/main.py @@ -10,9 +10,8 @@ from slowapi.errors import RateLimitExceeded from starlette.middleware.trustedhost import TrustedHostMiddleware -from gitingest.config import TMP_BASE_PATH from server.routers import download, dynamic, index -from server.server_config import DELETE_REPO_AFTER, templates +from server.server_config import templates from server.server_utils import lifespan, limiter, rate_limit_exception_handler # Load environment variables from .env file diff --git a/src/server/server_config.py b/src/server/server_config.py index 8c34c399..978a9ba3 100644 --- a/src/server/server_config.py +++ b/src/server/server_config.py @@ -1,7 +1,5 @@ """ Configuration for the server. """ -from pathlib import Path - from fastapi.templating import Jinja2Templates MAX_DISPLAY_SIZE: int = 300_000