Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from pathlib import Path

from fastapi.templating import Jinja2Templates

MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
MAX_DIRECTORY_DEPTH = 20 # Maximum depth of directory traversal
MAX_FILES = 10_000 # Maximum number of files to process
Expand All @@ -20,3 +22,5 @@
{"name": "Tldraw", "url": "https://github.com/tldraw/tldraw"},
{"name": "ApiAnalytics", "url": "https://github.com/tom-draper/api-analytics"},
]

# Shared Jinja2 template renderer rooted at the "templates" directory.
# Other modules import this instance from config rather than building their own.
templates = Jinja2Templates(directory="templates")
2 changes: 1 addition & 1 deletion src/gitingest/query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ async def _configure_branch_and_subpath(remaining_parts: list[str], url: str) ->
# Fetch the list of branches from the remote repository
branches: list[str] = await fetch_remote_branch_list(url)
except RuntimeError as e:
warnings.warn(f"Warning: Failed to fetch branch list: {e}")
warnings.warn(f"Warning: Failed to fetch branch list: {e}", RuntimeWarning)
return remaining_parts.pop(0)

branch = []
Expand Down
139 changes: 3 additions & 136 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,157 +1,27 @@
""" Main module for the FastAPI application. """

import asyncio
import os
import shutil
import time
from contextlib import asynccontextmanager
from pathlib import Path

from api_analytics.fastapi import Analytics
from dotenv import load_dotenv
from fastapi import FastAPI, Request
from fastapi.responses import FileResponse, HTMLResponse, Response
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from slowapi import _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from starlette.middleware.trustedhost import TrustedHostMiddleware

from config import DELETE_REPO_AFTER, TMP_BASE_PATH
from config import templates
from routers import download, dynamic, index
from server_utils import limiter
from utils import lifespan, rate_limit_exception_handler

# Load environment variables from .env file
load_dotenv()


async def remove_old_repositories():
    """
    Run forever as a background janitor for cloned repository folders.

    One pass is made every 60 seconds. During a pass, each entry of
    TMP_BASE_PATH that is a directory and whose ``st_ctime`` lies more than
    DELETE_REPO_AFTER seconds in the past is handed to ``process_folder``,
    which logs it to history.txt and deletes it. When TMP_BASE_PATH does not
    exist the pass does nothing.

    Any exception raised during a pass is printed and the loop continues with
    the next pass, so a single failure never stops the janitor.
    """
    while True:
        try:
            if TMP_BASE_PATH.exists():
                now = time.time()

                for entry in TMP_BASE_PATH.iterdir():
                    # Short-circuiting keeps stat() from being called on non-directories.
                    if entry.is_dir() and now - entry.stat().st_ctime > DELETE_REPO_AFTER:
                        await process_folder(entry)

        except Exception as e:
            print(f"Error in remove_old_repositories: {e}")

        # Wait before the next pass
        await asyncio.sleep(60)


async def process_folder(folder: Path) -> None:
    """
    Record a folder's repository in history.txt, then remove the folder.

    The repository is derived from the stem of the first ``.txt`` file found
    in the folder, which is expected to look like ``owner-repository``. Both
    steps are best-effort: a failure in either one is printed and does not
    propagate to the caller.

    Parameters
    ----------
    folder : Path
        The path to the folder to be processed.
    """
    # Step 1: append "owner/repo" to the history file, if a matching .txt file exists
    try:
        candidates = [entry for entry in folder.iterdir() if entry.suffix == ".txt"]
        stem = candidates[0].stem if candidates else ""

        if "-" in stem:
            # Only the first dash separates owner from repository name
            owner, _, repo = stem.partition("-")
            repo_url = f"{owner}/{repo}"
            with open("history.txt", mode="a", encoding="utf-8") as history:
                history.write(f"{repo_url}\n")

    except Exception as e:
        print(f"Error logging repository URL for {folder}: {e}")

    # Step 2: remove the folder and everything below it
    try:
        shutil.rmtree(folder)
    except Exception as e:
        print(f"Error deleting {folder}: {e}")


@asynccontextmanager
async def lifespan(_: FastAPI):
    """
    Manage application startup and shutdown.

    On startup the repository cleanup loop is scheduled as a background task;
    on shutdown that task is cancelled and awaited so it finishes cleanly.

    Parameters
    ----------
    _ : FastAPI
        The FastAPI application instance (unused).

    Yields
    -------
    None
        Yields control back to the FastAPI application while the background task runs.
    """
    cleanup_task = asyncio.create_task(remove_old_repositories())

    yield

    # Shutdown: stop the cleanup loop and wait for it to acknowledge the cancellation
    cleanup_task.cancel()
    try:
        await cleanup_task
    except asyncio.CancelledError:
        # Expected outcome of cancelling the task above
        pass


# Initialize the FastAPI application with lifespan
# (the lifespan context manager starts the repository cleanup task and cancels it on shutdown)
app = FastAPI(lifespan=lifespan)
# Store the rate limiter imported from server_utils on the application state
# NOTE(review): presumably read from there by slowapi — confirm
app.state.limiter = limiter


async def rate_limit_exception_handler(request: Request, exc: Exception) -> Response:
    """
    Handle rate-limit errors by delegating to slowapi's default handler.

    Parameters
    ----------
    request : Request
        The incoming HTTP request.
    exc : Exception
        The exception raised, expected to be RateLimitExceeded.

    Returns
    -------
    Response
        The response produced by the default rate limit handler.

    Raises
    ------
    exc
        Re-raised unchanged when it is not a RateLimitExceeded error.
    """
    # Guard clause: anything that is not a rate-limit error is not ours to handle
    if not isinstance(exc, RateLimitExceeded):
        raise exc

    return _rate_limit_exceeded_handler(request, exc)


# Register the custom exception handler for rate limits
app.add_exception_handler(RateLimitExceeded, rate_limit_exception_handler)

Expand All @@ -174,9 +44,6 @@ async def rate_limit_exception_handler(request: Request, exc: Exception) -> Resp
# Add middleware to enforce allowed hosts
app.add_middleware(TrustedHostMiddleware, allowed_hosts=allowed_hosts)

# Set up template rendering
templates = Jinja2Templates(directory="templates")


@app.get("/health")
async def health_check() -> dict[str, str]:
Expand Down
5 changes: 1 addition & 4 deletions src/query_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,14 @@
from functools import partial

from fastapi import Request
from fastapi.templating import Jinja2Templates
from starlette.templating import _TemplateResponse

from config import EXAMPLE_REPOS, MAX_DISPLAY_SIZE
from config import EXAMPLE_REPOS, MAX_DISPLAY_SIZE, templates
from gitingest.query_ingestion import run_ingest_query
from gitingest.query_parser import ParsedQuery, parse_query
from gitingest.repository_clone import CloneConfig, clone_repo
from server_utils import Colors, log_slider_to_size

templates = Jinja2Templates(directory="templates")


async def process_query(
request: Request,
Expand Down
3 changes: 1 addition & 2 deletions src/routers/dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@

from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates

from config import templates
from query_processor import process_query
from server_utils import limiter

router = APIRouter()
templates = Jinja2Templates(directory="templates")


@router.get("/{full_path:path}")
Expand Down
4 changes: 1 addition & 3 deletions src/routers/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@

from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates

from config import EXAMPLE_REPOS
from config import EXAMPLE_REPOS, templates
from query_processor import process_query
from server_utils import limiter

router = APIRouter()
templates = Jinja2Templates(directory="templates")


@router.get("/", response_class=HTMLResponse)
Expand Down
138 changes: 138 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
""" Utility functions for the FastAPI server. """

import asyncio
import shutil
import time
from contextlib import asynccontextmanager
from pathlib import Path

from fastapi import FastAPI, Request
from fastapi.responses import Response
from slowapi import _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded

from config import DELETE_REPO_AFTER, TMP_BASE_PATH


async def rate_limit_exception_handler(request: Request, exc: Exception) -> Response:
    """
    Turn a rate-limiting error into an HTTP response.

    Parameters
    ----------
    request : Request
        The incoming HTTP request.
    exc : Exception
        The exception raised, expected to be RateLimitExceeded.

    Returns
    -------
    Response
        The result of slowapi's default rate limit handler for this request.

    Raises
    ------
    exc
        If the exception is not a RateLimitExceeded error, it is re-raised as is.
    """
    is_rate_limit_error = isinstance(exc, RateLimitExceeded)

    if is_rate_limit_error:
        # The default slowapi handler builds the actual response
        return _rate_limit_exceeded_handler(request, exc)

    # Not a rate-limit error: let it propagate
    raise exc


@asynccontextmanager
async def lifespan(_: FastAPI):
    """
    Start the repository cleanup task on startup and stop it on shutdown.

    Parameters
    ----------
    _ : FastAPI
        The FastAPI application instance (unused).

    Yields
    -------
    None
        Yields control back to the FastAPI application while the background task runs.
    """
    background_cleanup = asyncio.create_task(_remove_old_repositories())

    yield

    # After the application is done: cancel the loop and wait until it has stopped
    background_cleanup.cancel()
    try:
        await background_cleanup
    except asyncio.CancelledError:
        # Cancellation is the expected way for the cleanup loop to end
        pass


async def _remove_old_repositories() -> None:
    """
    Periodically remove old repository folders.

    Background task that runs periodically to clean up old repository directories.

    This task:
    - Scans the TMP_BASE_PATH directory every 60 seconds
    - Removes directories older than DELETE_REPO_AFTER seconds
    - Before deletion, logs repository URLs to history.txt if a matching .txt file exists
    - Handles errors gracefully if deletion fails

    The repository URL is extracted from the first .txt file in each directory,
    assuming the filename format: "owner-repository.txt"

    The loop never returns on its own; it ends only when the task is cancelled.
    """
    while True:
        try:
            if not TMP_BASE_PATH.exists():
                await asyncio.sleep(60)
                continue

            current_time = time.time()

            for folder in TMP_BASE_PATH.iterdir():
                # Only directories are candidates for cleanup; skip any other entry.
                # (The check must be negated: skipping directories here would mean
                # nothing is ever cleaned up.)
                if not folder.is_dir():
                    continue

                # Skip if folder is not old enough
                if current_time - folder.stat().st_ctime <= DELETE_REPO_AFTER:
                    continue

                await _process_folder(folder)

        except Exception as e:
            # Keep the janitor alive: report the failure and retry on the next pass
            print(f"Error in _remove_old_repositories: {e}")

        await asyncio.sleep(60)


async def _process_folder(folder: Path) -> None:
    """
    Process a single folder for deletion and logging.

    The directory listing, history append and recursive delete are all
    blocking filesystem calls. They run in a worker thread so that removing a
    folder does not stall the event loop while it is in progress.

    Both steps are best-effort: failures are printed and never propagate to
    the caller.

    Parameters
    ----------
    folder : Path
        The path to the folder to be processed.
    """
    await asyncio.to_thread(_log_and_delete_folder, folder)


def _log_and_delete_folder(folder: Path) -> None:
    """
    Blocking part of ``_process_folder``: log the repository, then delete the folder.

    Parameters
    ----------
    folder : Path
        The path to the folder to be logged and removed.
    """
    # Try to log repository URL before deletion
    try:
        txt_files = [f for f in folder.iterdir() if f.suffix == ".txt"]

        # Extract owner and repository name from the filename ("owner-repository.txt");
        # only the first dash is treated as the separator
        if txt_files and "-" in (filename := txt_files[0].stem):
            owner, repo = filename.split("-", 1)
            repo_url = f"{owner}/{repo}"

            with open("history.txt", mode="a", encoding="utf-8") as history:
                history.write(f"{repo_url}\n")

    except Exception as e:
        # A logging failure must not prevent the folder from being deleted
        print(f"Error logging repository URL for {folder}: {e}")

    # Delete the folder
    try:
        shutil.rmtree(folder)
    except Exception as e:
        print(f"Error deleting {folder}: {e}")
Loading
Loading