Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion src/gitingest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,32 @@ def main(
exclude_pattern: tuple[str, ...],
include_pattern: tuple[str, ...],
) -> None:
"""Analyze a directory and create a text dump of its contents."""
"""
Analyze a directory or repository and create a text dump of its contents.

This command analyzes the contents of a specified source directory or repository,
applies custom include and exclude patterns, and generates a text summary of the analysis
which is then written to an output file.

Parameters
----------
source : str
The source directory or repository to analyze.
output : str | None
The path where the output file will be written. If not specified, the output will be written
to a file named `<repo_name>.txt` in the current directory.
max_size : int
The maximum file size to process, in bytes. Files larger than this size will be ignored.
exclude_pattern : tuple[str, ...]
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
include_pattern : tuple[str, ...]
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.

Raises
------
click.Abort
If there is an error during the execution of the command, this exception is raised to abort the process.
"""
try:
# Combine default and custom ignore patterns
exclude_patterns = list(exclude_pattern)
Expand Down
31 changes: 27 additions & 4 deletions src/gitingest/clone.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,32 @@
import asyncio
from dataclasses import dataclass

from gitingest.utils import AsyncTimeoutError, async_timeout
from gitingest.exceptions import AsyncTimeoutError
from gitingest.utils import async_timeout

CLONE_TIMEOUT: int = 20


@dataclass
class CloneConfig:
"""
Configuration for cloning a Git repository.

This class holds the necessary parameters for cloning a repository to a local path, including
the repository's URL, the target local path, and optional parameters for a specific commit or branch.

Attributes
----------
url : str
The URL of the Git repository to clone.
local_path : str
The local directory where the repository will be cloned.
commit : str | None, optional
The specific commit hash to check out after cloning (default is None).
branch : str | None, optional
The branch to clone (default is None).
"""

url: str
local_path: str
commit: str | None = None
Expand All @@ -17,7 +36,11 @@ class CloneConfig:
@async_timeout(CLONE_TIMEOUT)
async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
"""
Clones a repository to a local path based on the provided query parameters.
Clones a repository to a local path based on the provided configuration.

This function handles the process of cloning a Git repository to the local file system.
It can clone a specific branch or commit if provided, and it raises exceptions if
any errors occur during the cloning process.

Parameters
----------
Expand All @@ -30,7 +53,7 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:

Returns
-------
Tuple[bytes, bytes]
tuple[bytes, bytes]
A tuple containing the stdout and stderr of the git commands executed.

Raises
Expand Down Expand Up @@ -123,7 +146,7 @@ async def _run_git_command(*args: str) -> tuple[bytes, bytes]:

Returns
-------
Tuple[bytes, bytes]
tuple[bytes, bytes]
A tuple containing the stdout and stderr of the git command.

Raises
Expand Down
29 changes: 29 additions & 0 deletions src/gitingest/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
class InvalidPatternError(ValueError):
    """
    Exception raised when a pattern contains invalid characters.

    This exception is used to signal that a pattern provided for some operation
    contains characters that are not allowed. The valid characters for the pattern
    include alphanumeric characters, dash (-), underscore (_), dot (.), forward slash (/),
    plus (+), and asterisk (*).

    Parameters
    ----------
    pattern : str
        The invalid pattern that caused the error.

    Attributes
    ----------
    pattern : str
        The offending pattern, kept so callers can inspect it without parsing the message.
    """

    def __init__(self, pattern: str) -> None:
        # Keep the raw pattern: the base class only ever sees the formatted message.
        self.pattern = pattern
        super().__init__(
            f"Pattern '{pattern}' contains invalid characters. Only alphanumeric characters, dash (-), "
            "underscore (_), dot (.), forward slash (/), plus (+), and asterisk (*) are allowed."
        )

    def __reduce__(self) -> tuple:
        """
        Describe how to rebuild this exception when it is copied or pickled.

        The default ``BaseException`` reduction re-invokes the constructor with ``self.args``,
        which here is the already formatted message. That would feed the message back in as the
        pattern and wrap it a second time. Rebuilding from the raw pattern keeps the message stable.

        Returns
        -------
        tuple
            The class, its constructor arguments, and the instance state.
        """
        return (type(self), (self.pattern,), self.__dict__)


class AsyncTimeoutError(Exception):
    """
    Signal that an asynchronous operation ran past its allowed time.

    The `async_timeout` decorator raises this exception when the coroutine it wraps
    does not finish within the configured time limit.
    """
36 changes: 34 additions & 2 deletions src/gitingest/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,39 @@ def ingest(
exclude_patterns: list[str] | str | None = None,
output: str | None = None,
) -> tuple[str, str, str]:
"""
Main entry point for ingesting a source and processing its contents.

This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
and processes its files according to the specified query parameters. It returns a summary, a tree-like
structure of the files, and the content of the files. The results can optionally be written to an output file.

Parameters
----------
source : str
The source to analyze, which can be a URL (for a GitHub repository) or a local directory path.
max_file_size : int, optional
The maximum allowed file size for file ingestion. Files larger than this size are ignored, by default 10*1024*1024 (10 MB).
include_patterns : list[str] | str | None, optional
A pattern or list of patterns specifying which files to include in the analysis. If `None`, all files are included.
exclude_patterns : list[str] | str | None, optional
A pattern or list of patterns specifying which files to exclude from the analysis. If `None`, no files are excluded.
output : str | None, optional
The file path where the summary and content should be written. If `None`, the results are not written to a file.

Returns
-------
tuple[str, str, str]
A tuple containing:
- A summary string of the analyzed repository or directory.
- A tree-like string representation of the file structure.
- The content of the files in the repository or directory.

Raises
------
TypeError
If `clone_repo` does not return a coroutine, or if the `source` is of an unsupported type.
"""
try:
query = parse_query(
source=source,
Expand All @@ -42,8 +74,8 @@ def ingest(

summary, tree, content = ingest_from_query(query)

if output:
with open(f"{output}", "w") as f:
if output is not None:
with open(output, "w") as f:
f.write(tree + "\n" + content)

return summary, tree, content
Expand Down
Loading
Loading