Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,6 @@ pythonpath = ["src"]
testpaths = ["tests/"]
python_files = "test_*.py"
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
python_classes = "Test*"
python_functions = "test_*"
6 changes: 3 additions & 3 deletions src/gitingest/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
""" Gitingest: A package for ingesting data from Git repositories. """

from gitingest.cloning import clone_repo
from gitingest.cloning import clone
from gitingest.entrypoint import ingest, ingest_async
from gitingest.ingestion import ingest_query
from gitingest.query_parsing import parse_query
from gitingest.repository_ingest import ingest, ingest_async

__all__ = ["ingest_query", "clone_repo", "parse_query", "ingest", "ingest_async"]
__all__ = ["ingest_query", "clone", "parse_query", "ingest", "ingest_async"]
2 changes: 1 addition & 1 deletion src/gitingest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import click

from gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_NAME
from gitingest.repository_ingest import ingest_async
from gitingest.entrypoint import ingest_async


@click.command()
Expand Down
34 changes: 2 additions & 32 deletions src/gitingest/cloning.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,17 @@

import asyncio
import os
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional, Tuple

from gitingest.ingestion_schema import CloneConfig
from gitingest.utils.timeout_wrapper import async_timeout

TIMEOUT: int = 60


@dataclass
class CloneConfig:
    """
    Configuration for cloning a Git repository.

    This class holds the necessary parameters for cloning a repository to a local path, including
    the repository's URL, the target local path, and optional parameters for a specific commit or branch.

    Attributes
    ----------
    url : str
        The URL of the Git repository to clone.
    local_path : str
        The local directory where the repository will be cloned.
    commit : str, optional
        The specific commit hash to check out after cloning (default is None).
    branch : str, optional
        The branch to clone (default is None).
    subpath : str
        The subpath to clone from the repository (default is "/").
    blob : bool
        Flag marking the clone target as a "blob" (default is False).
        NOTE(review): presumably this means `subpath` points at a single file rather
        than a directory -- confirm against the clone implementation.
    """

    url: str
    local_path: str
    commit: Optional[str] = None
    branch: Optional[str] = None
    subpath: str = "/"
    blob: bool = False


@async_timeout(TIMEOUT)
async def clone_repo(config: CloneConfig) -> None:
async def clone(config: CloneConfig) -> None:
"""
Clone a repository to a local path based on the provided configuration.

Expand Down
22 changes: 11 additions & 11 deletions src/gitingest/repository_ingest.py → src/gitingest/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import shutil
from typing import Optional, Set, Tuple, Union

from gitingest.cloning import clone_repo
from gitingest.cloning import clone
from gitingest.config import TMP_BASE_PATH
from gitingest.ingestion import ingest_query
from gitingest.query_parsing import ParsedQuery, parse_query
from gitingest.query_parsing import IngestionQuery, parse_query


async def ingest_async(
Expand Down Expand Up @@ -53,37 +53,37 @@ async def ingest_async(
Raises
------
TypeError
If `clone_repo` does not return a coroutine, or if the `source` is of an unsupported type.
If `clone` does not return a coroutine, or if the `source` is of an unsupported type.
"""
repo_cloned = False

try:
parsed_query: ParsedQuery = await parse_query(
query: IngestionQuery = await parse_query(
source=source,
max_file_size=max_file_size,
from_web=False,
include_patterns=include_patterns,
ignore_patterns=exclude_patterns,
)

if parsed_query.url:
selected_branch = branch if branch else parsed_query.branch # prioritize branch argument
parsed_query.branch = selected_branch
if query.url:
selected_branch = branch if branch else query.branch # prioritize branch argument
query.branch = selected_branch

clone_config = parsed_query.extact_clone_config()
clone_coroutine = clone_repo(clone_config)
clone_config = query.extract_clone_config()
clone_coroutine = clone(clone_config)

if inspect.iscoroutine(clone_coroutine):
if asyncio.get_event_loop().is_running():
await clone_coroutine
else:
asyncio.run(clone_coroutine)
else:
raise TypeError("clone_repo did not return a coroutine as expected.")
raise TypeError("clone did not return a coroutine as expected.")

repo_cloned = True

summary, tree, content = ingest_query(parsed_query)
summary, tree, content = ingest_query(query)

if output is not None:
with open(output, "w", encoding="utf-8") as f:
Expand Down
14 changes: 7 additions & 7 deletions src/gitingest/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
from gitingest.filesystem_schema import FileSystemNode, FileSystemNodeType, FileSystemStats
from gitingest.output_formatters import format_node
from gitingest.query_parsing import ParsedQuery
from gitingest.query_parsing import IngestionQuery
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
from gitingest.utils.path_utils import _is_safe_symlink

Expand All @@ -17,7 +17,7 @@
import tomli as tomllib


def ingest_query(query: ParsedQuery) -> Tuple[str, str, str]:
def ingest_query(query: IngestionQuery) -> Tuple[str, str, str]:
"""
Run the ingestion process for a parsed query.

Expand All @@ -27,7 +27,7 @@ def ingest_query(query: ParsedQuery) -> Tuple[str, str, str]:

Parameters
----------
query : ParsedQuery
query : IngestionQuery
The parsed query object containing information about the repository and query parameters.

Returns
Expand Down Expand Up @@ -87,7 +87,7 @@ def ingest_query(query: ParsedQuery) -> Tuple[str, str, str]:
return format_node(root_node, query)


def apply_gitingest_file(path: Path, query: ParsedQuery) -> None:
def apply_gitingest_file(path: Path, query: IngestionQuery) -> None:
"""
Apply the .gitingest file to the query object.

Expand All @@ -98,7 +98,7 @@ def apply_gitingest_file(path: Path, query: ParsedQuery) -> None:
----------
path : Path
The path of the directory to ingest.
query : ParsedQuery
query : IngestionQuery
The parsed query object containing information about the repository and query parameters.
It should have an attribute `ignore_patterns` which is either None or a set of strings.
"""
Expand Down Expand Up @@ -154,7 +154,7 @@ def apply_gitingest_file(path: Path, query: ParsedQuery) -> None:

def _process_node(
node: FileSystemNode,
query: ParsedQuery,
query: IngestionQuery,
stats: FileSystemStats,
) -> None:
"""
Expand All @@ -167,7 +167,7 @@ def _process_node(
----------
node : FileSystemNode
The current directory or file node being processed.
query : ParsedQuery
query : IngestionQuery
The parsed query object containing information about the repository and query parameters.
stats : FileSystemStats
Statistics tracking object for the total file count and size.
Expand Down
90 changes: 90 additions & 0 deletions src/gitingest/ingestion_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
""" This module contains the dataclasses for the ingestion process. """

from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Set

from pydantic import BaseModel, Field

from gitingest.config import MAX_FILE_SIZE


@dataclass
class CloneConfig:
    """
    Configuration for cloning a Git repository.

    This class holds the necessary parameters for cloning a repository to a local path, including
    the repository's URL, the target local path, and optional parameters for a specific commit or branch.

    Attributes
    ----------
    url : str
        The URL of the Git repository to clone.
    local_path : str
        The local directory where the repository will be cloned.
    commit : str, optional
        The specific commit hash to check out after cloning (default is None).
    branch : str, optional
        The branch to clone (default is None).
    subpath : str
        The subpath to clone from the repository (default is "/").
    blob : bool
        Flag marking the clone target as a "blob" (default is False). It is set to True
        when the originating query's `type` equals "blob".
        NOTE(review): presumably this means `subpath` points at a single file rather
        than a directory -- confirm against the clone implementation.
    """

    url: str
    local_path: str
    commit: Optional[str] = None
    branch: Optional[str] = None
    subpath: str = "/"
    blob: bool = False


class IngestionQuery(BaseModel):  # pylint: disable=too-many-instance-attributes
    """
    Pydantic model holding the parsed details of a repository or local path to ingest.

    Notes
    -----
    - `url` is optional on the model, but `extract_clone_config` requires it.
    - `local_path` is stored as a `Path` and converted to `str` when building a `CloneConfig`.
    - `type` equal to "blob" turns on the `blob` flag of the resulting `CloneConfig`.
    - `max_file_size` falls back to `MAX_FILE_SIZE` when not supplied.
    """

    user_name: Optional[str] = None
    repo_name: Optional[str] = None
    local_path: Path
    url: Optional[str] = None
    slug: str
    id: str
    subpath: str = "/"
    type: Optional[str] = None
    branch: Optional[str] = None
    commit: Optional[str] = None
    max_file_size: int = Field(default=MAX_FILE_SIZE)
    ignore_patterns: Optional[Set[str]] = None
    include_patterns: Optional[Set[str]] = None

    class Config:
        """Pydantic model configuration."""

        arbitrary_types_allowed = True

    def extract_clone_config(self) -> CloneConfig:
        """
        Build a CloneConfig from the clone-related fields of this query.

        Returns
        -------
        CloneConfig
            A CloneConfig populated with this query's url, local path, commit, branch,
            subpath and blob flag.

        Raises
        ------
        ValueError
            If the 'url' parameter is not provided.
        """
        if self.url:
            # The blob flag is derived from the query type rather than stored on the model.
            is_blob = self.type == "blob"
            return CloneConfig(
                url=self.url,
                local_path=str(self.local_path),
                commit=self.commit,
                branch=self.branch,
                subpath=self.subpath,
                blob=is_blob,
            )

        raise ValueError("The 'url' parameter is required.")
14 changes: 7 additions & 7 deletions src/gitingest/output_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import tiktoken

from gitingest.filesystem_schema import FileSystemNode, FileSystemNodeType
from gitingest.query_parsing import ParsedQuery
from gitingest.query_parsing import IngestionQuery


def format_node(node: FileSystemNode, query: ParsedQuery) -> Tuple[str, str, str]:
def format_node(node: FileSystemNode, query: IngestionQuery) -> Tuple[str, str, str]:
"""
Generate a summary, directory structure, and file contents for a given file system node.

Expand All @@ -18,7 +18,7 @@ def format_node(node: FileSystemNode, query: ParsedQuery) -> Tuple[str, str, str
----------
node : FileSystemNode
The file system node to be summarized.
query : ParsedQuery
query : IngestionQuery
The parsed query object containing information about the repository and query parameters.

Returns
Expand Down Expand Up @@ -47,15 +47,15 @@ def format_node(node: FileSystemNode, query: ParsedQuery) -> Tuple[str, str, str
return summary, tree, content


def _create_summary_prefix(query: ParsedQuery, single_file: bool = False) -> str:
def _create_summary_prefix(query: IngestionQuery, single_file: bool = False) -> str:
"""
Create a prefix string for summarizing a repository or local directory.

Includes repository name (if provided), commit/branch details, and subpath if relevant.

Parameters
----------
query : ParsedQuery
query : IngestionQuery
The parsed query object containing information about the repository and query parameters.
single_file : bool
A flag indicating whether the summary is for a single file, by default False.
Expand Down Expand Up @@ -108,7 +108,7 @@ def _gather_file_contents(node: FileSystemNode) -> str:
return "\n".join(_gather_file_contents(child) for child in node.children)


def _create_tree_structure(query: ParsedQuery, node: FileSystemNode, prefix: str = "", is_last: bool = True) -> str:
def _create_tree_structure(query: IngestionQuery, node: FileSystemNode, prefix: str = "", is_last: bool = True) -> str:
"""
Generate a tree-like string representation of the file structure.

Expand All @@ -117,7 +117,7 @@ def _create_tree_structure(query: ParsedQuery, node: FileSystemNode, prefix: str

Parameters
----------
query : ParsedQuery
query : IngestionQuery
The parsed query object containing information about the repository and query parameters.
node : FileSystemNode
The current directory or file node being processed.
Expand Down
Loading
Loading