Skip to content

Commit

Permalink
Add list_repo_commits to list git history of a repo (#1331)
Browse files Browse the repository at this point in the history
* Add list_repo_commits to list git history of a repo

* code style

* add docs

* code quality

* Update src/huggingface_hub/hf_api.py

Co-authored-by: Julien Chaumond <julien@huggingface.co>

---------

Co-authored-by: Julien Chaumond <julien@huggingface.co>
  • Loading branch information
Wauplin and julien-c committed Feb 16, 2023
1 parent ad31403 commit 23b1312
Show file tree
Hide file tree
Showing 5 changed files with 196 additions and 2 deletions.
4 changes: 4 additions & 0 deletions docs/source/package_reference/hf_api.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ models = hf_api.list_models()

[[autodoc]] huggingface_hub.hf_api.GitRefInfo

### GitCommitInfo

[[autodoc]] huggingface_hub.hf_api.GitCommitInfo

### GitRefs

[[autodoc]] huggingface_hub.hf_api.GitRefs
Expand Down
4 changes: 4 additions & 0 deletions src/huggingface_hub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
"CommitOperationAdd",
"CommitOperationDelete",
"DatasetSearchArguments",
"GitCommitInfo",
"GitRefInfo",
"GitRefs",
"HfApi",
Expand Down Expand Up @@ -141,6 +142,7 @@
"list_liked_repos",
"list_metrics",
"list_models",
"list_repo_commits",
"list_repo_files",
"list_repo_refs",
"list_spaces",
Expand Down Expand Up @@ -360,6 +362,7 @@ def __dir__():
CommitOperationAdd, # noqa: F401
CommitOperationDelete, # noqa: F401
DatasetSearchArguments, # noqa: F401
GitCommitInfo, # noqa: F401
GitRefInfo, # noqa: F401
GitRefs, # noqa: F401
HfApi, # noqa: F401
Expand Down Expand Up @@ -394,6 +397,7 @@ def __dir__():
list_liked_repos, # noqa: F401
list_metrics, # noqa: F401
list_models, # noqa: F401
list_repo_commits, # noqa: F401
list_repo_files, # noqa: F401
list_repo_refs, # noqa: F401
list_spaces, # noqa: F401
Expand Down
126 changes: 125 additions & 1 deletion src/huggingface_hub/hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import re
import warnings
from dataclasses import dataclass, field
from datetime import datetime
from itertools import islice
from pathlib import Path
from typing import Any, BinaryIO, Dict, Iterable, Iterator, List, Optional, Tuple, Union
Expand Down Expand Up @@ -715,6 +716,49 @@ class GitRefs:
tags: List[GitRefInfo]


@dataclass
class GitCommitInfo:
    """
    Describe a single git commit of a repo hosted on the Hub. See [`list_repo_commits`] for how to retrieve them.

    Instances are built from the raw dictionary describing a commit (see `__init__`), not from keyword arguments.

    Args:
        commit_id (`str`):
            OID of the commit (e.g. `"e7da7f221d5bf496a48136c0cd264e630fe9fcc8"`)
        authors (`List[str]`):
            List of authors of the commit.
        created_at (`datetime`):
            Datetime when the commit was created.
        title (`str`):
            Title of the commit. This is a free-text value entered by the authors.
        message (`str`):
            Description of the commit. This is a free-text value entered by the authors.
        formatted_title (`str`):
            Title of the commit formatted as HTML. Only returned if `formatted=True` is set.
        formatted_message (`str`):
            Description of the commit formatted as HTML. Only returned if `formatted=True` is set.
    """

    commit_id: str

    authors: List[str]
    created_at: datetime
    title: str
    message: str

    formatted_title: Optional[str]
    formatted_message: Optional[str]

    def __init__(self, data: Dict) -> None:
        """Populate the fields from `data`, the raw dictionary describing one commit."""
        # Mandatory fields: a missing key raises `KeyError`.
        self.commit_id = data["id"]
        self.authors = [entry["user"] for entry in data["authors"]]
        self.created_at = parse_datetime(data["date"])
        self.title = data["title"]
        self.message = data["message"]

        # Optional HTML fields: both stay `None` when `data` has no "formatted" entry.
        html_fields = data.get("formatted", {})
        self.formatted_title = html_fields.get("title")
        self.formatted_message = html_fields.get("message")


@dataclass
class UserLikes:
"""
Expand Down Expand Up @@ -1891,6 +1935,84 @@ def list_repo_refs(
tags=[GitRefInfo(item) for item in data["tags"]],
)

@validate_hf_hub_args
def list_repo_commits(
    self,
    repo_id: str,
    *,
    repo_type: Optional[str] = None,
    token: Optional[Union[bool, str]] = None,
    revision: Optional[str] = None,
    formatted: bool = False,
) -> List[GitCommitInfo]:
    """
    List the commits reachable from a given revision of a repo on the Hub.

    Commits are sorted by date (last commit first).

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if listing commits from a dataset or a Space, `None` or `"model"` if
            listing from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid authentication token (see https://huggingface.co/settings/token).
            If `None` or `True` and machine is logged in (through `huggingface-cli login`
            or [`~huggingface_hub.login`]), token will be retrieved from the cache.
            If `False`, token is not sent in the request header.
        revision (`str`, *optional*):
            The git revision to list commits from. Defaults to the head of the `"main"` branch.
        formatted (`bool`):
            Whether to return the HTML-formatted title and description of the commits. Defaults to False.

    Example:
    ```py
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()

    # Commits are sorted by date (last commit first)
    >>> initial_commit = api.list_repo_commits("gpt2")[-1]

    # Initial commit is always a system commit containing the `.gitattributes` file.
    >>> initial_commit
    GitCommitInfo(
        commit_id='9b865efde13a30c13e0a33e536cf3e4a5a9d71d8',
        authors=['system'],
        created_at=datetime.datetime(2019, 2, 18, 10, 36, 15, tzinfo=datetime.timezone.utc),
        title='initial commit',
        message='',
        formatted_title=None,
        formatted_message=None
    )

    # Create an empty branch by deriving from initial commit
    >>> api.create_branch("gpt2", "new_empty_branch", revision=initial_commit.commit_id)
    ```

    Returns:
        List[[`GitCommitInfo`]]: list of objects containing information about the commits for a repo on the Hub.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo
            does not exist.
        [`~utils.RevisionNotFoundError`]:
            If revision is not found (error 404) on the repo.
    """
    if not repo_type:
        repo_type = REPO_TYPE_MODEL

    # A user-provided revision is fully percent-encoded (including `/`) so that
    # it fits in a single URL path segment; the default revision is used as-is.
    if revision is None:
        revision = DEFAULT_REVISION
    else:
        revision = quote(revision, safe="")

    commits_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/commits/{revision}"
    request_headers = self._build_hf_headers(token=token)
    # The formatted (HTML) fields are only requested when explicitly asked for.
    query_params = {"expand[]": "formatted"} if formatted else {}

    # Walk through every page of results and wrap each raw item.
    commits = []
    for raw_commit in paginate(commits_url, headers=request_headers, params=query_params):
        commits.append(GitCommitInfo(raw_commit))
    return commits

@validate_hf_hub_args
def create_repo(
self,
Expand Down Expand Up @@ -2817,7 +2939,8 @@ def create_branch(
exist_ok: bool = False,
) -> None:
"""
Create a new branch from `main` on a repo on the Hub.
Create a new branch for a repo on the Hub, starting from the specified revision (defaults to `main`).
To find a revision suiting your needs, you can use [`list_repo_refs`] or [`list_repo_commits`].
Args:
repo_id (`str`):
Expand Down Expand Up @@ -4039,6 +4162,7 @@ def _parse_revision_from_pr_url(pr_url: str) -> str:
repo_info = api.repo_info
list_repo_files = api.list_repo_files
list_repo_refs = api.list_repo_refs
list_repo_commits = api.list_repo_commits

list_metrics = api.list_metrics

Expand Down
2 changes: 1 addition & 1 deletion tests/test_command_delete_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def _input_answers():

# Check printed instructions
printed = output.getvalue()
self.assertTrue(printed.startswith("TUI is disabled. In other to")) # ...
self.assertTrue(printed.startswith("TUI is disabled. In order to")) # ...
self.assertIn(tmp_path, printed)

# Check input called twice
Expand Down
62 changes: 62 additions & 0 deletions tests/test_hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2342,6 +2342,68 @@ def test_list_refs_bigcode(self) -> None:
)


class ListGitCommitsTest(unittest.TestCase):
    """Integration tests for `HfApi.list_repo_commits`, run against a temporary repo."""

    @classmethod
    def setUpClass(cls) -> None:
        """Create a repo holding two uploads on `main` and one upload in a PR."""
        cls.api = HfApi(token=TOKEN)
        # Create repo (with initial commit)
        cls.repo_id = cls.api.create_repo(repo_name()).repo_id

        # Create a commit on `main` branch
        cls.api.upload_file(repo_id=cls.repo_id, path_or_fileobj=b"content", path_in_repo="content.txt")

        # Create a commit in a PR
        cls.api.upload_file(repo_id=cls.repo_id, path_or_fileobj=b"on_pr", path_in_repo="on_pr.txt", create_pr=True)

        # Create another commit on `main` branch
        cls.api.upload_file(repo_id=cls.repo_id, path_or_fileobj=b"on_main", path_in_repo="on_main.txt")
        return super().setUpClass()

    @classmethod
    def tearDownClass(cls) -> None:
        """Delete the temporary repo created in `setUpClass`."""
        cls.api.delete_repo(cls.repo_id)
        return super().tearDownClass()

    def test_list_commits_on_main(self) -> None:
        """Default revision lists the commits of `main` only, latest first."""
        commits = self.api.list_repo_commits(self.repo_id)

        # "on_pr" commit not returned
        # NOTE: `assertEqual` is used instead of the `assertEquals` alias, which was
        # deprecated in Python 3.2 and removed in Python 3.12.
        self.assertEqual(len(commits), 3)
        self.assertTrue(all("on_pr" not in commit.title for commit in commits))

        # USER is always the author
        self.assertTrue(all(commit.authors == [USER] for commit in commits))

        # latest commit first
        self.assertEqual(commits[0].title, "Upload on_main.txt with huggingface_hub")

        # Formatted field not returned by default
        for commit in commits:
            self.assertIsNone(commit.formatted_title)
            self.assertIsNone(commit.formatted_message)

    def test_list_commits_on_pr(self) -> None:
        """Listing from the PR ref returns the PR commit but not the later `main` commit."""
        commits = self.api.list_repo_commits(self.repo_id, revision="refs/pr/1")

        # "on_pr" commit returned but not the "on_main" one
        self.assertEqual(len(commits), 3)
        self.assertTrue(all("on_main" not in commit.title for commit in commits))
        self.assertEqual(commits[0].title, "Upload on_pr.txt with huggingface_hub")

    def test_list_commits_include_formatted(self) -> None:
        """With `formatted=True`, every commit carries its HTML title and message."""
        for commit in self.api.list_repo_commits(self.repo_id, formatted=True):
            self.assertIsNotNone(commit.formatted_title)
            self.assertIsNotNone(commit.formatted_message)

    def test_list_commits_on_missing_repo(self) -> None:
        """An unknown repo id raises `RepositoryNotFoundError`."""
        with self.assertRaises(RepositoryNotFoundError):
            self.api.list_repo_commits("missing_repo_id")

    def test_list_commits_on_missing_revision(self) -> None:
        """An unknown revision on an existing repo raises `RevisionNotFoundError`."""
        with self.assertRaises(RevisionNotFoundError):
            self.api.list_repo_commits(self.repo_id, revision="missing_revision")


@patch("huggingface_hub.hf_api.build_hf_headers")
class HfApiTokenAttributeTest(unittest.TestCase):
def test_token_passed(self, mock_build_hf_headers: Mock) -> None:
Expand Down

0 comments on commit 23b1312

Please sign in to comment.