Skip to content

Commit

Permalink
Address Eliott's comments
Browse files: browse the repository at this point in the history
  • Loading branch information
LysandreJik committed Apr 12, 2022
1 parent 17a1331 commit 5f48d6c
Showing 1 changed file with 22 additions and 13 deletions.
35 changes: 22 additions & 13 deletions src/huggingface_hub/repository.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -239,7 +239,7 @@ def is_binary_file(filename: Union[str, Path]) -> bool:
"""
try:
with open(filename) as f:
content = f.read(512)
content = f.read()

# Check for the presence of the null character in the string
return "\x00" in content
Expand Down Expand Up @@ -1023,15 +1023,22 @@ def auto_track_binary_files(self, pattern: Optional[str] = ".") -> List[str]:
continue

path_to_file = os.path.join(os.getcwd(), self.local_dir, filename)
is_binary = is_binary_file(path_to_file)

if (
is_binary
and not is_tracked_with_lfs(path_to_file)
and not is_git_ignored(path_to_file)
):
self.lfs_track(filename)
files_to_be_tracked_with_lfs.append(filename)
if not (is_tracked_with_lfs(path_to_file) or is_git_ignored(path_to_file)):
size_in_mb = os.path.getsize(path_to_file) / (1024 * 1024)

if size_in_mb >= 10:
logger.warning(
"Parsing a large file to check if binary or not. Tracking large "
"files using `repository.auto_track_large_files` is recommended "
"so as to not load the full file in memory."
)

is_binary = is_binary_file(path_to_file)

if is_binary:
self.lfs_track(filename)
files_to_be_tracked_with_lfs.append(filename)

# Cleanup the .gitattributes if files were deleted
self.lfs_untrack(deleted_files)
Expand Down Expand Up @@ -1152,10 +1159,12 @@ def git_add(
be automatically tracked.
"""
if auto_lfs_track:
tracked_files = [
*self.auto_track_large_files(pattern),
*self.auto_track_binary_files(pattern),
]
# Track files according to their size (>=10MB)
tracked_files = self.auto_track_large_files(pattern)

# Read the remaining files and track them if they're binary
tracked_files.extend(self.auto_track_binary_files(pattern))

if tracked_files:
logger.warning(
f"Adding files tracked by Git LFS: {tracked_files}. This may take a bit of time if the files are large."
Expand Down

0 comments on commit 5f48d6c

Please sign in to comment.