Skip to content

Commit

Permalink
version 0.1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
dfalbel authored and cran-robot committed Aug 18, 2023
1 parent aeb183a commit 03234be
Show file tree
Hide file tree
Showing 16 changed files with 316 additions and 16 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: hfhub
Title: Hugging Face Hub Interface
Version: 0.1.0
Version: 0.1.1
Authors@R: c(
person("Daniel", "Falbel", , "daniel@posit.co", role = c("aut", "cre")),
person(family = "Posit", role = c("cph"))
Expand All @@ -15,9 +15,9 @@ Suggests: testthat (>= 3.0.0), jsonlite
Config/testthat/edition: 3
URL: https://mlverse.github.io/hfhub/
NeedsCompilation: no
Packaged: 2023-06-26 11:09:07 UTC; dfalbel
Packaged: 2023-08-18 19:06:18 UTC; dfalbel
Author: Daniel Falbel [aut, cre],
Posit [cph]
Maintainer: Daniel Falbel <daniel@posit.co>
Repository: CRAN
Date/Publication: 2023-06-28 13:50:09 UTC
Date/Publication: 2023-08-18 19:22:33 UTC
21 changes: 15 additions & 6 deletions MD5
@@ -1,9 +1,18 @@
c8675253488fffac6ce31802adf8508c *DESCRIPTION
c3c13c56c270bf87207d34b6060912a3 *DESCRIPTION
c00a56ab2cd9f42d6093d90902bb6ca4 *LICENSE
c1ac58247284a1b48282cee2fd037b75 *NAMESPACE
c510858a2133bf533c1f61bfa48964e9 *R/hub_download.R
800a7b7291d78ab44afed36380b82f56 *README.md
bf162e519ac05982d2441c6d12c121d1 *NAMESPACE
f6b17274a15b2b99da54bab81f708fe1 *NEWS.md
cce5842d81904b14cede6df1eec00fb9 *R/hub_download.R
965af51b30d0206585b48d45eecc3317 *R/hub_info.R
8f37ac9a8c4d47d0d06fce16a8acf3ac *R/hub_snapshot.R
e95d51b44a49984183c3189f34c6de00 *README.md
0a0fc6912d3ec81445de059a6a6e0e77 *man/WEIGHTS_NAME.Rd
897cc4fefb320a17d5edb68012568a0c *man/hub_download.Rd
76dbd3dcaec77ef71bde80294950d27d *man/hub_download.Rd
e8b6a748f25ef95207bdec426bfca0fb *man/hub_repo_info.Rd
a21f1413d9ab25c60bb61e4932b6bff4 *man/hub_snapshot.Rd
c85fb8907959efec8f948eefc70954a0 *tests/testthat.R
4b4d24a049b4158cf67a4fed97e25e57 *tests/testthat/test-hub_download.R
9cae81c53cf166aa530ef9b222fae345 *tests/testthat/_snaps/hub_snapshot.md
e3fd1e055a0a9a459feb3a1c03352425 *tests/testthat/helper-skips.R
219b1277617a84b73adf68ce574376a6 *tests/testthat/test-hub_download.R
fb5930d728bca7bec2588cda0d631541 *tests/testthat/test-hub_info.R
0cdf80eabb8dd5286bf3170affbdc5e3 *tests/testthat/test-hub_snapshot.R
3 changes: 3 additions & 0 deletions NAMESPACE
Expand Up @@ -2,5 +2,8 @@

export(WEIGHTS_INDEX_NAME)
export(WEIGHTS_NAME)
export(hub_dataset_info)
export(hub_download)
export(hub_repo_info)
export(hub_snapshot)
importFrom(rlang,"%||%")
7 changes: 7 additions & 0 deletions NEWS.md
@@ -0,0 +1,7 @@
# hfhub 0.1.1

# hfhub 0.1.0.9000

* Added a `NEWS.md` file to track changes to the package.
* Added `hub_snapshot` to allow downloading an entire repository at once (#2).
* Added support for authentication using `HUGGING_FACE_HUB_TOKEN`. (#5)
38 changes: 32 additions & 6 deletions R/hub_download.R
Expand Up @@ -8,7 +8,8 @@
#' @param force_download For re-downloading of files that are cached.
#' @param ... currently unused.
#'
#' @returns The file path of the downloaded or cached file.
#' @returns The file path of the downloaded or cached file. The snapshot path is returned
#' as an attribute.
#' @examples
#' try({
#' withr::with_envvar(c(HUGGINGFACE_HUB_CACHE = tempdir()), {
Expand Down Expand Up @@ -142,12 +143,13 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type =
type = "download",
)
progress <- function(down, up) {
if (down[1] !=0) {
if (down[1] != 0) {
cli::cli_progress_update(total = down[1], set = down[2], id = bar_id)
}
TRUE
}
handle <- curl::new_handle(noprogress = FALSE, progressfunction = progress)
curl::handle_setheaders(handle, .list = hub_headers())
curl::curl_download(url, tmp, handle = handle, quiet = FALSE)
cli::cli_progress_done(id = bar_id)
}, error = function(err) {
Expand All @@ -156,19 +158,25 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type =
fs::file_move(tmp, blob_path)

# fs::link_create doesn't work for linking files on windows.
try(fs::file_delete(pointer_path), silent = TRUE) # delete the link to avoid warnings
file.symlink(blob_path, pointer_path)
})

pointer_path
}

# Builds the "resolve" URL used to download `filename` from a Hub repository
# at a given `revision`.
#
# Model repositories live at the root of huggingface.co; every other
# repository type is namespaced under its pluralised type
# (e.g. `repo_type = "dataset"` -> `/datasets/...`).
#
# Returns a length-one glue string.
hub_url <- function(repo_id, filename, ..., revision = "main", repo_type = "model") {
  if (repo_type == "model") {
    glue::glue("https://huggingface.co/{repo_id}/resolve/{revision}/{filename}")
  } else {
    glue::glue("https://huggingface.co/{repo_type}s/{repo_id}/resolve/{revision}/{filename}")
  }
}

# Builds the path of the pointer (symlink) for `relative_filename` inside the
# snapshot directory of `revision`:
#   <storage_folder>/snapshots/<revision>/<relative_filename>
#
# The snapshot directory itself is attached to the result as the
# "snapshot_path" attribute so callers can recover it from the file path.
get_pointer_path <- function(storage_folder, revision, relative_filename) {
  snapshot_path <- fs::path(storage_folder, "snapshots", revision)
  pointer_path <- fs::path(snapshot_path, relative_filename)
  attr(pointer_path, "snapshot_path") <- snapshot_path
  pointer_path
}

Expand All @@ -177,12 +185,30 @@ repo_folder_name <- function(repo_id, repo_type = "model") {
glue::glue("{repo_type}s{REPO_ID_SEPARATOR()}{repo_id}")
}

# Assembles the HTTP headers sent with every request to the Hub.
#
# The result is a named character vector that always carries a `user-agent`
# entry. An `authorization` bearer entry is appended when a token is found in
# `HUGGING_FACE_HUB_TOKEN` or, failing that, in `HUGGINGFACE_HUB_TOKEN`.
hub_headers <- function() {
  # The first variable takes precedence; the second is only a fallback.
  auth_token <- Sys.getenv("HUGGING_FACE_HUB_TOKEN", unset = "")
  if (!nzchar(auth_token)) {
    auth_token <- Sys.getenv("HUGGINGFACE_HUB_TOKEN", unset = "")
  }

  base_headers <- c("user-agent" = "hfhub/0.0.1")

  # Anonymous access: nothing else to add.
  if (!nzchar(auth_token)) {
    return(base_headers)
  }

  c(base_headers, "authorization" = paste0("Bearer ", auth_token))
}

#' @importFrom rlang %||%
get_file_metadata <- function(url) {

headers <- hub_headers()
headers["Accept-Encoding"] <- "identity"

req <- reqst(httr::HEAD,
url = url,
httr::config(followlocation = FALSE),
httr::add_headers("Accept-Encoding" = "identity", "user-agent" = "hfhub/0.0.1"),
httr::add_headers(.headers = headers),
follow_relative_redirects = TRUE
)
list(
Expand Down
43 changes: 43 additions & 0 deletions R/hub_info.R
@@ -0,0 +1,43 @@
#' Queries information about Hub repositories
#'
#' @inheritParams hub_download
#' @param files_metadata Obtain files metadata information when querying repository information.
#' @returns The parsed body of the Hub API response, as returned by
#'   [httr::content()]. An error is raised if the request fails with an HTTP
#'   error status.
#' @export
hub_repo_info <- function(repo_id, ..., repo_type = NULL, revision = NULL, files_metadata = FALSE) {
  # Models are the default repository type; other types are pluralised in the
  # API path (e.g. "dataset" -> "/api/datasets/...").
  if (is.null(repo_type) || repo_type == "model") {
    path <- glue::glue("https://huggingface.co/api/models/{repo_id}")
  } else {
    path <- glue::glue("https://huggingface.co/api/{repo_type}s/{repo_id}")
  }

  if (!is.null(revision)) {
    # Revisions such as "refs/pr/1" contain "/" and must be percent-encoded to
    # stay a single path segment.
    encoded_revision <- curl::curl_escape(revision)
    path <- glue::glue("{path}/revision/{encoded_revision}")
  }

  params <- list()
  if (files_metadata) {
    params$blobs <- TRUE
  }

  headers <- hub_headers()

  results <- httr::GET(
    path,
    query = params,
    httr::add_headers(.headers = headers)
  )

  # Fail loudly on 4xx/5xx instead of handing the error payload back to the
  # caller as if it were repository information.
  httr::stop_for_status(
    results,
    task = "query repository information from the Hugging Face Hub"
  )

  httr::content(results)
}

#' @describeIn hub_repo_info Query information from a Hub Dataset
#' @export
hub_dataset_info <- function(repo_id, ..., revision = NULL, files_metadata = FALSE) {
  # Thin wrapper: same query as `hub_repo_info()`, with the repository type
  # pinned to "dataset".
  hub_repo_info(
    repo_id,
    repo_type = "dataset",
    revision = revision,
    files_metadata = files_metadata
  )
}

53 changes: 53 additions & 0 deletions R/hub_snapshot.R
@@ -0,0 +1,53 @@
#' Snapshot the entire repository
#'
#' Downloads and stores all files from a Hugging Face Hub repository.
#' @inheritParams hub_download
#' @param allow_patterns A character vector containing patterns that are used to
#'   filter allowed files to snapshot. A file is kept if it matches at least one
#'   of the patterns.
#' @param ignore_patterns A character vector containing patterns to reject files
#'   from being downloaded. A file is dropped if it matches any of the patterns.
#'
#' @returns The `snapshot_path` attribute of the last downloaded file, i.e. the
#'   snapshot directory the files were stored in.
#' @export
hub_snapshot <- function(repo_id, ..., revision = "main", repo_type = "model",
                         local_files_only = FALSE, force_download = FALSE,
                         allow_patterns = NULL, ignore_patterns = NULL) {
  # Resolve the requested revision so that every file is downloaded from the
  # same commit (`info$sha`).
  info <- hub_repo_info(repo_id, repo_type = repo_type, revision = revision)
  all_files <- vapply(info$siblings, function(x) x$rfilename, character(1))

  allowed_files <- all_files
  if (!is.null(allow_patterns)) {
    # Match each pattern on its own; passing the whole vector to `grepl()`
    # would silently use only the first pattern.
    allowed_files <- lapply(allow_patterns, function(pattern) {
      all_files[grepl(pattern, all_files)]
    })
    allowed_files <- unique(unlist(allowed_files))
  }

  files <- allowed_files
  if (!is.null(ignore_patterns)) {
    for (pattern in ignore_patterns) {
      files <- files[!grepl(pattern, files)]
    }
  }

  # Without at least one download there is no snapshot path to return.
  if (length(files) == 0) {
    stop(
      "No files to snapshot in repository '", repo_id,
      "' after applying `allow_patterns` and `ignore_patterns`.",
      call. = FALSE
    )
  }

  id <- cli::cli_progress_bar(
    name = "Downloading files",
    type = "tasks",
    total = length(files),
    clear = FALSE
  )

  # `i` is interpolated by the progress step message, so it must exist before
  # the loop starts.
  i <- 0
  cli::cli_progress_step("Snapshotting files {i}/{length(files)}")
  for (i in seq_along(files)) {
    d <- hub_download(
      repo_id = repo_id,
      filename = files[i],
      revision = info$sha,
      repo_type = repo_type,
      local_files_only = local_files_only,
      force_download = force_download
    )
  }

  attr(d, "snapshot_path")
}
6 changes: 6 additions & 0 deletions README.md
Expand Up @@ -32,3 +32,9 @@ library(hfhub)
path <- hub_download("gpt2", "config.json")
str(jsonlite::fromJSON(path))
```

## Authentication

You can set the `HUGGING_FACE_HUB_TOKEN` environment variable to a token
obtained from the Access Tokens section of your Hugging Face account settings page.
This allows you to download files from private repositories on the Hugging Face Hub.
3 changes: 2 additions & 1 deletion man/hub_download.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 36 additions & 0 deletions man/hub_repo_info.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 39 additions & 0 deletions man/hub_snapshot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions tests/testthat/_snaps/hub_snapshot.md
@@ -0,0 +1,10 @@
# snapshot

Code
p <- hub_snapshot("dfalbel/cran-packages", repo_type = "dataset",
allow_patterns = "\\.R")
Message <cliMessage>
i Snapshotting files 0/4
v Snapshotting files 4/4 [0ms]

9 changes: 9 additions & 0 deletions tests/testthat/helper-skips.R
@@ -0,0 +1,9 @@
# Skips the current test unless a Hub access token is available in either
# `HUGGINGFACE_HUB_TOKEN` or `HUGGING_FACE_HUB_TOKEN`.
skip_if_no_token <- function() {
  token_vars <- c("HUGGINGFACE_HUB_TOKEN", "HUGGING_FACE_HUB_TOKEN")
  tokens <- Sys.getenv(token_vars, unset = "")

  if (!any(nzchar(tokens))) {
    skip("No auth token set.")
  }
}
22 changes: 22 additions & 0 deletions tests/testthat/test-hub_download.R
Expand Up @@ -27,3 +27,25 @@ test_that("hub_download", {
})
expect_equal(list.files(tmp), "models--gpt2")
})

test_that("can download from private repo", {

  # Needs real credentials; skipped when no token is configured.
  skip_if_no_token()

  # A plain-text file and a binary weights file from the same private repo.
  private_files <- c(".gitattributes", "hello.safetensors")

  for (private_file in private_files) {
    # `regexp = NA` asserts that no error is raised.
    expect_error(regexp = NA, {
      hub_download(
        repo_id = "dfalbel/test-hfhub",
        filename = private_file,
        force_download = TRUE
      )
    })
  }

})

0 comments on commit 03234be

Please sign in to comment.