Skip to content

Commit

Permalink
Make resources.Get use a file cache for remote resources
Browse files Browse the repository at this point in the history
Closes #9228
  • Loading branch information
vanbroup committed Dec 2, 2021
1 parent 133e4bf commit 6675341
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 49 deletions.
17 changes: 12 additions & 5 deletions cache/filecache/filecache_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,12 @@ var defaultCacheConfig = Config{
}

const (
cacheKeyGetJSON = "getjson"
cacheKeyGetCSV = "getcsv"
cacheKeyImages = "images"
cacheKeyAssets = "assets"
cacheKeyModules = "modules"
cacheKeyGetJSON = "getjson"
cacheKeyGetCSV = "getcsv"
cacheKeyImages = "images"
cacheKeyAssets = "assets"
cacheKeyModules = "modules"
cacheGetResource = "getresource"
)

type Configs map[string]Config
Expand All @@ -70,6 +71,7 @@ var defaultCacheConfigs = Configs{
MaxAge: -1,
Dir: resourcesGenDir,
},
cacheGetResource: defaultCacheConfig,
}

type Config struct {
Expand Down Expand Up @@ -111,6 +113,11 @@ func (f Caches) AssetsCache() *Cache {
return f[cacheKeyAssets]
}

// GetResourceCache gets the file cache for remote resources.
// It looks up the entry stored under the cacheGetResource key and returns
// nil when no such entry is present.
func (f Caches) GetResourceCache() *Cache {
return f[cacheGetResource]
}

func DecodeConfig(fs afero.Fs, cfg config.Provider) (Configs, error) {
c := make(Configs)
valid := make(map[string]bool)
Expand Down
16 changes: 11 additions & 5 deletions cache/filecache/filecache_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ maxAge = "11h"
dir = "/path/to/c2"
[caches.images]
dir = "/path/to/c3"
[caches.getResource]
dir = "/path/to/c4"
`

cfg, err := config.FromConfigString(configStr, "toml")
Expand All @@ -59,7 +60,7 @@ dir = "/path/to/c3"
decoded, err := DecodeConfig(fs, cfg)
c.Assert(err, qt.IsNil)

c.Assert(len(decoded), qt.Equals, 5)
c.Assert(len(decoded), qt.Equals, 6)

c2 := decoded["getcsv"]
c.Assert(c2.MaxAge.String(), qt.Equals, "11h0m0s")
Expand All @@ -68,6 +69,10 @@ dir = "/path/to/c3"
c3 := decoded["images"]
c.Assert(c3.MaxAge, qt.Equals, time.Duration(-1))
c.Assert(c3.Dir, qt.Equals, filepath.FromSlash("/path/to/c3/filecache/images"))

c4 := decoded["getresource"]
c.Assert(c4.MaxAge, qt.Equals, time.Duration(-1))
c.Assert(c4.Dir, qt.Equals, filepath.FromSlash("/path/to/c4/filecache/getresource"))
}

func TestDecodeConfigIgnoreCache(t *testing.T) {
Expand All @@ -94,7 +99,8 @@ maxAge = 3456
dir = "/path/to/c2"
[caches.images]
dir = "/path/to/c3"
[caches.getResource]
dir = "/path/to/c4"
`

cfg, err := config.FromConfigString(configStr, "toml")
Expand All @@ -103,7 +109,7 @@ dir = "/path/to/c3"
decoded, err := DecodeConfig(fs, cfg)
c.Assert(err, qt.IsNil)

c.Assert(len(decoded), qt.Equals, 5)
c.Assert(len(decoded), qt.Equals, 6)

for _, v := range decoded {
c.Assert(v.MaxAge, qt.Equals, time.Duration(0))
Expand All @@ -129,7 +135,7 @@ func TestDecodeConfigDefault(t *testing.T) {

c.Assert(err, qt.IsNil)

c.Assert(len(decoded), qt.Equals, 5)
c.Assert(len(decoded), qt.Equals, 6)

imgConfig := decoded[cacheKeyImages]
jsonConfig := decoded[cacheKeyGetJSON]
Expand Down
3 changes: 3 additions & 0 deletions docs/content/en/getting-started/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,9 @@ maxAge = -1
[caches.getcsv]
dir = ":cacheDir/:project"
maxAge = -1
[caches.getresource]
dir = ":cacheDir/:project"
maxAge = -1
[caches.images]
dir = ":resourceDir/_gen"
maxAge = -1
Expand Down
10 changes: 10 additions & 0 deletions docs/content/en/hugo-pipes/introduction.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ You can also change the request method and set the request body:
)}}
```

#### Cache of remote resources

Each downloaded remote resource is cached on disk, by default in the folder `$TMPDIR/hugo_cache/`. The variable `$TMPDIR` resolves to your system-dependent temporary directory.

With the command-line flag `--cacheDir`, you can specify any folder on your system as a caching directory.

You can also set `cacheDir` or `caches.getresource` in the [main configuration file][config].

To disable caching entirely, use the command-line flag `--ignoreCache`.

### Asset publishing

Assets will only be published (to `/public`) if `.Permalink` or `.RelPermalink` is used.
Expand Down
97 changes: 58 additions & 39 deletions resources/resource_factories/create/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
package create

import (
"bufio"
"bytes"
"fmt"
"io"
"io/ioutil"
"mime"
"net/http"
"net/http/httputil"
"net/url"
"path"
"path/filepath"
Expand All @@ -32,6 +34,7 @@ import (

"github.com/gohugoio/hugo/hugofs"

"github.com/gohugoio/hugo/cache/filecache"
"github.com/gohugoio/hugo/common/hugio"
"github.com/gohugoio/hugo/common/maps"
"github.com/gohugoio/hugo/common/types"
Expand All @@ -45,8 +48,9 @@ import (
// Client contains methods to create Resource objects.
type Client struct {
rs *resources.Spec
httpClient *http.Client
rs *resources.Spec
httpClient *http.Client
cacheGetResource *filecache.Cache
}

// New creates a new Client with the given specification.
Expand All @@ -56,6 +60,7 @@ func New(rs *resources.Spec) *Client {
httpClient: &http.Client{
Timeout: 10 * time.Second,
},
cacheGetResource: rs.FileCaches.GetResourceCache(),
}
}

Expand Down Expand Up @@ -156,10 +161,7 @@ func (c *Client) FromRemote(uri string, options map[string]interface{}) (resourc

resourceID := helpers.HashString(uri, options)

// This caches to memory and will, in server mode, not be evicted unless the resourceID changes
// or the server restarts.
// There is ongoing work to improve this.
return c.rs.ResourceCache.GetOrCreate(resourceID, func() (resource.Resource, error) {
_, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) {
method, reqBody, err := getMethodAndBody(options)
if err != nil {
return nil, errors.Wrapf(err, "failed to get method or body for resource %s", uri)
Expand Down Expand Up @@ -187,51 +189,68 @@ func (c *Client) FromRemote(uri string, options map[string]interface{}) (resourc
return nil, errors.Errorf("failed to retrieve remote resource: %s", http.StatusText(res.StatusCode))
}

body, err := ioutil.ReadAll(res.Body)
httpResponse, err := httputil.DumpResponse(res, true)
if err != nil {
return nil, errors.Wrapf(err, "failed to read remote resource %s", uri)
return nil, err
}

filename := path.Base(rURL.Path)
if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
if _, ok := params["filename"]; ok {
filename = params["filename"]
}
}
return hugio.ToReadCloser(bytes.NewReader(httpResponse)), nil
})
if err != nil {
return nil, err
}
defer httpResponse.Close()

res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil)
if err != nil {
return nil, err
}

body, err := ioutil.ReadAll(res.Body)
if err != nil {
return nil, errors.Wrapf(err, "failed to read remote resource %s", uri)
}

var contentType string
if arr, _ := mime.ExtensionsByType(res.Header.Get("Content-Type")); len(arr) == 1 {
contentType = arr[0]
filename := path.Base(rURL.Path)
if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
if _, ok := params["filename"]; ok {
filename = params["filename"]
}
}

// If content type was not determined by header, look for a file extension
if contentType == "" {
if ext := path.Ext(filename); ext != "" {
contentType = ext
}
var contentType string
if arr, _ := mime.ExtensionsByType(res.Header.Get("Content-Type")); len(arr) == 1 {
contentType = arr[0]
}

// If content type was not determined by header, look for a file extension
if contentType == "" {
if ext := path.Ext(filename); ext != "" {
contentType = ext
}
}

// If content type was not determined by header or file extension, try using content itself
if contentType == "" {
if ct := http.DetectContentType(body); ct != "application/octet-stream" {
if arr, _ := mime.ExtensionsByType(ct); arr != nil {
contentType = arr[0]
}
// If content type was not determined by header or file extension, try using content itself
if contentType == "" {
if ct := http.DetectContentType(body); ct != "application/octet-stream" {
if arr, _ := mime.ExtensionsByType(ct); arr != nil {
contentType = arr[0]
}
}
}

resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + contentType
resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + contentType

return c.rs.New(
resources.ResourceSourceDescriptor{
Fs: c.rs.FileCaches.AssetsCache().Fs,
LazyPublish: true,
OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
},
RelTargetFilename: filepath.Clean(resourceID),
})

return c.rs.New(
resources.ResourceSourceDescriptor{
Fs: c.rs.FileCaches.AssetsCache().Fs,
LazyPublish: true,
OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
},
RelTargetFilename: filepath.Clean(resourceID),
})
})
}

func addDefaultHeaders(req *http.Request, accepts ...string) {
Expand Down

0 comments on commit 6675341

Please sign in to comment.