From cdf2acbed177db22583836354b1b580a86f9306a Mon Sep 17 00:00:00 2001 From: Andrew Gaul Date: Tue, 8 Feb 2022 18:45:50 +0900 Subject: [PATCH] Store object data natively and metadata in xattr The latter remains a JSON-encoded blob but is now stored in file extended attributes, except on Windows where it is a separate file. This reduces memory usage and is much faster by avoiding JSON-encoding large objects. This enables a future commit to avoid reading the entire object, particularly for range requests. Note that this commit changes the on-disk format and is not compatible with previous data sets. Extended attributes have some caveats including lack of tmpfs and Windows support. References pkg/xattr#47. References #669. Fixes #671. --- fakestorage/bucket_test.go | 2 +- go.mod | 1 + go.sum | 3 +++ internal/backend/backend_test.go | 2 +- internal/backend/fs.go | 41 +++++++++++++++++++++++++------ internal/backend/xattr_darwin.go | 23 +++++++++++++++++ internal/backend/xattr_linux.go | 23 +++++++++++++++++ internal/backend/xattr_windows.go | 25 +++++++++++++++++++ internal/config/config_test.go | 10 ++++---- 9 files changed, 115 insertions(+), 15 deletions(-) create mode 100644 internal/backend/xattr_darwin.go create mode 100644 internal/backend/xattr_linux.go create mode 100644 internal/backend/xattr_windows.go diff --git a/fakestorage/bucket_test.go b/fakestorage/bucket_test.go index caf65f1e77..26bc169df6 100644 --- a/fakestorage/bucket_test.go +++ b/fakestorage/bucket_test.go @@ -228,7 +228,7 @@ func TestServerClientListObjects(t *testing.T) { {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-02.jpg"}}, {ObjectAttrs: ObjectAttrs{BucketName: "some-bucket", Name: "img/hi-res/party-03.jpg"}}, } - dir, err := ioutil.TempDir("", "fakestorage-test-root-") + dir, err := ioutil.TempDir("/var/tmp", "fakestorage-test-root-") if err != nil { t.Fatal(err) } diff --git a/go.mod b/go.mod index 9b85910b91..adb2722d2d 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/google/go-cmp v0.5.7 github.com/gorilla/handlers v1.5.1 github.com/gorilla/mux v1.8.0 + github.com/pkg/xattr v0.4.5 github.com/sirupsen/logrus v1.8.1 github.com/stretchr/testify v1.7.0 golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 diff --git a/go.sum b/go.sum index 6be35c58d5..2c21904910 100644 --- a/go.sum +++ b/go.sum @@ -190,6 +190,8 @@ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/pkg/xattr v0.4.5 h1:P5SvUc1T07cHLto76ESJ+/x5kexU7s9127iVoeEW/hs= +github.com/pkg/xattr v0.4.5/go.mod h1:sBD3RAqlr8Q+RC3FutZcikpT8nyDrIEEBw2J744gVWs= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -354,6 +356,7 @@ golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201101102859-da207088b7d1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internal/backend/backend_test.go b/internal/backend/backend_test.go index efeffe77a7..363df473c5 100644 --- a/internal/backend/backend_test.go +++ b/internal/backend/backend_test.go @@ -15,7 +15,7 @@ import ( ) func makeStorageBackends(t *testing.T) (map[string]Storage, func()) { - tempDir, err := ioutil.TempDir(os.TempDir(), "fakegcstest") + tempDir, err := ioutil.TempDir("/var/tmp", "fakegcstest") if err != nil { t.Fatal(err) } diff --git a/internal/backend/fs.go b/internal/backend/fs.go index 26774e76ac..6fc7998d0f 100644 --- a/internal/backend/fs.go +++ b/internal/backend/fs.go @@ -134,13 +134,24 @@ func (s *storageFS) CreateObject(obj Object) (Object, error) { if err != nil { return Object{}, err } - file, err := os.OpenFile(filepath.Join(s.rootDir, url.PathEscape(obj.BucketName), url.PathEscape(obj.Name)), os.O_CREATE|os.O_WRONLY, 0o600) + + path := filepath.Join(s.rootDir, url.PathEscape(obj.BucketName), url.PathEscape(obj.Name)) + + if err = ioutil.WriteFile(path, obj.Content, 0o600); err != nil { + return Object{}, err + } + + // TODO: Handle if metadata is not present more gracefully? + encoded, err := json.Marshal(obj.ObjectAttrs) if err != nil { return Object{}, err } - defer file.Close() - err = json.NewEncoder(file).Encode(obj) - return obj, err + + if err = writeXattr(path, encoded); err != nil { + return Object{}, err + } + + return obj, nil } // ListObjects lists the objects in a given bucket with a given prefix and @@ -155,6 +166,9 @@ func (s *storageFS) ListObjects(bucketName string, prefix string, versions bool) } objects := []ObjectAttrs{} for _, info := range infos { + if isXattrFile(info.Name()) { + continue + } unescaped, err := url.PathUnescape(info.Name()) if err != nil { return nil, fmt.Errorf("failed to unescape object name %s: %w", info.Name(), err) @@ -186,16 +200,23 @@ func (s *storageFS) GetObjectWithGeneration(bucketName, objectName string, gener } func (s *storageFS) getObject(bucketName, objectName string) (Object, error) { - file, err := os.Open(filepath.Join(s.rootDir, url.PathEscape(bucketName), url.PathEscape(objectName))) + path := filepath.Join(s.rootDir, url.PathEscape(bucketName), url.PathEscape(objectName)) + + encoded, err := readXattr(path) if err != nil { return Object{}, err } - defer file.Close() + var obj Object - err = json.NewDecoder(file).Decode(&obj) + if err = json.Unmarshal(encoded, &obj.ObjectAttrs); err != nil { + return Object{}, err + } + + obj.Content, err = ioutil.ReadFile(path) if err != nil { return Object{}, err } + obj.Name = filepath.ToSlash(objectName) obj.BucketName = bucketName obj.Size = int64(len(obj.Content)) @@ -209,7 +230,11 @@ func (s *storageFS) DeleteObject(bucketName, objectName string) error { if objectName == "" { return errors.New("can't delete object with empty name") } - return os.Remove(filepath.Join(s.rootDir, url.PathEscape(bucketName), url.PathEscape(objectName))) + path := filepath.Join(s.rootDir, url.PathEscape(bucketName), url.PathEscape(objectName)) + if err := removeXattrFile(path); err != nil { + return err + } + return os.Remove(path) } // PatchObject patches the given object metadata. diff --git a/internal/backend/xattr_darwin.go b/internal/backend/xattr_darwin.go new file mode 100644 index 0000000000..271693db74 --- /dev/null +++ b/internal/backend/xattr_darwin.go @@ -0,0 +1,23 @@ +package backend + +import ( + "github.com/pkg/xattr" +) + +const XATTR_KEY = "user.metadata" + +func writeXattr(path string, encoded []byte) error { + return xattr.Set(path, XATTR_KEY, encoded) +} + +func readXattr(path string) ([]byte, error) { + return xattr.Get(path, XATTR_KEY) +} + +func isXattrFile(path string) bool { + return false +} + +func removeXattrFile(path string) error { + return nil +} diff --git a/internal/backend/xattr_linux.go b/internal/backend/xattr_linux.go new file mode 100644 index 0000000000..271693db74 --- /dev/null +++ b/internal/backend/xattr_linux.go @@ -0,0 +1,23 @@ +package backend + +import ( + "github.com/pkg/xattr" +) + +const XATTR_KEY = "user.metadata" + +func writeXattr(path string, encoded []byte) error { + return xattr.Set(path, XATTR_KEY, encoded) +} + +func readXattr(path string) ([]byte, error) { + return xattr.Get(path, XATTR_KEY) +} + +func isXattrFile(path string) bool { + return false +} + +func removeXattrFile(path string) error { + return nil +} diff --git a/internal/backend/xattr_windows.go b/internal/backend/xattr_windows.go new file mode 100644 index 0000000000..fd20eff067 --- /dev/null +++ b/internal/backend/xattr_windows.go @@ -0,0 +1,25 @@ +package backend + +import ( + "io/ioutil" + "os" + "strings" +) + +const XATTR_SUFFIX = ".metadata" + +func writeXattr(path string, encoded []byte) error { + return ioutil.WriteFile(path+XATTR_SUFFIX, encoded, 0o600) +} + +func readXattr(path string) ([]byte, error) { + return ioutil.ReadFile(path + XATTR_SUFFIX) +} + +func isXattrFile(path string) bool { + return strings.HasSuffix(path, XATTR_SUFFIX) +} + +func removeXattrFile(path string) error { + return os.Remove(path + XATTR_SUFFIX) +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index fd7ed676f4..d3706a4a35 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -25,7 +25,7 @@ func TestLoadConfig(t *testing.T) { name: "all parameters", args: []string{ "-backend", "memory", - "-filesystem-root", "/tmp/something", + "-filesystem-root", "/var/tmp/something", "-public-host", "127.0.0.1.nip.io:8443", "-external-url", "https://myhost.example.com:8443", "-cors-headers", "X-Goog-Meta-Uploader", @@ -42,7 +42,7 @@ func TestLoadConfig(t *testing.T) { expectedConfig: Config{ Seed: "/var/gcs", backend: "memory", - fsRoot: "/tmp/something", + fsRoot: "/var/tmp/something", publicHost: "127.0.0.1.nip.io:8443", externalURL: "https://myhost.example.com:8443", allowedCORSHeaders: []string{"X-Goog-Meta-Uploader"}, @@ -140,7 +140,7 @@ func TestToFakeGcsOptions(t *testing.T) { "filesystem", Config{ backend: "filesystem", - fsRoot: "/tmp/something", + fsRoot: "/var/tmp/something", publicHost: "127.0.0.1.nip.io:8443", externalURL: "https://myhost.example.com:8443", host: "0.0.0.0", @@ -154,7 +154,7 @@ func TestToFakeGcsOptions(t *testing.T) { bucketLocation: "US-EAST1", }, fakestorage.Options{ - StorageRoot: "/tmp/something", + StorageRoot: "/var/tmp/something", PublicHost: "127.0.0.1.nip.io:8443", ExternalURL: "https://myhost.example.com:8443", Host: "0.0.0.0", @@ -176,7 +176,7 @@ func TestToFakeGcsOptions(t *testing.T) { "memory", Config{ backend: "memory", - fsRoot: "/tmp/something", + fsRoot: "/var/tmp/something", publicHost: "127.0.0.1.nip.io:8443", externalURL: "https://myhost.example.com:8443", host: "0.0.0.0",