Skip to content

Commit

Permalink
Merge pull request #34 from frictionlessdata/fiz_load_zip_datapackage
Browse files Browse the repository at this point in the history
Fix loading remote zip datapackages
  • Loading branch information
danielfireman committed Apr 29, 2022
2 parents f9714f5 + ae7db7d commit a7c9ea1
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 31 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Expand Up @@ -13,4 +13,6 @@
# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
.glide/

covprofile
covprofile

.DS_Store
35 changes: 24 additions & 11 deletions datapackage/package.go
Expand Up @@ -181,7 +181,7 @@ func (p *Package) Zip(path string) error {
fPaths := []string{descriptorPath}
for _, r := range p.resources {
for _, p := range r.path {
c, err := read(filepath.Join(r.basePath, p))
_, c, err := read(filepath.Join(r.basePath, p))
if err != nil {
return err
}
Expand Down Expand Up @@ -293,7 +293,7 @@ func FromString(in string, basePath string, loaders ...validator.RegistryLoader)
// Load the data package descriptor from the specified URL or file path.
// If path has the ".zip" extension, it will be saved in local filesystem and decompressed before loading.
func Load(path string, loaders ...validator.RegistryLoader) (*Package, error) {
contents, err := read(path)
localPath, contents, err := read(path)
if err != nil {
return nil, fmt.Errorf("error reading path contents (%s): %w", path, err)
}
Expand All @@ -305,34 +305,47 @@ func Load(path string, loaders ...validator.RegistryLoader) (*Package, error) {
if err != nil {
return nil, fmt.Errorf("error creating temporary directory: %w", err)
}
fNames, err := unzip(path, dir)
fNames, err := unzip(localPath, dir)
if err != nil {
return nil, fmt.Errorf("error unzipping path contents (%s): %w", path, err)
return nil, fmt.Errorf("error unzipping path contents (%s): %w", localPath, err)
}
if _, ok := fNames[descriptorFileNameWithinZip]; ok {
return Load(filepath.Join(dir, descriptorFileNameWithinZip), loaders...)
}
return nil, fmt.Errorf("zip file %s does not contain a file called %s", path, descriptorFileNameWithinZip)
return nil, fmt.Errorf("zip file %s does not contain a file called %s", localPath, descriptorFileNameWithinZip)
}

func read(path string) ([]byte, error) {
func read(path string) (string, []byte, error) {
if strings.HasPrefix(path, "http") {
resp, err := http.Get(path)
if err != nil {
return nil, fmt.Errorf("error performing HTTP GET(%s): %w", path, err)
return "", nil, fmt.Errorf("error performing HTTP GET(%s): %w", path, err)
}
defer resp.Body.Close()
buf, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("error reading response body contents (%s): %w", path, err)
return "", nil, fmt.Errorf("error reading response body contents (%s): %w", path, err)
}
return buf, nil
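// Materialize the downloaded bytes in a local temp file (named *.zip) and
// return its path, so callers such as Load can hand a filesystem path to unzip.
// Note this happens for every HTTP path, whether or not the content is a zip.
// Keeping the file on disk also makes debugging easier.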
localPath, err := func() (string, error) {
f, err := ioutil.TempFile("", "*.zip")
if err != nil {
return "", fmt.Errorf("error creating temp file to save zip (dir:%s): %w", os.TempDir(), err)
}
defer f.Close()
if _, err := f.Write(buf); err != nil {
return f.Name(), fmt.Errorf("error writing temp file to save zip (%s): %w", f.Name(), err)
}
return f.Name(), nil
}()
return localPath, buf, err
}
buf, err := ioutil.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("error reading local file contents (%s): %w", path, err)
return "", nil, fmt.Errorf("error reading local file contents (%s): %w", path, err)
}
return buf, nil
return path, buf, nil
}

func unzip(archive, basePath string) (map[string]struct{}, error) {
Expand Down
44 changes: 25 additions & 19 deletions datapackage/package_test.go
Expand Up @@ -12,6 +12,7 @@ import (
"os"
"path/filepath"
"reflect"
"strconv"
"strings"
"testing"

Expand Down Expand Up @@ -456,26 +457,11 @@ func TestLoad(t *testing.T) {
})
t.Run("LocalZip", func(t *testing.T) {
is := is.New(t)
// Creating a zip file.
fName := filepath.Join(dir, "pkg.zip")
zipFile, err := os.Create(fName)
pkg, err := Load("test_package.zip", validator.InMemoryLoader())
is.NoErr(err)
defer zipFile.Close()

// Adding a datapackage.json file to the zip with proper contents.
w := zip.NewWriter(zipFile)
f, err := w.Create("datapackage.json")
is.NoErr(err)
_, err = f.Write([]byte(r1Str))
is.NoErr(err)
is.NoErr(w.Close())

// Load and check package.
pkg, err := Load(fName, validator.InMemoryLoader())
is.NoErr(err)
res := pkg.GetResource("res1")
is.Equal(res.name, "res1")
is.Equal(res.path, []string{"foo.csv"})
res := pkg.GetResource("books")
is.Equal(res.name, "books")
is.Equal(res.path, []string{"data.csv"})
})
t.Run("LocalZipWithSubdirs", func(t *testing.T) {
is := is.New(t)
Expand Down Expand Up @@ -549,6 +535,26 @@ func TestLoad(t *testing.T) {
})
}
})
// RemoteZip checks that Load can fetch a zipped data package over HTTP. The
// local test_package.zip fixture is served by an in-process httptest server.
t.Run("RemoteZip", func(t *testing.T) {
	is := is.New(t)
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// This handler runs on a server goroutine, not on the test goroutine.
		// FailNow-style assertions must only be called from the test goroutine,
		// so failures here are reported with t.Errorf (safe from any goroutine)
		// plus an HTTP 500 response instead.
		f, err := os.Open("test_package.zip")
		if err != nil {
			t.Errorf("error opening zip fixture: %v", err)
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		defer f.Close()

		stat, err := f.Stat()
		if err != nil {
			t.Errorf("error getting zip fixture info: %v", err)
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "application/octet-stream")
		// Content-Length is the fixture size formatted as a decimal string.
		w.Header().Set("Content-Length", strconv.FormatInt(stat.Size(), 10))
		// Headers are already sent once the copy starts, so a failure can
		// only be recorded on the test, not turned into an error response.
		if _, err := io.Copy(w, f); err != nil {
			t.Errorf("error writing zip fixture to response: %v", err)
		}
	}))
	// Close waits for outstanding requests, so the handler cannot outlive the subtest.
	defer ts.Close()

	pkg, err := Load(ts.URL+"/package.zip", validator.InMemoryLoader())
	is.NoErr(err)
	res := pkg.GetResource("books")
	is.Equal(res.name, "books")
	is.Equal(res.path, []string{"data.csv"})
})
t.Run("InvalidPath", func(t *testing.T) {
_, err := Load("foobar", validator.InMemoryLoader())
if err == nil {
Expand Down
Binary file added datapackage/test_package.zip
Binary file not shown.

0 comments on commit a7c9ea1

Please sign in to comment.