From c0760028f7f2234ce3bbb24412fbc771b61c22b2 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 28 Jan 2023 10:12:42 +0100 Subject: [PATCH 1/9] Add bulk download feature to client --- pkg/paperless/download.go | 100 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 pkg/paperless/download.go diff --git a/pkg/paperless/download.go b/pkg/paperless/download.go new file mode 100644 index 0000000..29b53ef --- /dev/null +++ b/pkg/paperless/download.go @@ -0,0 +1,100 @@ +package paperless + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "strings" + + "github.com/go-logr/logr" +) + +type BulkDownloadContent string + +type BulkDownloadParams struct { + DocumentIDs []int + FollowFormatting bool + Content BulkDownloadContent +} + +const ( + BulkDownloadBoth BulkDownloadContent = "both" + BulkDownloadArchives BulkDownloadContent = "archive" + BulkDownloadOriginal BulkDownloadContent = "originals" +) + +// String implements fmt.Stringer. +func (c BulkDownloadContent) String() string { + return string(c) +} + +// BulkDownload downloads the documents identified by BulkDownloadParams.DocumentIDs and saves to the given targetPath. +// If BulkDownloadParams.DocumentIDs is empty, all documents will be downloaded. +// If targetPath is empty, it will use the suggested file name from Paperless in the current working dir. +func (clt *Client) BulkDownload(ctx context.Context, targetPath string, params BulkDownloadParams) error { + req, err := clt.makeBulkDownloadRequest(ctx, params) + if err != nil { + return err + } + + log := logr.FromContextOrDiscard(ctx) + log.V(1).Info("Awaiting response") + resp, err := clt.HttpClient.Do(req) + if err != nil { + return fmt.Errorf("request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + b, _ := io.ReadAll(resp.Body) + return fmt.Errorf("request failed: %s: %s", resp.Status, string(b)) + } + + out, err := os.Create(getTargetPathOrFromHeader(targetPath, resp.Header)) + defer out.Close() + + log.V(1).Info("Writing download content to file", "file", out.Name()) + _, err = io.Copy(out, resp.Body) + if err != nil { + return fmt.Errorf("cannot read response body: %w", err) + } + return nil +} + +func (clt *Client) makeBulkDownloadRequest(ctx context.Context, params BulkDownloadParams) (*http.Request, error) { + log := logr.FromContextOrDiscard(ctx) + + js := map[string]any{ + "content": params.Content, + "follow_formatting": params.FollowFormatting, + "documents": params.DocumentIDs, + } + marshal, err := json.Marshal(js) + if err != nil { + return nil, fmt.Errorf("cannot serialize to JSON: %w", err) + } + body := bytes.NewReader(marshal) + + path := clt.URL + "/api/documents/bulk_download/" + log.V(1).Info("Preparing request", "path", path) + req, err := http.NewRequestWithContext(ctx, "POST", path, body) + if err != nil { + return nil, fmt.Errorf("cannot prepare request: %w", err) + } + clt.setAuth(req) + req.Header.Set("Content-Type", "application/json") + return req, nil +} + +func getTargetPathOrFromHeader(v string, header http.Header) string { + if v != "" { + return v + } + raw := header.Get("content-disposition") + fileName := strings.TrimSuffix(strings.TrimPrefix(raw, `attachment; filename="`), `"`) + return fileName +} From 3d4d5d9b3cc1b7a09519b6a4cd7b19257458ae94 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 28 Jan 2023 11:18:00 +0100 Subject: [PATCH 2/9] Add func to Query documents --- pkg/paperless/document.go | 14 ++++++ pkg/paperless/query.go | 93 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 pkg/paperless/document.go create mode 100644 pkg/paperless/query.go diff --git a/pkg/paperless/document.go b/pkg/paperless/document.go new file mode 100644 index 0000000..9a2075d --- /dev/null +++ b/pkg/paperless/document.go @@ -0,0 +1,14 @@ +package paperless + +type Document struct { + // ID of the document, read-only. + ID int `json:"id"` +} + +func MapToDocumentIDs(docs []Document) []int { + ids := make([]int, len(docs)) + for i := 0; i < len(docs); i++ { + ids[i] = docs[i].ID + } + return ids +} diff --git a/pkg/paperless/query.go b/pkg/paperless/query.go new file mode 100644 index 0000000..52f794e --- /dev/null +++ b/pkg/paperless/query.go @@ -0,0 +1,93 @@ +package paperless + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "reflect" + "strconv" + + "github.com/go-logr/logr" +) + +type QueryParams struct { + TruncateContent bool `param:"truncate_content"` +} + +type QueryResults struct { + Results []Document `json:"results,omitempty"` +} + +func (clt *Client) QueryDocuments(ctx context.Context, params QueryParams) ([]Document, error) { + req, err := clt.makeQueryRequest(ctx, params) + if err != nil { + return nil, err + } + + log := logr.FromContextOrDiscard(ctx) + log.V(1).Info("Awaiting response") + resp, err := clt.HttpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("request failed: %w", err) + } + defer resp.Body.Close() + + b, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("cannot read body: %w", err) + } + log.V(2).Info("Read response", "body", string(b)) + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("request failed: %s: %s", resp.Status, string(b)) + } + + result := QueryResults{} + parseErr := json.Unmarshal(b, &result) + if parseErr != nil { + return nil, fmt.Errorf("cannot parse JSON: %w", parseErr) + } + log.V(1).Info("Parsed response", "result", result) + return result.Results, nil +} + +func (clt *Client) makeQueryRequest(ctx context.Context, params QueryParams) (*http.Request, error) { + log := logr.FromContextOrDiscard(ctx) + + values := paramsToValues(params) + values.Set("ordering", "id") + + path := clt.URL + "/api/documents/?" + values.Encode() + log.V(1).Info("Preparing request", "path", path) + req, err := http.NewRequestWithContext(ctx, "GET", path, nil) + if err != nil { + return nil, fmt.Errorf("cannot prepare request: %w", err) + } + clt.setAuth(req) + req.Header.Set("Content-Type", "application/json") + return req, nil +} + +func paramsToValues(params QueryParams) url.Values { + values := url.Values{} + typ := reflect.TypeOf(params) + value := reflect.ValueOf(params) + for i := 0; i < typ.NumField(); i++ { + structField := typ.Field(i) + tag := structField.Tag.Get("param") + field := value.Field(i) + paramValue := "" + switch field.Kind() { + case reflect.Bool: + paramValue = strconv.FormatBool(field.Bool()) + case reflect.String: + paramValue = field.String() + default: + panic(fmt.Errorf("not implemented type: %s", field.Kind())) + } + values.Set(tag, paramValue) + } + return values +} From 8ad8b03cfe60323d9132ca5a9aca8cab61380d73 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 28 Jan 2023 11:13:44 +0100 Subject: [PATCH 3/9] Add bulk-download command --- .gitignore | 2 ++ README.md | 1 + bulk_download_command.go | 59 +++++++++++++++++++++++++++++++++++++++ flags.go | 32 +++++++++++++++++++++ main.go | 1 + pkg/paperless/download.go | 3 +- test/docker-compose.yml | 1 + 7 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 bulk_download_command.go diff --git a/.gitignore b/.gitignore index a9de979..8c36e56 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ # work /.work/ + +/documents.zip diff --git a/README.md b/README.md index acaeb1c..09ea7ff 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ CLI tool to interact with paperless-ngx remote API - `upload`: Uploads local document(s) to Paperless instance - `consume`: Consumes a local directory and uploads each file to Paperless instance. The files will be deleted once uploaded. +- `bulk-download`: ## Installation diff --git a/bulk_download_command.go b/bulk_download_command.go new file mode 100644 index 0000000..0fff32b --- /dev/null +++ b/bulk_download_command.go @@ -0,0 +1,59 @@ +package main + +import ( + "github.com/ccremer/clustercode/pkg/paperless" + "github.com/go-logr/logr" + "github.com/urfave/cli/v2" +) + +type BulkDownloadCommand struct { + cli.Command + + PaperlessURL string + PaperlessToken string + PaperlessUser string + + TargetPath string + Content string +} + +func newBulkDownloadCommand() *BulkDownloadCommand { + c := &BulkDownloadCommand{} + c.Command = cli.Command{ + Name: "bulk-download", + Usage: "Downloads multiple documents at once", + Action: actions(LogMetadata, c.Action), + Flags: []cli.Flag{ + newURLFlag(&c.PaperlessURL), + newUsernameFlag(&c.PaperlessUser), + newTokenFlag(&c.PaperlessToken), + newTargetPathFlag(&c.TargetPath), + newDownloadContentFlag(&c.Content), + }, + } + return c +} + +func (c *BulkDownloadCommand) Action(ctx *cli.Context) error { + log := logr.FromContextOrDiscard(ctx.Context) + + log.V(1) + clt := paperless.NewClient(c.PaperlessURL, c.PaperlessUser, c.PaperlessToken) + documents, queryErr := clt.QueryDocuments(ctx.Context, paperless.QueryParams{ + TruncateContent: true, + }) + if queryErr != nil { + return queryErr + } + documentIDs := paperless.MapToDocumentIDs(documents) + downloadErr := clt.BulkDownload(ctx.Context, c.TargetPath, paperless.BulkDownloadParams{ + FollowFormatting: true, + Content: paperless.BulkDownloadContent(c.Content), + DocumentIDs: documentIDs, + }) + if downloadErr != nil { + return downloadErr + } + log.Info("Downloaded zip archive") + return downloadErr +} diff --git a/flags.go b/flags.go index 344b485..a73fbb7 100644 --- a/flags.go +++ b/flags.go @@ -2,8 +2,10 @@ package main import ( "fmt" + "strings" "time" + "github.com/ccremer/clustercode/pkg/paperless" "github.com/urfave/cli/v2" ) @@ -114,6 +116,36 @@ func newConsumeDelayFlag(dest *time.Duration) *cli.DurationFlag { } } +func newTargetPathFlag(dest *string) *cli.StringFlag { + return &cli.StringFlag{ + Name: "target-path", EnvVars: []string{"DOWNLOAD_TARGET_PATH"}, + Usage: "target file path where documents are downloaded.", + DefaultText: "default file name in current working directory", + Destination: dest, + } +} + +func newDownloadContentFlag(dest *string) *cli.StringFlag { + return &cli.StringFlag{ + Name: "content", EnvVars: []string{"DOWNLOAD_CONTENT"}, + Usage: "selection of document variant.", + Value: paperless.BulkDownloadArchives.String(), + Destination: dest, + Action: func(ctx *cli.Context, s string) error { + enum := []string{ + paperless.BulkDownloadArchives.String(), + paperless.BulkDownloadOriginal.String(), + paperless.BulkDownloadBoth.String()} + for _, key := range enum { + if s == key { + return nil + } + } + return fmt.Errorf("parameter %q must be one of [%s]", "content", strings.Join(enum, ", ")) + }, + } +} + func checkEmptyString(flagName string) func(*cli.Context, string) error { return func(ctx *cli.Context, s string) error { if s == "" { diff --git a/main.go b/main.go index 9b2bdc7..772aa6e 100644 --- a/main.go +++ b/main.go @@ -43,6 +43,7 @@ func NewApp() *cli.App { }, Commands: []*cli.Command{ &newUploadCommand().Command, + &newBulkDownloadCommand().Command, &newConsumeCommand().Command, }, } diff --git a/pkg/paperless/download.go b/pkg/paperless/download.go index 29b53ef..d852eee 100644 --- a/pkg/paperless/download.go +++ b/pkg/paperless/download.go @@ -33,7 +33,6 @@ func (c BulkDownloadContent) String() string { } // BulkDownload downloads the documents identified by BulkDownloadParams.DocumentIDs and saves to the given targetPath. -// If BulkDownloadParams.DocumentIDs is empty, all documents will be downloaded. // If targetPath is empty, it will use the suggested file name from Paperless in the current working dir. func (clt *Client) BulkDownload(ctx context.Context, targetPath string, params BulkDownloadParams) error { req, err := clt.makeBulkDownloadRequest(ctx, params) @@ -80,7 +79,7 @@ func (clt *Client) makeBulkDownloadRequest(ctx context.Context, params BulkDownl body := bytes.NewReader(marshal) path := clt.URL + "/api/documents/bulk_download/" - log.V(1).Info("Preparing request", "path", path) + log.V(1).Info("Preparing request", "path", path, "document_ids", params.DocumentIDs) req, err := http.NewRequestWithContext(ctx, "POST", path, body) if err != nil { return nil, fmt.Errorf("cannot prepare request: %w", err) diff --git a/test/docker-compose.yml b/test/docker-compose.yml index 60219fd..c9b1224 100644 --- a/test/docker-compose.yml +++ b/test/docker-compose.yml @@ -42,6 +42,7 @@ services: PAPERLESS_REDIS: redis://broker:6379 PAPERLESS_ADMIN_USER: admin PAPERLESS_ADMIN_PASSWORD: admin + PAPERLESS_FILENAME_FORMAT: "{created_year}/{correspondent}/{title}" volumes: redisdata: From 63a503cbd4a4e359fc18827f9ad12d4c94a6dace Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 29 Jan 2023 09:38:15 +0100 Subject: [PATCH 4/9] Add pkg to unzip a file --- go.mod | 4 ++ go.sum | 3 ++ pkg/archive/testdata/unzip.zip | Bin 0 -> 539 bytes pkg/archive/unzip.go | 70 +++++++++++++++++++++++++++++++++ pkg/archive/unzip_test.go | 28 +++++++++++++ 5 files changed, 105 insertions(+) create mode 100644 pkg/archive/testdata/unzip.zip create mode 100644 pkg/archive/unzip.go create mode 100644 pkg/archive/unzip_test.go diff --git a/go.mod b/go.mod index 4e2b2fc..6c6d3e5 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/fsnotify/fsnotify v1.6.0 github.com/go-logr/logr v1.2.3 github.com/pterm/pterm v0.12.51 + github.com/stretchr/testify v1.8.1 github.com/urfave/cli/v2 v2.23.7 ) @@ -15,9 +16,11 @@ require ( atomicgo.dev/keyboard v0.2.8 // indirect github.com/containerd/console v1.0.3 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/gookit/color v1.5.2 // indirect github.com/lithammer/fuzzysearch v1.1.5 // indirect github.com/mattn/go-runewidth v0.0.14 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 // indirect @@ -25,4 +28,5 @@ require ( golang.org/x/sys v0.0.0-20220908164124-27713097b956 // indirect golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect golang.org/x/text v0.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index c904c01..08d2fb6 100644 --- a/go.sum +++ b/go.sum @@ -38,8 +38,10 @@ github.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuOb github.com/klauspost/cpuid/v2 v2.1.0/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/klauspost/cpuid/v2 v2.2.0 h1:4ZexSFt8agMNzNisrsilL6RClWDC5YJnLHNIfTy4iuc= github.com/klauspost/cpuid/v2 v2.2.0/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/lithammer/fuzzysearch v1.1.5 h1:Ag7aKU08wp0R9QCfF4GoGST9HbmAIeLP7xwMrOBEp1c= github.com/lithammer/fuzzysearch v1.1.5/go.mod h1:1R1LRNk7yKid1BaQkmuLQaHruxcC4HmAH30Dh61Ih1Q= @@ -116,6 +118,7 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/pkg/archive/testdata/unzip.zip b/pkg/archive/testdata/unzip.zip new file mode 100644 index 0000000000000000000000000000000000000000..c3556ce5e8ffd35465a6b3341efccecf82651220 GIT binary patch literal 539 zcmWIWW@h1H0D+m_wqalfl;CHOVQ|STQt-@Ea4bsB$Sh0M4-MgDV0O~GmJGtB72FJr zEH9WD7{EjTib3;DKDF=y4FX|lOoM_;lN5kv>!oGpq{0mi#5A-xKQ~n&IX|x?HLrvV zVM7zpshSJ#Zw5IWgn5BBl;juWq?V=TAj}j7nx}+mW=MX4f)7X~&_G5eIc8kGmjD_J z0s;(w9YHikSg=CE0wWwy4MhzJh@p%?3 Date: Sun, 29 Jan 2023 11:04:06 +0100 Subject: [PATCH 5/9] Add flag to unzip downloaded file --- .gitignore | 1 + bulk_download_command.go | 55 +++++++++++++++++++++++++++++++++++---- flags.go | 10 ++++++- pkg/paperless/download.go | 19 +++----------- 4 files changed, 63 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 8c36e56..a58ccd1 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ /.work/ /documents.zip +/documents diff --git a/bulk_download_command.go b/bulk_download_command.go index 0fff32b..2b82230 100644 --- a/bulk_download_command.go +++ b/bulk_download_command.go @@ -1,6 +1,10 @@ package main import ( + "fmt" + "os" + + "github.com/ccremer/clustercode/pkg/archive" "github.com/ccremer/clustercode/pkg/paperless" "github.com/go-logr/logr" "github.com/urfave/cli/v2" @@ -13,8 +17,9 @@ type BulkDownloadCommand struct { PaperlessToken string PaperlessUser string - TargetPath string - Content string + TargetPath string + Content string + UnzipEnabled bool } func newBulkDownloadCommand() *BulkDownloadCommand { @@ -29,6 +34,7 @@ func newBulkDownloadCommand() *BulkDownloadCommand { newTokenFlag(&c.PaperlessToken), newTargetPathFlag(&c.TargetPath), newDownloadContentFlag(&c.Content), + newUnzipFlag(&c.UnzipEnabled), }, } return c @@ -39,6 +45,8 @@ func (c *BulkDownloadCommand) Action(ctx *cli.Context) error { log.V(1) clt := paperless.NewClient(c.PaperlessURL, c.PaperlessUser, c.PaperlessToken) + + log.Info("Getting list of documents") documents, queryErr := clt.QueryDocuments(ctx.Context, paperless.QueryParams{ TruncateContent: true, }) @@ -46,7 +54,15 @@ func (c *BulkDownloadCommand) Action(ctx *cli.Context) error { return queryErr } documentIDs := paperless.MapToDocumentIDs(documents) - downloadErr := clt.BulkDownload(ctx.Context, c.TargetPath, paperless.BulkDownloadParams{ + + tmpFile, createTempErr := os.CreateTemp(os.TempDir(), "paperless-bulk-download-") + if createTempErr != nil { + return fmt.Errorf("cannot open temporary file: %w", createTempErr) + } + defer os.Remove(tmpFile.Name()) // cleanup if not renamed + + log.Info("Downloading documents") + downloadErr := clt.BulkDownload(ctx.Context, tmpFile, paperless.BulkDownloadParams{ FollowFormatting: true, Content: paperless.BulkDownloadContent(c.Content), DocumentIDs: documentIDs, @@ -54,6 +70,35 @@ func (c *BulkDownloadCommand) Action(ctx *cli.Context) error { if downloadErr != nil { return downloadErr } - log.Info("Downloaded zip archive") - return downloadErr + + if c.UnzipEnabled { + return c.unzip(ctx, tmpFile) + } + return c.move(ctx, tmpFile) +} + +func (c *BulkDownloadCommand) unzip(ctx *cli.Context, tmpFile *os.File) error { + log := logr.FromContextOrDiscard(ctx.Context) + downloadFilePath := c.TargetPath + if c.TargetPath == "" { + downloadFilePath = "documents" + } + if unzipErr := archive.Unzip(ctx.Context, tmpFile.Name(), downloadFilePath); unzipErr != nil { + return fmt.Errorf("cannot unzip file %q to %q: %w", tmpFile.Name(), downloadFilePath, unzipErr) + } + log.Info("Unzipped archive to dir", "dir", downloadFilePath) + return nil +} + +func (c *BulkDownloadCommand) move(ctx *cli.Context, tmpFile *os.File) error { + log := logr.FromContextOrDiscard(ctx.Context) + downloadFilePath := c.TargetPath + if c.TargetPath == "" { + downloadFilePath = "documents.zip" + } + if renameErr := os.Rename(tmpFile.Name(), downloadFilePath); renameErr != nil { + return fmt.Errorf("cannot move temp file: %w", renameErr) + } + log.Info("Downloaded zip archive", "file", downloadFilePath) + return nil } diff --git a/flags.go b/flags.go index a73fbb7..4072cf6 100644 --- a/flags.go +++ b/flags.go @@ -120,7 +120,7 @@ func newTargetPathFlag(dest *string) *cli.StringFlag { return &cli.StringFlag{ Name: "target-path", EnvVars: []string{"DOWNLOAD_TARGET_PATH"}, Usage: "target file path where documents are downloaded.", - DefaultText: "default file name in current working directory", + DefaultText: "documents.zip", Destination: dest, } } @@ -146,6 +146,14 @@ func newDownloadContentFlag(dest *string) *cli.StringFlag { } } +func newUnzipFlag(dest *bool) *cli.BoolFlag { + return &cli.BoolFlag{ + Name: "unzip", EnvVars: []string{"DOWNLOAD_UNZIP"}, + Usage: "unzip the downloaded file.", + Destination: dest, + } +} + func checkEmptyString(flagName string) func(*cli.Context, string) error { return func(ctx *cli.Context, s string) error { if s == "" { diff --git a/pkg/paperless/download.go b/pkg/paperless/download.go index d852eee..e986951 100644 --- a/pkg/paperless/download.go +++ b/pkg/paperless/download.go @@ -8,7 +8,6 @@ import ( "io" "net/http" "os" - "strings" "github.com/go-logr/logr" ) @@ -34,7 +33,7 @@ func (c BulkDownloadContent) String() string { // BulkDownload downloads the documents identified by BulkDownloadParams.DocumentIDs and saves to the given targetPath. // If targetPath is empty, it will use the suggested file name from Paperless in the current working dir. -func (clt *Client) BulkDownload(ctx context.Context, targetPath string, params BulkDownloadParams) error { +func (clt *Client) BulkDownload(ctx context.Context, targetFile *os.File, params BulkDownloadParams) error { req, err := clt.makeBulkDownloadRequest(ctx, params) if err != nil { return err @@ -53,11 +52,8 @@ func (clt *Client) BulkDownload(ctx context.Context, targetPath string, params B return fmt.Errorf("request failed: %s: %s", resp.Status, string(b)) } - out, err := os.Create(getTargetPathOrFromHeader(targetPath, resp.Header)) - defer out.Close() - - log.V(1).Info("Writing download content to file", "file", out.Name()) - _, err = io.Copy(out, resp.Body) + log.V(1).Info("Writing download content to file", "file", targetFile.Name()) + _, err = io.Copy(targetFile, resp.Body) if err != nil { return fmt.Errorf("cannot read response body: %w", err) } @@ -88,12 +84,3 @@ func (clt *Client) makeBulkDownloadRequest(ctx context.Context, params BulkDownl req.Header.Set("Content-Type", "application/json") return req, nil } - -func getTargetPathOrFromHeader(v string, header http.Header) string { - if v != "" { - return v - } - raw := header.Get("content-disposition") - fileName := strings.TrimSuffix(strings.TrimPrefix(raw, `attachment; filename="`), `"`) - return fileName -} From 6e349a545d2c522f609d704a282b393c0f6847a1 Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 29 Jan 2023 10:56:15 +0100 Subject: [PATCH 6/9] Add overwrite flag and abort if target file exists --- bulk_download_command.go | 44 +++++++++++++++++++++++++++++----------- flags.go | 8 ++++++++ 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/bulk_download_command.go b/bulk_download_command.go index 2b82230..9c79c62 100644 --- a/bulk_download_command.go +++ b/bulk_download_command.go @@ -17,9 +17,10 @@ type BulkDownloadCommand struct { PaperlessToken string PaperlessUser string - TargetPath string - Content string - UnzipEnabled bool + TargetPath string + Content string + UnzipEnabled bool + OverwriteExistingTarget bool } func newBulkDownloadCommand() *BulkDownloadCommand { @@ -35,6 +36,7 @@ func newBulkDownloadCommand() *BulkDownloadCommand { newTargetPathFlag(&c.TargetPath), newDownloadContentFlag(&c.Content), newUnzipFlag(&c.UnzipEnabled), + newOverwriteFlag(&c.OverwriteExistingTarget), }, } return c @@ -43,7 +45,9 @@ func newBulkDownloadCommand() *BulkDownloadCommand { func (c *BulkDownloadCommand) Action(ctx *cli.Context) error { log := logr.FromContextOrDiscard(ctx.Context) - log.V(1) + if prepareErr := c.prepareTarget(); prepareErr != nil { + return prepareErr + } clt := paperless.NewClient(c.PaperlessURL, c.PaperlessUser, c.PaperlessToken) log.Info("Getting list of documents") @@ -79,10 +83,7 @@ func (c *BulkDownloadCommand) Action(ctx *cli.Context) error { func (c *BulkDownloadCommand) unzip(ctx *cli.Context, tmpFile *os.File) error { log := logr.FromContextOrDiscard(ctx.Context) - downloadFilePath := c.TargetPath - if c.TargetPath == "" { - downloadFilePath = "documents" - } + downloadFilePath := c.getTargetPath() if unzipErr := archive.Unzip(ctx.Context, tmpFile.Name(), downloadFilePath); unzipErr != nil { return fmt.Errorf("cannot unzip file %q to %q: %w", tmpFile.Name(), downloadFilePath, unzipErr) } @@ -92,13 +93,32 @@ func (c *BulkDownloadCommand) unzip(ctx *cli.Context, tmpFile *os.File) error { func (c *BulkDownloadCommand) move(ctx *cli.Context, tmpFile *os.File) error { log := logr.FromContextOrDiscard(ctx.Context) - downloadFilePath := c.TargetPath - if c.TargetPath == "" { - downloadFilePath = "documents.zip" - } + downloadFilePath := c.getTargetPath() if renameErr := os.Rename(tmpFile.Name(), downloadFilePath); renameErr != nil { return fmt.Errorf("cannot move temp file: %w", renameErr) } log.Info("Downloaded zip archive", "file", downloadFilePath) return nil } + +func (c *BulkDownloadCommand) getTargetPath() string { + if c.TargetPath != "" { + return c.TargetPath + } + if c.UnzipEnabled { + return "documents" + } + return "documents.zip" +} + +func (c *BulkDownloadCommand) prepareTarget() error { + target := c.getTargetPath() + if c.OverwriteExistingTarget { + return os.RemoveAll(target) + } + _, err := os.Stat(target) + if err != nil && os.IsNotExist(err) { + return nil + } + return fmt.Errorf("target %q exists, abort", target) +} diff --git a/flags.go b/flags.go index 4072cf6..9c5218f 100644 --- a/flags.go +++ b/flags.go @@ -154,6 +154,14 @@ func newUnzipFlag(dest *bool) *cli.BoolFlag { } } +func newOverwriteFlag(dest *bool) *cli.BoolFlag { + return &cli.BoolFlag{ + Name: "overwrite", EnvVars: []string{"DOWNLOAD_OVERWRITE"}, + Usage: "deletes existing file(s) before downloading.", + Destination: dest, + } +} + func checkEmptyString(flagName string) func(*cli.Context, string) error { return func(ctx *cli.Context, s string) error { if s == "" { From 58d6d154f2e84d66cf474e96cf56154608fc2e44 Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 29 Jan 2023 11:06:29 +0100 Subject: [PATCH 7/9] Update readme --- README.md | 4 ++-- bulk_download_command.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 09ea7ff..39d438c 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,9 @@ CLI tool to interact with paperless-ngx remote API ## Subcommands -- `upload`: Uploads local document(s) to Paperless instance +- `upload`: Uploads local document(s) to Paperless instance. - `consume`: Consumes a local directory and uploads each file to Paperless instance. The files will be deleted once uploaded. -- `bulk-download`: +- `bulk-download`: Downloads all documents at once. ## Installation diff --git a/bulk_download_command.go b/bulk_download_command.go index 9c79c62..e9776ef 100644 --- a/bulk_download_command.go +++ b/bulk_download_command.go @@ -27,7 +27,7 @@ func newBulkDownloadCommand() *BulkDownloadCommand { c := &BulkDownloadCommand{} c.Command = cli.Command{ Name: "bulk-download", - Usage: "Downloads multiple documents at once", + Usage: "Downloads all documents at once", Action: actions(LogMetadata, c.Action), Flags: []cli.Flag{ newURLFlag(&c.PaperlessURL), From 9c2cbd7bb67790705195a009c22e944623093b2e Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 29 Jan 2023 11:40:46 +0100 Subject: [PATCH 8/9] Set page size to 100 --- bulk_download_command.go | 2 ++ pkg/paperless/query.go | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/bulk_download_command.go b/bulk_download_command.go index e9776ef..b3f174d 100644 --- a/bulk_download_command.go +++ b/bulk_download_command.go @@ -53,6 +53,8 @@ func (c *BulkDownloadCommand) Action(ctx *cli.Context) error { log.Info("Getting list of documents") documents, queryErr := clt.QueryDocuments(ctx.Context, paperless.QueryParams{ TruncateContent: true, + Ordering: "id", + PageSize: 100, }) if queryErr != nil { return queryErr diff --git a/pkg/paperless/query.go b/pkg/paperless/query.go index 52f794e..d97d260 100644 --- a/pkg/paperless/query.go +++ b/pkg/paperless/query.go @@ -14,7 +14,9 @@ import ( ) type QueryParams struct { - TruncateContent bool `param:"truncate_content"` + TruncateContent bool `param:"truncate_content"` + Ordering string `param:"ordering"` + PageSize int `param:"page_size"` } type QueryResults struct { @@ -57,7 +59,6 @@ func (clt *Client) makeQueryRequest(ctx context.Context, params QueryParams) (*h log := logr.FromContextOrDiscard(ctx) values := paramsToValues(params) - values.Set("ordering", "id") path := clt.URL + "/api/documents/?" + values.Encode() log.V(1).Info("Preparing request", "path", path) @@ -84,6 +85,8 @@ func paramsToValues(params QueryParams) url.Values { paramValue = strconv.FormatBool(field.Bool()) case reflect.String: paramValue = field.String() + case reflect.Int: + paramValue = strconv.FormatInt(field.Int(), 10) default: panic(fmt.Errorf("not implemented type: %s", field.Kind())) } From 15fda1885d6837277b228d2060468bff01ee1b17 Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 29 Jan 2023 12:08:39 +0100 Subject: [PATCH 9/9] Implement pagination in queries --- pkg/paperless/query.go | 76 ++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/pkg/paperless/query.go b/pkg/paperless/query.go index d97d260..94b6174 100644 --- a/pkg/paperless/query.go +++ b/pkg/paperless/query.go @@ -16,14 +16,64 @@ import ( type QueryParams struct { TruncateContent bool `param:"truncate_content"` Ordering string `param:"ordering"` - PageSize int `param:"page_size"` + PageSize int64 `param:"page_size"` + page int64 `param:"page"` } -type QueryResults struct { +type QueryResult struct { Results []Document `json:"results,omitempty"` + Next string `json:"next,omitempty"` +} + +// NextPage returns the next page number for pagination. +// It returns 1 if QueryResult.Next is empty (first page), or 0 if there's an error parsing QueryResult.Next. +func (r QueryResult) NextPage() int64 { + if r.Next == "" { + return 1 // first page + } + values, err := url.ParseQuery(r.Next) + if err != nil { + return 0 + } + raw := values.Get("page") + page, err := strconv.ParseInt(raw, 10, 64) + if err != nil { + return 0 + } + return page } func (clt *Client) QueryDocuments(ctx context.Context, params QueryParams) ([]Document, error) { + documents := make([]Document, 0) + params.page = 1 + for i := int64(0); i < params.page; i++ { + result, err := clt.queryDocumentsInPage(ctx, params) + if err != nil { + return nil, err + } + params.page = result.NextPage() + documents = append(documents, result.Results...) + } + return documents, nil +} + +func (clt *Client) makeQueryRequest(ctx context.Context, params QueryParams) (*http.Request, error) { + log := logr.FromContextOrDiscard(ctx) + + values := paramsToValues(params) + + path := clt.URL + "/api/documents/?" + values.Encode() + log.V(1).Info("Preparing request", "path", path) + req, err := http.NewRequestWithContext(ctx, "GET", path, nil) + if err != nil { + return nil, fmt.Errorf("cannot prepare request: %w", err) + } + clt.setAuth(req) + req.Header.Set("Content-Type", "application/json") + return req, nil +} + +func (clt *Client) queryDocumentsInPage(ctx context.Context, params QueryParams) (*QueryResult, error) { req, err := clt.makeQueryRequest(ctx, params) if err != nil { return nil, err @@ -46,29 +96,13 @@ func (clt *Client) QueryDocuments(ctx context.Context, params QueryParams) ([]Do return nil, fmt.Errorf("request failed: %s: %s", resp.Status, string(b)) } - result := QueryResults{} + result := QueryResult{} parseErr := json.Unmarshal(b, &result) if parseErr != nil { return nil, fmt.Errorf("cannot parse JSON: %w", parseErr) } log.V(1).Info("Parsed response", "result", result) - return result.Results, nil -} - -func (clt *Client) makeQueryRequest(ctx context.Context, params QueryParams) (*http.Request, error) { - log := logr.FromContextOrDiscard(ctx) - - values := paramsToValues(params) - - path := clt.URL + "/api/documents/?" + values.Encode() - log.V(1).Info("Preparing request", "path", path) - req, err := http.NewRequestWithContext(ctx, "GET", path, nil) - if err != nil { - return nil, fmt.Errorf("cannot prepare request: %w", err) - } - clt.setAuth(req) - req.Header.Set("Content-Type", "application/json") - return req, nil + return &result, nil } func paramsToValues(params QueryParams) url.Values { @@ -85,7 +119,7 @@ func paramsToValues(params QueryParams) url.Values { paramValue = strconv.FormatBool(field.Bool()) case reflect.String: paramValue = field.String() - case reflect.Int: + case reflect.Int64: paramValue = strconv.FormatInt(field.Int(), 10) default: panic(fmt.Errorf("not implemented type: %s", field.Kind()))