From c0760028f7f2234ce3bbb24412fbc771b61c22b2 Mon Sep 17 00:00:00 2001
From: Chris <github.account@chrigel.net>
Date: Sat, 28 Jan 2023 10:12:42 +0100
Subject: [PATCH 1/9] Add bulk download feature to client

---
 pkg/paperless/download.go | 100 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 pkg/paperless/download.go

diff --git a/pkg/paperless/download.go b/pkg/paperless/download.go
new file mode 100644
index 0000000..29b53ef
--- /dev/null
+++ b/pkg/paperless/download.go
@@ -0,0 +1,100 @@
+package paperless
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+
+	"github.com/go-logr/logr"
+)
+
+type BulkDownloadContent string
+
+type BulkDownloadParams struct {
+	DocumentIDs      []int
+	FollowFormatting bool
+	Content          BulkDownloadContent
+}
+
+const (
+	BulkDownloadBoth     BulkDownloadContent = "both"
+	BulkDownloadArchives BulkDownloadContent = "archive"
+	BulkDownloadOriginal BulkDownloadContent = "originals"
+)
+
+// String implements fmt.Stringer.
+func (c BulkDownloadContent) String() string {
+	return string(c)
+}
+
+// BulkDownload downloads the documents identified by BulkDownloadParams.DocumentIDs and saves to the given targetPath.
+// If BulkDownloadParams.DocumentIDs is empty, all documents will be downloaded.
+// If targetPath is empty, it will use the suggested file name from Paperless in the current working dir.
+func (clt *Client) BulkDownload(ctx context.Context, targetPath string, params BulkDownloadParams) error {
+	req, err := clt.makeBulkDownloadRequest(ctx, params)
+	if err != nil {
+		return err
+	}
+
+	log := logr.FromContextOrDiscard(ctx)
+	log.V(1).Info("Awaiting response")
+	resp, err := clt.HttpClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("request failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		b, _ := io.ReadAll(resp.Body)
+		return fmt.Errorf("request failed: %s: %s", resp.Status, string(b))
+	}
+
+	out, err := os.Create(getTargetPathOrFromHeader(targetPath, resp.Header))
+	defer out.Close()
+
+	log.V(1).Info("Writing download content to file", "file", out.Name())
+	_, err = io.Copy(out, resp.Body)
+	if err != nil {
+		return fmt.Errorf("cannot read response body: %w", err)
+	}
+	return nil
+}
+
+func (clt *Client) makeBulkDownloadRequest(ctx context.Context, params BulkDownloadParams) (*http.Request, error) {
+	log := logr.FromContextOrDiscard(ctx)
+
+	js := map[string]any{
+		"content":           params.Content,
+		"follow_formatting": params.FollowFormatting,
+		"documents":         params.DocumentIDs,
+	}
+	marshal, err := json.Marshal(js)
+	if err != nil {
+		return nil, fmt.Errorf("cannot serialize to JSON: %w", err)
+	}
+	body := bytes.NewReader(marshal)
+
+	path := clt.URL + "/api/documents/bulk_download/"
+	log.V(1).Info("Preparing request", "path", path)
+	req, err := http.NewRequestWithContext(ctx, "POST", path, body)
+	if err != nil {
+		return nil, fmt.Errorf("cannot prepare request: %w", err)
+	}
+	clt.setAuth(req)
+	req.Header.Set("Content-Type", "application/json")
+	return req, nil
+}
+
+func getTargetPathOrFromHeader(v string, header http.Header) string {
+	if v != "" {
+		return v
+	}
+	raw := header.Get("content-disposition")
+	fileName := strings.TrimSuffix(strings.TrimPrefix(raw, `attachment; filename="`), `"`)
+	return fileName
+}

From 3d4d5d9b3cc1b7a09519b6a4cd7b19257458ae94 Mon Sep 17 00:00:00 2001
From: Chris <github.account@chrigel.net>
Date: Sat, 28 Jan 2023 11:18:00 +0100
Subject: [PATCH 2/9] Add func to Query documents

---
 pkg/paperless/document.go | 14 ++++++
 pkg/paperless/query.go    | 93 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+)
 create mode 100644 pkg/paperless/document.go
 create mode 100644 pkg/paperless/query.go

diff --git a/pkg/paperless/document.go b/pkg/paperless/document.go
new file mode 100644
index 0000000..9a2075d
--- /dev/null
+++ b/pkg/paperless/document.go
@@ -0,0 +1,14 @@
+package paperless
+
+type Document struct {
+	// ID of the document, read-only.
+	ID int `json:"id"`
+}
+
+func MapToDocumentIDs(docs []Document) []int {
+	documentIDs := make([]int, len(docs))
+	for idx, doc := range docs {
+		documentIDs[idx] = doc.ID // keep the IDs in the same order as docs
+	}
+	return documentIDs
+}
diff --git a/pkg/paperless/query.go b/pkg/paperless/query.go
new file mode 100644
index 0000000..52f794e
--- /dev/null
+++ b/pkg/paperless/query.go
@@ -0,0 +1,93 @@
+package paperless
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"reflect"
+	"strconv"
+
+	"github.com/go-logr/logr"
+)
+
+type QueryParams struct {
+	TruncateContent bool `param:"truncate_content"`
+}
+
+type QueryResults struct {
+	Results []Document `json:"results,omitempty"`
+}
+
+func (clt *Client) QueryDocuments(ctx context.Context, params QueryParams) ([]Document, error) {
+	req, err := clt.makeQueryRequest(ctx, params)
+	if err != nil {
+		return nil, err
+	}
+
+	log := logr.FromContextOrDiscard(ctx)
+	log.V(1).Info("Awaiting response")
+	resp, err := clt.HttpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("request failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	b, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read body: %w", err)
+	}
+	log.V(2).Info("Read response", "body", string(b))
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("request failed: %s: %s", resp.Status, string(b))
+	}
+
+	result := QueryResults{}
+	parseErr := json.Unmarshal(b, &result)
+	if parseErr != nil {
+		return nil, fmt.Errorf("cannot parse JSON: %w", parseErr)
+	}
+	log.V(1).Info("Parsed response", "result", result)
+	return result.Results, nil
+}
+
+func (clt *Client) makeQueryRequest(ctx context.Context, params QueryParams) (*http.Request, error) {
+	log := logr.FromContextOrDiscard(ctx)
+
+	values := paramsToValues(params)
+	values.Set("ordering", "id")
+
+	path := clt.URL + "/api/documents/?" + values.Encode()
+	log.V(1).Info("Preparing request", "path", path)
+	req, err := http.NewRequestWithContext(ctx, "GET", path, nil)
+	if err != nil {
+		return nil, fmt.Errorf("cannot prepare request: %w", err)
+	}
+	clt.setAuth(req)
+	req.Header.Set("Content-Type", "application/json")
+	return req, nil
+}
+
+func paramsToValues(params QueryParams) url.Values {
+	values := url.Values{}
+	typ := reflect.TypeOf(params)
+	value := reflect.ValueOf(params)
+	for i := 0; i < typ.NumField(); i++ {
+		structField := typ.Field(i)
+		tag := structField.Tag.Get("param")
+		field := value.Field(i)
+		paramValue := ""
+		switch field.Kind() {
+		case reflect.Bool:
+			paramValue = strconv.FormatBool(field.Bool())
+		case reflect.String:
+			paramValue = field.String()
+		default:
+			panic(fmt.Errorf("not implemented type: %s", field.Kind()))
+		}
+		values.Set(tag, paramValue)
+	}
+	return values
+}

From 8ad8b03cfe60323d9132ca5a9aca8cab61380d73 Mon Sep 17 00:00:00 2001
From: Chris <github.account@chrigel.net>
Date: Sat, 28 Jan 2023 11:13:44 +0100
Subject: [PATCH 3/9] Add bulk-download command

---
 .gitignore                |  2 ++
 README.md                 |  1 +
 bulk_download_command.go  | 59 +++++++++++++++++++++++++++++++++++++++
 flags.go                  | 32 +++++++++++++++++++++
 main.go                   |  1 +
 pkg/paperless/download.go |  3 +-
 test/docker-compose.yml   |  1 +
 7 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 bulk_download_command.go

diff --git a/.gitignore b/.gitignore
index a9de979..8c36e56 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,5 @@
 
 # work
 /.work/
+
+/documents.zip
diff --git a/README.md b/README.md
index acaeb1c..09ea7ff 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,7 @@ CLI tool to interact with paperless-ngx remote API
 
 - `upload`: Uploads local document(s) to Paperless instance
 - `consume`: Consumes a local directory and uploads each file to Paperless instance. The files will be deleted once uploaded.
+- `bulk-download`: 
 
 ## Installation
 
diff --git a/bulk_download_command.go b/bulk_download_command.go
new file mode 100644
index 0000000..0fff32b
--- /dev/null
+++ b/bulk_download_command.go
@@ -0,0 +1,59 @@
+package main
+
+import (
+	"github.com/ccremer/clustercode/pkg/paperless"
+	"github.com/go-logr/logr"
+	"github.com/urfave/cli/v2"
+)
+
+type BulkDownloadCommand struct {
+	cli.Command
+
+	PaperlessURL   string
+	PaperlessToken string
+	PaperlessUser  string
+
+	TargetPath string
+	Content    string
+}
+
+func newBulkDownloadCommand() *BulkDownloadCommand {
+	c := &BulkDownloadCommand{}
+	c.Command = cli.Command{
+		Name:   "bulk-download",
+		Usage:  "Downloads multiple documents at once",
+		Action: actions(LogMetadata, c.Action),
+		Flags: []cli.Flag{
+			newURLFlag(&c.PaperlessURL),
+			newUsernameFlag(&c.PaperlessUser),
+			newTokenFlag(&c.PaperlessToken),
+			newTargetPathFlag(&c.TargetPath),
+			newDownloadContentFlag(&c.Content),
+		},
+	}
+	return c
+}
+
+func (c *BulkDownloadCommand) Action(ctx *cli.Context) error {
+	log := logr.FromContextOrDiscard(ctx.Context)
+
+	log.V(1)
+	clt := paperless.NewClient(c.PaperlessURL, c.PaperlessUser, c.PaperlessToken)
+	documents, queryErr := clt.QueryDocuments(ctx.Context, paperless.QueryParams{
+		TruncateContent: true,
+	})
+	if queryErr != nil {
+		return queryErr
+	}
+	documentIDs := paperless.MapToDocumentIDs(documents)
+	downloadErr := clt.BulkDownload(ctx.Context, c.TargetPath, paperless.BulkDownloadParams{
+		FollowFormatting: true,
+		Content:          paperless.BulkDownloadContent(c.Content),
+		DocumentIDs:      documentIDs,
+	})
+	if downloadErr != nil {
+		return downloadErr
+	}
+	log.Info("Downloaded zip archive")
+	return downloadErr
+}
diff --git a/flags.go b/flags.go
index 344b485..a73fbb7 100644
--- a/flags.go
+++ b/flags.go
@@ -2,8 +2,10 @@ package main
 
 import (
 	"fmt"
+	"strings"
 	"time"
 
+	"github.com/ccremer/clustercode/pkg/paperless"
 	"github.com/urfave/cli/v2"
 )
 
@@ -114,6 +116,36 @@ func newConsumeDelayFlag(dest *time.Duration) *cli.DurationFlag {
 	}
 }
 
+func newTargetPathFlag(dest *string) *cli.StringFlag {
+	return &cli.StringFlag{
+		Name: "target-path", EnvVars: []string{"DOWNLOAD_TARGET_PATH"},
+		Usage:       "target file path where documents are downloaded.",
+		DefaultText: "default file name in current working directory",
+		Destination: dest,
+	}
+}
+
+func newDownloadContentFlag(dest *string) *cli.StringFlag {
+	return &cli.StringFlag{
+		Name: "content", EnvVars: []string{"DOWNLOAD_CONTENT"},
+		Usage:       "selection of document variant.",
+		Value:       paperless.BulkDownloadArchives.String(),
+		Destination: dest,
+		Action: func(ctx *cli.Context, s string) error {
+			enum := []string{
+				paperless.BulkDownloadArchives.String(),
+				paperless.BulkDownloadOriginal.String(),
+				paperless.BulkDownloadBoth.String()}
+			for _, key := range enum {
+				if s == key {
+					return nil
+				}
+			}
+			return fmt.Errorf("parameter %q must be one of [%s]", "content", strings.Join(enum, ", "))
+		},
+	}
+}
+
 func checkEmptyString(flagName string) func(*cli.Context, string) error {
 	return func(ctx *cli.Context, s string) error {
 		if s == "" {
diff --git a/main.go b/main.go
index 9b2bdc7..772aa6e 100644
--- a/main.go
+++ b/main.go
@@ -43,6 +43,7 @@ func NewApp() *cli.App {
 		},
 		Commands: []*cli.Command{
 			&newUploadCommand().Command,
+			&newBulkDownloadCommand().Command,
 			&newConsumeCommand().Command,
 		},
 	}
diff --git a/pkg/paperless/download.go b/pkg/paperless/download.go
index 29b53ef..d852eee 100644
--- a/pkg/paperless/download.go
+++ b/pkg/paperless/download.go
@@ -33,7 +33,6 @@ func (c BulkDownloadContent) String() string {
 }
 
 // BulkDownload downloads the documents identified by BulkDownloadParams.DocumentIDs and saves to the given targetPath.
-// If BulkDownloadParams.DocumentIDs is empty, all documents will be downloaded.
 // If targetPath is empty, it will use the suggested file name from Paperless in the current working dir.
 func (clt *Client) BulkDownload(ctx context.Context, targetPath string, params BulkDownloadParams) error {
 	req, err := clt.makeBulkDownloadRequest(ctx, params)
@@ -80,7 +79,7 @@ func (clt *Client) makeBulkDownloadRequest(ctx context.Context, params BulkDownl
 	body := bytes.NewReader(marshal)
 
 	path := clt.URL + "/api/documents/bulk_download/"
-	log.V(1).Info("Preparing request", "path", path)
+	log.V(1).Info("Preparing request", "path", path, "document_ids", params.DocumentIDs)
 	req, err := http.NewRequestWithContext(ctx, "POST", path, body)
 	if err != nil {
 		return nil, fmt.Errorf("cannot prepare request: %w", err)
diff --git a/test/docker-compose.yml b/test/docker-compose.yml
index 60219fd..c9b1224 100644
--- a/test/docker-compose.yml
+++ b/test/docker-compose.yml
@@ -42,6 +42,7 @@ services:
       PAPERLESS_REDIS: redis://broker:6379
       PAPERLESS_ADMIN_USER: admin
       PAPERLESS_ADMIN_PASSWORD: admin
+      PAPERLESS_FILENAME_FORMAT: "{created_year}/{correspondent}/{title}"
 
 volumes:
   redisdata:

From 63a503cbd4a4e359fc18827f9ad12d4c94a6dace Mon Sep 17 00:00:00 2001
From: Chris <github.account@chrigel.net>
Date: Sun, 29 Jan 2023 09:38:15 +0100
Subject: [PATCH 4/9] Add pkg to unzip a file

---
 go.mod                         |   4 ++
 go.sum                         |   3 ++
 pkg/archive/testdata/unzip.zip | Bin 0 -> 539 bytes
 pkg/archive/unzip.go           |  70 +++++++++++++++++++++++++++++++++
 pkg/archive/unzip_test.go      |  28 +++++++++++++
 5 files changed, 105 insertions(+)
 create mode 100644 pkg/archive/testdata/unzip.zip
 create mode 100644 pkg/archive/unzip.go
 create mode 100644 pkg/archive/unzip_test.go

diff --git a/go.mod b/go.mod
index 4e2b2fc..6c6d3e5 100644
--- a/go.mod
+++ b/go.mod
@@ -7,6 +7,7 @@ require (
 	github.com/fsnotify/fsnotify v1.6.0
 	github.com/go-logr/logr v1.2.3
 	github.com/pterm/pterm v0.12.51
+	github.com/stretchr/testify v1.8.1
 	github.com/urfave/cli/v2 v2.23.7
 )
 
@@ -15,9 +16,11 @@ require (
 	atomicgo.dev/keyboard v0.2.8 // indirect
 	github.com/containerd/console v1.0.3 // indirect
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
+	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/gookit/color v1.5.2 // indirect
 	github.com/lithammer/fuzzysearch v1.1.5 // indirect
 	github.com/mattn/go-runewidth v0.0.14 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/rivo/uniseg v0.2.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 // indirect
@@ -25,4 +28,5 @@ require (
 	golang.org/x/sys v0.0.0-20220908164124-27713097b956 // indirect
 	golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
 	golang.org/x/text v0.4.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/go.sum b/go.sum
index c904c01..08d2fb6 100644
--- a/go.sum
+++ b/go.sum
@@ -38,8 +38,10 @@ github.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuOb
 github.com/klauspost/cpuid/v2 v2.1.0/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
 github.com/klauspost/cpuid/v2 v2.2.0 h1:4ZexSFt8agMNzNisrsilL6RClWDC5YJnLHNIfTy4iuc=
 github.com/klauspost/cpuid/v2 v2.2.0/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
+github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/lithammer/fuzzysearch v1.1.5 h1:Ag7aKU08wp0R9QCfF4GoGST9HbmAIeLP7xwMrOBEp1c=
 github.com/lithammer/fuzzysearch v1.1.5/go.mod h1:1R1LRNk7yKid1BaQkmuLQaHruxcC4HmAH30Dh61Ih1Q=
@@ -116,6 +118,7 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
diff --git a/pkg/archive/testdata/unzip.zip b/pkg/archive/testdata/unzip.zip
new file mode 100644
index 0000000000000000000000000000000000000000..c3556ce5e8ffd35465a6b3341efccecf82651220
GIT binary patch
literal 539
zcmWIWW@h1H0D+m_wqalfl;CHOVQ|STQt-@Ea4bsB$Sh0M4-MgDV0O~GmJGtB72FJr
zEH9WD7{EjTib3;DKDF=y4FX|lOoM_;lN5kv>!oGpq{0mi#5A-xKQ~n&IX|x?HLrvV
zVM7zpshSJ#Zw5IWgn5BBl;juWq?V=TAj}j7nx}+mW=MX4f)7X~&_G5eIc8kGmjD_J
z0s;(w9YHikSg=CE0wWwy4MhzJh@p%?<F+(<Vj7AXE`%+>3<zWkRs$_S3l^Y}pkTpp
UGb<ZVEdw(UasnOq2gG3j0RC@q^Z)<=

literal 0
HcmV?d00001

diff --git a/pkg/archive/unzip.go b/pkg/archive/unzip.go
new file mode 100644
index 0000000..760da5c
--- /dev/null
+++ b/pkg/archive/unzip.go
@@ -0,0 +1,70 @@
+package archive
+
+import (
+	"archive/zip"
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/go-logr/logr"
+)
+
+// Unzip extracts every entry of the zip archive at source into the dest directory; it returns on the first error and rejects entries that would resolve outside dest.
+func Unzip(ctx context.Context, source, dest string) error {
+	log := logr.FromContextOrDiscard(ctx)
+	log.V(1).Info("Unzipping file", "source", source, "dest", dest)
+	archive, openErr := zip.OpenReader(source)
+	if openErr != nil {
+		return fmt.Errorf("cannot open source file: %w", openErr)
+	}
+	defer archive.Close()
+
+	for _, f := range archive.File {
+		destFilePath := filepath.Join(dest, f.Name)
+		// Guard against path traversal: a plain prefix comparison wrongly rejects everything when dest is "." or a root, so compare via Rel.
+		if rel, relErr := filepath.Rel(dest, destFilePath); relErr != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
+			return fmt.Errorf("invalid file path: %s", destFilePath)
+		}
+		if f.FileInfo().IsDir() {
+			log.V(2).Info("Creating directory", "dir", f.FileInfo().Name())
+			if mkdirErr := os.MkdirAll(destFilePath, os.ModePerm); mkdirErr != nil {
+				return fmt.Errorf("cannot create directory: %w", mkdirErr)
+			}
+			continue
+		}
+		log.V(2).Info("Extracting file", "source", f.Name, "dest", destFilePath)
+
+		err := unzipFile(f, destFilePath)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// unzipFile writes the archive entry f to destFilePath using the entry's file mode, creating missing parent directories and truncating an existing file.
+func unzipFile(f *zip.File, destFilePath string) error {
+	// Ensure the directory exists where the file should be written.
+	if mkdirErr := os.MkdirAll(filepath.Dir(destFilePath), os.ModePerm); mkdirErr != nil {
+		return fmt.Errorf("cannot create directory: %w", mkdirErr)
+	}
+
+	dstFile, dstFileErr := os.OpenFile(destFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
+	if dstFileErr != nil {
+		return fmt.Errorf("cannot open destination file: %w", dstFileErr)
+	}
+	defer dstFile.Close()
+
+	fileInArchive, srcFileErr := f.Open()
+	if srcFileErr != nil {
+		return fmt.Errorf("cannot open source file: %w", srcFileErr)
+	}
+	defer fileInArchive.Close() // must stay open until io.Copy below has drained it
+	if _, copyErr := io.Copy(dstFile, fileInArchive); copyErr != nil {
+		return fmt.Errorf("cannot copy %q to %q: %w", f.Name, dstFile.Name(), copyErr)
+	}
+	return nil
+}
diff --git a/pkg/archive/unzip_test.go b/pkg/archive/unzip_test.go
new file mode 100644
index 0000000..22f32aa
--- /dev/null
+++ b/pkg/archive/unzip_test.go
@@ -0,0 +1,28 @@
+package archive
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestUnzip(t *testing.T) {
+	testFilePath := "testdata/unzip.zip"
+	testDir := "testdata/run"
+
+	// Remove leftovers of an earlier run that failed before reaching its own cleanup at the end.
+	require.NoError(t, os.RemoveAll(testDir))
+
+	err := Unzip(context.TODO(), testFilePath, testDir)
+	assert.NoError(t, err, "unzip failed with error")
+
+	assert.FileExists(t, filepath.Join(testDir, "toplevel.file"))
+	assert.FileExists(t, filepath.Join(testDir, "Dir In Archive", "Sub Dir.file"))
+
+	// Remove the extracted files again; this line is not reached if a require above fails.
+	require.NoError(t, os.RemoveAll(testDir))
+}

From aa0295c2f929d3fa7c1b01985777baac900d4d2d Mon Sep 17 00:00:00 2001
From: Chris <github.account@chrigel.net>
Date: Sun, 29 Jan 2023 11:04:06 +0100
Subject: [PATCH 5/9] Add flag to unzip downloaded file

---
 .gitignore                |  1 +
 bulk_download_command.go  | 55 +++++++++++++++++++++++++++++++++++----
 flags.go                  | 10 ++++++-
 pkg/paperless/download.go | 19 +++-----------
 4 files changed, 63 insertions(+), 22 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8c36e56..a58ccd1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@
 /.work/
 
 /documents.zip
+/documents
diff --git a/bulk_download_command.go b/bulk_download_command.go
index 0fff32b..2b82230 100644
--- a/bulk_download_command.go
+++ b/bulk_download_command.go
@@ -1,6 +1,10 @@
 package main
 
 import (
+	"fmt"
+	"os"
+
+	"github.com/ccremer/clustercode/pkg/archive"
 	"github.com/ccremer/clustercode/pkg/paperless"
 	"github.com/go-logr/logr"
 	"github.com/urfave/cli/v2"
@@ -13,8 +17,9 @@ type BulkDownloadCommand struct {
 	PaperlessToken string
 	PaperlessUser  string
 
-	TargetPath string
-	Content    string
+	TargetPath   string
+	Content      string
+	UnzipEnabled bool
 }
 
 func newBulkDownloadCommand() *BulkDownloadCommand {
@@ -29,6 +34,7 @@ func newBulkDownloadCommand() *BulkDownloadCommand {
 			newTokenFlag(&c.PaperlessToken),
 			newTargetPathFlag(&c.TargetPath),
 			newDownloadContentFlag(&c.Content),
+			newUnzipFlag(&c.UnzipEnabled),
 		},
 	}
 	return c
@@ -39,6 +45,8 @@ func (c *BulkDownloadCommand) Action(ctx *cli.Context) error {
 
 	log.V(1)
 	clt := paperless.NewClient(c.PaperlessURL, c.PaperlessUser, c.PaperlessToken)
+
+	log.Info("Getting list of documents")
 	documents, queryErr := clt.QueryDocuments(ctx.Context, paperless.QueryParams{
 		TruncateContent: true,
 	})
@@ -46,7 +54,15 @@ func (c *BulkDownloadCommand) Action(ctx *cli.Context) error {
 		return queryErr
 	}
 	documentIDs := paperless.MapToDocumentIDs(documents)
-	downloadErr := clt.BulkDownload(ctx.Context, c.TargetPath, paperless.BulkDownloadParams{
+
+	tmpFile, createTempErr := os.CreateTemp(os.TempDir(), "paperless-bulk-download-")
+	if createTempErr != nil {
+		return fmt.Errorf("cannot open temporary file: %w", createTempErr)
+	}
+	defer os.Remove(tmpFile.Name()) // cleanup if not renamed
+
+	log.Info("Downloading documents")
+	downloadErr := clt.BulkDownload(ctx.Context, tmpFile, paperless.BulkDownloadParams{
 		FollowFormatting: true,
 		Content:          paperless.BulkDownloadContent(c.Content),
 		DocumentIDs:      documentIDs,
@@ -54,6 +70,35 @@ func (c *BulkDownloadCommand) Action(ctx *cli.Context) error {
 	if downloadErr != nil {
 		return downloadErr
 	}
-	log.Info("Downloaded zip archive")
-	return downloadErr
+
+	if c.UnzipEnabled {
+		return c.unzip(ctx, tmpFile)
+	}
+	return c.move(ctx, tmpFile)
+}
+
+func (c *BulkDownloadCommand) unzip(ctx *cli.Context, tmpFile *os.File) error {
+	log := logr.FromContextOrDiscard(ctx.Context)
+	downloadFilePath := c.TargetPath
+	if c.TargetPath == "" {
+		downloadFilePath = "documents"
+	}
+	if unzipErr := archive.Unzip(ctx.Context, tmpFile.Name(), downloadFilePath); unzipErr != nil {
+		return fmt.Errorf("cannot unzip file %q to %q: %w", tmpFile.Name(), downloadFilePath, unzipErr)
+	}
+	log.Info("Unzipped archive to dir", "dir", downloadFilePath)
+	return nil
+}
+
+func (c *BulkDownloadCommand) move(ctx *cli.Context, tmpFile *os.File) error {
+	log := logr.FromContextOrDiscard(ctx.Context)
+	downloadFilePath := c.TargetPath
+	if c.TargetPath == "" {
+		downloadFilePath = "documents.zip"
+	}
+	if renameErr := os.Rename(tmpFile.Name(), downloadFilePath); renameErr != nil {
+		return fmt.Errorf("cannot move temp file: %w", renameErr)
+	}
+	log.Info("Downloaded zip archive", "file", downloadFilePath)
+	return nil
 }
diff --git a/flags.go b/flags.go
index a73fbb7..4072cf6 100644
--- a/flags.go
+++ b/flags.go
@@ -120,7 +120,7 @@ func newTargetPathFlag(dest *string) *cli.StringFlag {
 	return &cli.StringFlag{
 		Name: "target-path", EnvVars: []string{"DOWNLOAD_TARGET_PATH"},
 		Usage:       "target file path where documents are downloaded.",
-		DefaultText: "default file name in current working directory",
+		DefaultText: "documents.zip",
 		Destination: dest,
 	}
 }
@@ -146,6 +146,14 @@ func newDownloadContentFlag(dest *string) *cli.StringFlag {
 	}
 }
 
+func newUnzipFlag(dest *bool) *cli.BoolFlag {
+	return &cli.BoolFlag{
+		Name: "unzip", EnvVars: []string{"DOWNLOAD_UNZIP"},
+		Usage:       "unzip the downloaded file.",
+		Destination: dest,
+	}
+}
+
 func checkEmptyString(flagName string) func(*cli.Context, string) error {
 	return func(ctx *cli.Context, s string) error {
 		if s == "" {
diff --git a/pkg/paperless/download.go b/pkg/paperless/download.go
index d852eee..e986951 100644
--- a/pkg/paperless/download.go
+++ b/pkg/paperless/download.go
@@ -8,7 +8,6 @@ import (
 	"io"
 	"net/http"
 	"os"
-	"strings"
 
 	"github.com/go-logr/logr"
 )
@@ -34,7 +33,7 @@ func (c BulkDownloadContent) String() string {
 
 // BulkDownload downloads the documents identified by BulkDownloadParams.DocumentIDs and saves to the given targetPath.
 // If targetPath is empty, it will use the suggested file name from Paperless in the current working dir.
-func (clt *Client) BulkDownload(ctx context.Context, targetPath string, params BulkDownloadParams) error {
+func (clt *Client) BulkDownload(ctx context.Context, targetFile *os.File, params BulkDownloadParams) error {
 	req, err := clt.makeBulkDownloadRequest(ctx, params)
 	if err != nil {
 		return err
@@ -53,11 +52,8 @@ func (clt *Client) BulkDownload(ctx context.Context, targetPath string, params B
 		return fmt.Errorf("request failed: %s: %s", resp.Status, string(b))
 	}
 
-	out, err := os.Create(getTargetPathOrFromHeader(targetPath, resp.Header))
-	defer out.Close()
-
-	log.V(1).Info("Writing download content to file", "file", out.Name())
-	_, err = io.Copy(out, resp.Body)
+	log.V(1).Info("Writing download content to file", "file", targetFile.Name())
+	_, err = io.Copy(targetFile, resp.Body)
 	if err != nil {
 		return fmt.Errorf("cannot read response body: %w", err)
 	}
@@ -88,12 +84,3 @@ func (clt *Client) makeBulkDownloadRequest(ctx context.Context, params BulkDownl
 	req.Header.Set("Content-Type", "application/json")
 	return req, nil
 }
-
-func getTargetPathOrFromHeader(v string, header http.Header) string {
-	if v != "" {
-		return v
-	}
-	raw := header.Get("content-disposition")
-	fileName := strings.TrimSuffix(strings.TrimPrefix(raw, `attachment; filename="`), `"`)
-	return fileName
-}

From 6e349a545d2c522f609d704a282b393c0f6847a1 Mon Sep 17 00:00:00 2001
From: Chris <github.account@chrigel.net>
Date: Sun, 29 Jan 2023 10:56:15 +0100
Subject: [PATCH 6/9] Add overwrite flag and abort if target file exists

---
 bulk_download_command.go | 44 +++++++++++++++++++++++++++++-----------
 flags.go                 |  8 ++++++++
 2 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/bulk_download_command.go b/bulk_download_command.go
index 2b82230..9c79c62 100644
--- a/bulk_download_command.go
+++ b/bulk_download_command.go
@@ -17,9 +17,10 @@ type BulkDownloadCommand struct {
 	PaperlessToken string
 	PaperlessUser  string
 
-	TargetPath   string
-	Content      string
-	UnzipEnabled bool
+	TargetPath              string
+	Content                 string
+	UnzipEnabled            bool
+	OverwriteExistingTarget bool
 }
 
 func newBulkDownloadCommand() *BulkDownloadCommand {
@@ -35,6 +36,7 @@ func newBulkDownloadCommand() *BulkDownloadCommand {
 			newTargetPathFlag(&c.TargetPath),
 			newDownloadContentFlag(&c.Content),
 			newUnzipFlag(&c.UnzipEnabled),
+			newOverwriteFlag(&c.OverwriteExistingTarget),
 		},
 	}
 	return c
@@ -43,7 +45,9 @@ func newBulkDownloadCommand() *BulkDownloadCommand {
 func (c *BulkDownloadCommand) Action(ctx *cli.Context) error {
 	log := logr.FromContextOrDiscard(ctx.Context)
 
-	log.V(1)
+	if prepareErr := c.prepareTarget(); prepareErr != nil {
+		return prepareErr
+	}
 	clt := paperless.NewClient(c.PaperlessURL, c.PaperlessUser, c.PaperlessToken)
 
 	log.Info("Getting list of documents")
@@ -79,10 +83,7 @@ func (c *BulkDownloadCommand) Action(ctx *cli.Context) error {
 
 func (c *BulkDownloadCommand) unzip(ctx *cli.Context, tmpFile *os.File) error {
 	log := logr.FromContextOrDiscard(ctx.Context)
-	downloadFilePath := c.TargetPath
-	if c.TargetPath == "" {
-		downloadFilePath = "documents"
-	}
+	downloadFilePath := c.getTargetPath()
 	if unzipErr := archive.Unzip(ctx.Context, tmpFile.Name(), downloadFilePath); unzipErr != nil {
 		return fmt.Errorf("cannot unzip file %q to %q: %w", tmpFile.Name(), downloadFilePath, unzipErr)
 	}
@@ -92,13 +93,32 @@ func (c *BulkDownloadCommand) unzip(ctx *cli.Context, tmpFile *os.File) error {
 
 func (c *BulkDownloadCommand) move(ctx *cli.Context, tmpFile *os.File) error {
 	log := logr.FromContextOrDiscard(ctx.Context)
-	downloadFilePath := c.TargetPath
-	if c.TargetPath == "" {
-		downloadFilePath = "documents.zip"
-	}
+	downloadFilePath := c.getTargetPath()
 	if renameErr := os.Rename(tmpFile.Name(), downloadFilePath); renameErr != nil {
 		return fmt.Errorf("cannot move temp file: %w", renameErr)
 	}
 	log.Info("Downloaded zip archive", "file", downloadFilePath)
 	return nil
 }
+
+func (c *BulkDownloadCommand) getTargetPath() string {
+	switch { // an explicit --target-path always wins over the defaults
+	case c.TargetPath != "":
+		return c.TargetPath
+	case c.UnzipEnabled:
+		return "documents"
+	}
+	return "documents.zip"
+}
+
+func (c *BulkDownloadCommand) prepareTarget() error {
+	target := c.getTargetPath()
+	if c.OverwriteExistingTarget {
+		return os.RemoveAll(target)
+	}
+	_, err := os.Stat(target)
+	if err != nil && os.IsNotExist(err) {
+		return nil
+	}
+	return fmt.Errorf("target %q exists, abort", target)
+}
diff --git a/flags.go b/flags.go
index 4072cf6..9c5218f 100644
--- a/flags.go
+++ b/flags.go
@@ -154,6 +154,14 @@ func newUnzipFlag(dest *bool) *cli.BoolFlag {
 	}
 }
 
+func newOverwriteFlag(dest *bool) *cli.BoolFlag {
+	return &cli.BoolFlag{
+		Name: "overwrite", EnvVars: []string{"DOWNLOAD_OVERWRITE"},
+		Usage:       "deletes existing file(s) before downloading.",
+		Destination: dest,
+	}
+}
+
 func checkEmptyString(flagName string) func(*cli.Context, string) error {
 	return func(ctx *cli.Context, s string) error {
 		if s == "" {

From 58d6d154f2e84d66cf474e96cf56154608fc2e44 Mon Sep 17 00:00:00 2001
From: Chris <github.account@chrigel.net>
Date: Sun, 29 Jan 2023 11:06:29 +0100
Subject: [PATCH 7/9] Update readme

---
 README.md                | 4 ++--
 bulk_download_command.go | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 09ea7ff..39d438c 100644
--- a/README.md
+++ b/README.md
@@ -4,9 +4,9 @@ CLI tool to interact with paperless-ngx remote API
 
 ## Subcommands
 
-- `upload`: Uploads local document(s) to Paperless instance
+- `upload`: Uploads local document(s) to Paperless instance.
 - `consume`: Consumes a local directory and uploads each file to Paperless instance. The files will be deleted once uploaded.
-- `bulk-download`: 
+- `bulk-download`: Downloads all documents at once.
 
 ## Installation
 
diff --git a/bulk_download_command.go b/bulk_download_command.go
index 9c79c62..e9776ef 100644
--- a/bulk_download_command.go
+++ b/bulk_download_command.go
@@ -27,7 +27,7 @@ func newBulkDownloadCommand() *BulkDownloadCommand {
 	c := &BulkDownloadCommand{}
 	c.Command = cli.Command{
 		Name:   "bulk-download",
-		Usage:  "Downloads multiple documents at once",
+		Usage:  "Downloads all documents at once",
 		Action: actions(LogMetadata, c.Action),
 		Flags: []cli.Flag{
 			newURLFlag(&c.PaperlessURL),

From 9c2cbd7bb67790705195a009c22e944623093b2e Mon Sep 17 00:00:00 2001
From: Chris <github.account@chrigel.net>
Date: Sun, 29 Jan 2023 11:40:46 +0100
Subject: [PATCH 8/9] Set page size to 100

---
 bulk_download_command.go | 2 ++
 pkg/paperless/query.go   | 7 +++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/bulk_download_command.go b/bulk_download_command.go
index e9776ef..b3f174d 100644
--- a/bulk_download_command.go
+++ b/bulk_download_command.go
@@ -53,6 +53,8 @@ func (c *BulkDownloadCommand) Action(ctx *cli.Context) error {
 	log.Info("Getting list of documents")
 	documents, queryErr := clt.QueryDocuments(ctx.Context, paperless.QueryParams{
 		TruncateContent: true,
+		Ordering:        "id",
+		PageSize:        100,
 	})
 	if queryErr != nil {
 		return queryErr
diff --git a/pkg/paperless/query.go b/pkg/paperless/query.go
index 52f794e..d97d260 100644
--- a/pkg/paperless/query.go
+++ b/pkg/paperless/query.go
@@ -14,7 +14,9 @@ import (
 )
 
 type QueryParams struct {
-	TruncateContent bool `param:"truncate_content"`
+	TruncateContent bool   `param:"truncate_content"`
+	Ordering        string `param:"ordering"`
+	PageSize        int    `param:"page_size"`
 }
 
 type QueryResults struct {
@@ -57,7 +59,6 @@ func (clt *Client) makeQueryRequest(ctx context.Context, params QueryParams) (*h
 	log := logr.FromContextOrDiscard(ctx)
 
 	values := paramsToValues(params)
-	values.Set("ordering", "id")
 
 	path := clt.URL + "/api/documents/?" + values.Encode()
 	log.V(1).Info("Preparing request", "path", path)
@@ -84,6 +85,8 @@ func paramsToValues(params QueryParams) url.Values {
 			paramValue = strconv.FormatBool(field.Bool())
 		case reflect.String:
 			paramValue = field.String()
+		case reflect.Int:
+			paramValue = strconv.FormatInt(field.Int(), 10)
 		default:
 			panic(fmt.Errorf("not implemented type: %s", field.Kind()))
 		}

From 15fda1885d6837277b228d2060468bff01ee1b17 Mon Sep 17 00:00:00 2001
From: Chris <github.account@chrigel.net>
Date: Sun, 29 Jan 2023 12:08:39 +0100
Subject: [PATCH 9/9] Implement pagination in queries

---
 pkg/paperless/query.go | 76 ++++++++++++++++++++++++++++++------------
 1 file changed, 55 insertions(+), 21 deletions(-)

diff --git a/pkg/paperless/query.go b/pkg/paperless/query.go
index d97d260..94b6174 100644
--- a/pkg/paperless/query.go
+++ b/pkg/paperless/query.go
@@ -16,14 +16,64 @@ import (
 type QueryParams struct {
 	TruncateContent bool   `param:"truncate_content"`
 	Ordering        string `param:"ordering"`
-	PageSize        int    `param:"page_size"`
+	PageSize        int64  `param:"page_size"` // sent as the "page_size" query parameter
+	page            int64  `param:"page"`      // page to request; QueryDocuments sets it while paginating
 }
 
-type QueryResults struct {
+type QueryResult struct {
 	Results []Document `json:"results,omitempty"`
+	Next    string     `json:"next,omitempty"` // "next" link of the response; NextPage reads its "page" parameter
+}
+
+// NextPage returns the page number named by the "page" query parameter of QueryResult.Next.
+// It returns 1 if QueryResult.Next is empty (no next link), or 0 if QueryResult.Next cannot be parsed.
+func (r QueryResult) NextPage() int64 {
+	if r.Next == "" {
+		return 1 // no next link
+	}
+	// Next is a URL; url.ParseQuery on it would fold scheme, host and path into the first key.
+	next, err := url.Parse(r.Next)
+	if err != nil {
+		return 0
+	}
+	page, err := strconv.ParseInt(next.Query().Get("page"), 10, 64)
+	if err != nil {
+		return 0
+	}
+	return page
 }
 
 func (clt *Client) QueryDocuments(ctx context.Context, params QueryParams) ([]Document, error) {
+	documents := make([]Document, 0) // stays non-nil even when nothing matches
+	for params.page = 1; ; params.page++ { // request pages 1, 2, ... in order
+		result, err := clt.queryDocumentsInPage(ctx, params)
+		if err != nil {
+			return nil, err
+		}
+		documents = append(documents, result.Results...)
+		if result.Next == "" { // no "next" link: nothing left to fetch
+			return documents, nil
+		}
+	}
+}
+
+// makeQueryRequest builds the GET request for the documents endpoint, encoding params
+// as the URL query string and applying the client's setAuth to the request.
+func (clt *Client) makeQueryRequest(ctx context.Context, params QueryParams) (*http.Request, error) {
+	endpoint := clt.URL + "/api/documents/?" + paramsToValues(params).Encode()
+	logr.FromContextOrDiscard(ctx).V(1).Info("Preparing request", "path", endpoint)
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, fmt.Errorf("cannot prepare request: %w", err)
+	}
+
+	clt.setAuth(req)
+	req.Header.Set("Content-Type", "application/json")
+	return req, nil
+}
+
+func (clt *Client) queryDocumentsInPage(ctx context.Context, params QueryParams) (*QueryResult, error) {
 	req, err := clt.makeQueryRequest(ctx, params)
 	if err != nil {
 		return nil, err
@@ -46,29 +96,13 @@ func (clt *Client) QueryDocuments(ctx context.Context, params QueryParams) ([]Do
 		return nil, fmt.Errorf("request failed: %s: %s", resp.Status, string(b))
 	}
 
-	result := QueryResults{}
+	result := QueryResult{}
 	parseErr := json.Unmarshal(b, &result)
 	if parseErr != nil {
 		return nil, fmt.Errorf("cannot parse JSON: %w", parseErr)
 	}
 	log.V(1).Info("Parsed response", "result", result)
-	return result.Results, nil
-}
-
-func (clt *Client) makeQueryRequest(ctx context.Context, params QueryParams) (*http.Request, error) {
-	log := logr.FromContextOrDiscard(ctx)
-
-	values := paramsToValues(params)
-
-	path := clt.URL + "/api/documents/?" + values.Encode()
-	log.V(1).Info("Preparing request", "path", path)
-	req, err := http.NewRequestWithContext(ctx, "GET", path, nil)
-	if err != nil {
-		return nil, fmt.Errorf("cannot prepare request: %w", err)
-	}
-	clt.setAuth(req)
-	req.Header.Set("Content-Type", "application/json")
-	return req, nil
+	return &result, nil
 }
 
 func paramsToValues(params QueryParams) url.Values {
@@ -85,7 +119,7 @@ func paramsToValues(params QueryParams) url.Values {
 			paramValue = strconv.FormatBool(field.Bool())
 		case reflect.String:
 			paramValue = field.String()
-		case reflect.Int:
+		case reflect.Int64:
 			paramValue = strconv.FormatInt(field.Int(), 10)
 		default:
 			panic(fmt.Errorf("not implemented type: %s", field.Kind()))