-
Notifications
You must be signed in to change notification settings - Fork 0
/
builder.go
258 lines (228 loc) · 7.6 KB
/
builder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
package mediasort
import (
"context"
"errors"
"fmt"
"regexp"
"strings"
"github.com/dtrejod/goexif/internal/ilog"
"github.com/dtrejod/goexif/internal/mediatype"
"github.com/dtrejod/goexif/internal/visitors"
"go.uber.org/zap"
)
var (
// DefaultFileTypes are the default media types handled by the sorter if none are specified.
// NewSorter lowercases and de-duplicates this list before using it.
// NOTE: This default list should match the known mediatypes in the ./internal/mediatype package.
DefaultFileTypes = []string{
mediatype.JPEG{}.String(),
mediatype.PNG{}.String(),
mediatype.HEIF{}.String(),
mediatype.TIFF{}.String(),
mediatype.QTFF{}.String(),
mediatype.MP4{}.String(),
}
// DefaultBlocklist holds the default regexes for paths that are ignored by the sorter.
// NewSorter starts from this list; WithRegexBlocklist replaces it.
DefaultBlocklist = []*regexp.Regexp{
// Ignore the date-based directory pattern used internally by the sorter:
// four digits followed by two two-digit segments, each terminated by a
// slash (e.g. "2021/03/14/"). This makes subsequent runs incremental
// instead of repeatedly processing the same media files.
regexp.MustCompile(`(\/)?\d{4}\/(\d{2}\/){2}`),
}
// errInvalidConfig is the sentinel error wrapped by NewSorter when a
// required option (the source directory) is missing.
errInvalidConfig = errors.New("invalid configuration")
)
// Sorter sorts media based on file metadata.
// Use NewSorter to obtain a configured Sorter.
type Sorter interface {
// Run runs the sorter with the given context and reports failure through
// the returned error.
Run(ctx context.Context) error
}
// Option is a param that can be used to configure the media metadata sorter.
// Options are passed to NewSorter, which applies them in the order given.
type Option interface {
// apply records the option on the builder options. A non-nil error makes
// NewSorter stop and return that error.
apply(*builderOptions) error
}
// builderFunc adapts a plain function so that it satisfies the Option interface.
type builderFunc func(*builderOptions) error

// apply invokes the wrapped function on opts and returns its result unchanged.
func (fn builderFunc) apply(opts *builderOptions) error {
	return fn(opts)
}
// builderOptions accumulates the settings recorded by Option values before
// NewSorter turns them into a Sorter.
type builderOptions struct {
// dryRun is enabled by WithDryRun.
dryRun bool
// timestampAsFilename is enabled by WithTimestampAsFilename.
timestampAsFilename bool
// useLastModifiedDate is enabled by WithLastModifiedFallback.
useLastModifiedDate bool
// useInputMagicSignature is enabled by WithInputFileMagicSignature.
useInputMagicSignature bool
// useOutputMagicSignature is enabled by WithOutputFileMagicSignature.
useOutputMagicSignature bool
// overwriteExisting is enabled by WithOverwriteExisting.
overwriteExisting bool
// stopWalkOnError is enabled by WithStopOnError.
stopWalkOnError bool
// detectDuplicates is enabled by WithDetectDuplicates.
detectDuplicates bool
// allowedFileTypes defaults to the lowercased DefaultFileTypes and is
// replaced by WithFileTypes.
allowedFileTypes []string
// blocklist defaults to DefaultBlocklist and is replaced by WithRegexBlocklist.
blocklist []*regexp.Regexp
// sourceDirectory is set by WithSourceDirectory; NewSorter fails when it is nil.
sourceDirectory *string
// destinationDirectory is set by WithDestinationDirectory and may stay nil.
destinationDirectory *string
}
// NewSorter builds a Sorter from the supplied Option(s).
//
// The defaults are the lowercased DefaultFileTypes and DefaultBlocklist; each
// non-nil Option is then applied in order, and the first Option error is
// returned as-is. WithSourceDirectory is the only required option: when no
// source directory has been set, an error wrapping errInvalidConfig is logged
// and returned.
func NewSorter(ctx context.Context, opts ...Option) (Sorter, error) {
	settings := builderOptions{
		allowedFileTypes: uniqLoweredSlice(DefaultFileTypes),
		blocklist:        DefaultBlocklist,
	}

	// Nil options are tolerated so callers can build option lists conditionally.
	for i := range opts {
		if opts[i] == nil {
			continue
		}
		if err := opts[i].apply(&settings); err != nil {
			return nil, err
		}
	}

	if settings.sourceDirectory == nil {
		err := fmt.Errorf("%w: source directory required for sorting", errInvalidConfig)
		ilog.FromContext(ctx).Error("Failed to build sorter", zap.Error(err))
		return nil, err
	}

	ilog.FromContext(ctx).Info("Sorter configuration.", zap.String("configuration", fmt.Sprintf("%+v", settings)))

	// Build the collaborators in the same order the traverser lists them.
	extVisitor := visitors.NewMediaExtAliases(ctx)
	tracker := &progressTracker{
		currentMediaIndex: 0,
		totalMediaFiles:   0,
		logThreshold:      0,
		logNextThreshold:  0,
	}
	handler := &metadataFileHandler{
		useInputMagicSignature: settings.useInputMagicSignature,
		detectDuplicates:       settings.detectDuplicates,
		dryRun:                 settings.dryRun,
		overwriteExisting:      settings.overwriteExisting,
		mediaMetadataVisitorFunc: visitors.NewMediaMetadataFilename(
			ctx,
			settings.destinationDirectory,
			settings.useLastModifiedDate,
			settings.timestampAsFilename,
			settings.useOutputMagicSignature,
		),
	}

	return &traverser{
		useInputMagicSignature: settings.useInputMagicSignature,
		stopWalkOnError:        settings.stopWalkOnError,
		allowedFileTypes:       settings.allowedFileTypes,
		blocklist:              settings.blocklist,
		sourceDirectory:        *settings.sourceDirectory,
		extVisitorFunc:         extVisitor,
		progressTracker:        tracker,
		fileHandler:            handler,
	}, nil
}
// WithDryRun instructs the sorter to make no changes.
func WithDryRun() Option {
	enable := func(cfg *builderOptions) error {
		cfg.dryRun = true
		return nil
	}
	return builderFunc(enable)
}
// WithTimestampAsFilename instructs the sorter to rename the source file using
// its timestamp and file extension.
// Note: This option can help eliminate duplicate images during sorting.
func WithTimestampAsFilename() Option {
	enable := func(cfg *builderOptions) error {
		cfg.timestampAsFilename = true
		return nil
	}
	return builderFunc(enable)
}
// WithLastModifiedFallback instructs the sorter to fall back to the file's
// last modified date when there is no media metadata. Without this option,
// images lacking media metadata are ignored.
func WithLastModifiedFallback() Option {
	enable := func(cfg *builderOptions) error {
		cfg.useLastModifiedDate = true
		return nil
	}
	return builderFunc(enable)
}
// WithInputFileMagicSignature instructs the sorter to identify media files by
// the file's magic signature, ignoring the existing file extension on the media.
// See the manual page for file(1) to understand how this works.
func WithInputFileMagicSignature() Option {
	enable := func(cfg *builderOptions) error {
		cfg.useInputMagicSignature = true
		return nil
	}
	return builderFunc(enable)
}
// WithOutputFileMagicSignature instructs the sorter to use the known file
// signature when saving the output file.
// See the manual page for file(1) to understand how this works.
func WithOutputFileMagicSignature() Option {
	enable := func(cfg *builderOptions) error {
		cfg.useOutputMagicSignature = true
		return nil
	}
	return builderFunc(enable)
}
// WithFileTypes sets the file types that the sorter intends to locate,
// replacing the defaults. The list is lowercased and de-duplicated, so
// extensions are matched case-insensitively: *.jpg is treated the same as
// *.JPG, etc.
// Can handle any file type, not just EXIF-enabled file types, when used in
// conjunction with WithLastModifiedFallback().
func WithFileTypes(t []string) Option {
	setTypes := func(cfg *builderOptions) error {
		cfg.allowedFileTypes = uniqLoweredSlice(t)
		return nil
	}
	return builderFunc(setTypes)
}
// WithRegexBlocklist sets the regular expressions matched against paths that
// the sorter should ignore, replacing the default blocklist. Patterns are
// matched case-insensitively.
//
// Each distinct pattern is compiled once, in the order given. The pattern text
// is compiled as written with the case-insensitive flag, rather than being
// lowercased first: lowercasing the source would change its meaning (`\D`
// would become `\d`, `[A-Z]` would become `[a-z]`) or make it invalid
// (`(?P<Name>...)`).
//
// Applying the option returns an error, wrapping the regexp compile error and
// naming the offending pattern, if any pattern is not a valid regular
// expression; in that case the blocklist is left unchanged.
func WithRegexBlocklist(d []string) Option {
	return builderFunc(func(b *builderOptions) error {
		seen := make(map[string]struct{}, len(d))
		exprs := make([]*regexp.Regexp, 0, len(d))
		for _, p := range d {
			// Skip exact repeats so the same expression is not evaluated twice.
			if _, dup := seen[p]; dup {
				continue
			}
			seen[p] = struct{}{}

			// A leading (?i) applies to the whole expression, including every
			// top-level alternative.
			re, err := regexp.Compile("(?i)" + p)
			if err != nil {
				return fmt.Errorf("compiling blocklist pattern %q: %w", p, err)
			}
			exprs = append(exprs, re)
		}
		b.blocklist = exprs
		return nil
	})
}
// WithSourceDirectory sets the absolute or relative filepath where media to be
// sorted will be looked for. NewSorter requires this option.
func WithSourceDirectory(s string) Option {
	setSource := func(cfg *builderOptions) error {
		cfg.sourceDirectory = &s
		return nil
	}
	return builderFunc(setSource)
}
// WithDestinationDirectory sets the absolute or relative filepath where sorted
// media will be saved to.
func WithDestinationDirectory(d string) Option {
	setDestination := func(cfg *builderOptions) error {
		cfg.destinationDirectory = &d
		return nil
	}
	return builderFunc(setDestination)
}
// WithOverwriteExisting instructs the sorter to overwrite any file that
// already exists at the desired destination file name.
// Warning: This can be useful for removing duplicates, because no two files
// with the same timestamp can then exist; however, it can cause data loss if
// used carelessly.
func WithOverwriteExisting() Option {
	enable := func(cfg *builderOptions) error {
		cfg.overwriteExisting = true
		return nil
	}
	return builderFunc(enable)
}
// WithStopOnError instructs the sorter to exit quickly when any error occurs
// while walking the directory tree.
func WithStopOnError() Option {
	enable := func(cfg *builderOptions) error {
		cfg.stopWalkOnError = true
		return nil
	}
	return builderFunc(enable)
}
// WithDetectDuplicates instructs the sorter to use a perception hash of each
// file to determine whether two images with the same EXIF metadata are
// duplicate files.
func WithDetectDuplicates() Option {
	enable := func(cfg *builderOptions) error {
		cfg.detectDuplicates = true
		return nil
	}
	return builderFunc(enable)
}
// uniqLoweredSlice lowercases every element of in and returns a new slice
// holding each distinct lowercased value once.
//
// Values appear in the order of their first occurrence in the input, so the
// result is deterministic from run to run. Collecting the values by ranging
// over a map would return them in random order. The returned slice is never
// nil, even for nil or empty input, and the input slice is not modified.
func uniqLoweredSlice(in []string) []string {
	seen := make(map[string]struct{}, len(in))
	out := make([]string, 0, len(in))
	for _, s := range in {
		lowered := strings.ToLower(s)
		if _, dup := seen[lowered]; dup {
			continue
		}
		seen[lowered] = struct{}{}
		out = append(out, lowered)
	}
	return out
}