-
Notifications
You must be signed in to change notification settings - Fork 908
/
lib.rs
478 lines (420 loc) · 16.1 KB
/
lib.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
use std::collections::hash_map::DefaultHasher;
use std::collections::hash_map::Entry as HEntry;
use std::collections::HashMap;
use std::fs::{self, File};
use std::hash::{Hash, Hasher};
use std::path::{Path, PathBuf};
use image::{avif::AvifEncoder, imageops::FilterType, ColorType};
use image::{GenericImageView, ImageOutputFormat};
use lazy_static::lazy_static;
use rayon::prelude::*;
use regex::Regex;
use errors::{Error, Result};
use utils::fs as ufs;
/// Name of the sub-directory (joined onto the static path and the base URL)
/// that holds every processed image.
static RESIZED_SUBDIR: &str = "processed_images";

lazy_static! {
    /// Matches the file names produced by `Processor::op_filename`:
    /// capture 1 is the 16-hex-digit hash, capture 2 the 2-hex-digit collision ID
    /// and capture 3 the extension.
    ///
    /// The extension alternatives must list every value `Format::extension` can
    /// return; otherwise `Processor::prune` never recognizes (and never removes)
    /// outputs of the missing format.
    pub static ref RESIZED_FILENAME: Regex =
        Regex::new(r#"([0-9a-f]{16})([0-9a-f]{2})[.](jpg|png|avif)"#).unwrap();
}
/// The resize strategy to apply to an image, together with its target dimensions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResizeOp {
    /// Plain scale to `(width, height)`; the aspect ratio is not taken into account.
    Scale(u32, u32),
    /// Scale to the given width; the height is derived so that the aspect
    /// ratio is preserved.
    FitWidth(u32),
    /// Scale to the given height; the width is derived so that the aspect
    /// ratio is preserved.
    FitHeight(u32),
    /// Shrink the image, preserving aspect ratio, so that it fits inside
    /// `(width, height)` when it is larger than that box in either dimension.
    /// The result may be smaller than the box in one dimension, never larger.
    Fit(u32, u32),
    /// Produce an image of exactly `(width, height)`, filling the whole area.
    /// Whatever does not fit because the aspect ratios differ is cropped away.
    Fill(u32, u32),
}
impl ResizeOp {
pub fn from_args(op: &str, width: Option<u32>, height: Option<u32>) -> Result<ResizeOp> {
use ResizeOp::*;
// Validate args:
match op {
"fit_width" => {
if width.is_none() {
return Err("op=\"fit_width\" requires a `width` argument".to_string().into());
}
}
"fit_height" => {
if height.is_none() {
return Err("op=\"fit_height\" requires a `height` argument"
.to_string()
.into());
}
}
"scale" | "fit" | "fill" => {
if width.is_none() || height.is_none() {
return Err(
format!("op={} requires a `width` and `height` argument", op).into()
);
}
}
_ => return Err(format!("Invalid image resize operation: {}", op).into()),
};
Ok(match op {
"scale" => Scale(width.unwrap(), height.unwrap()),
"fit_width" => FitWidth(width.unwrap()),
"fit_height" => FitHeight(height.unwrap()),
"fit" => Fit(width.unwrap(), height.unwrap()),
"fill" => Fill(width.unwrap(), height.unwrap()),
_ => unreachable!(),
})
}
pub fn width(self) -> Option<u32> {
use ResizeOp::*;
match self {
Scale(w, _) => Some(w),
FitWidth(w) => Some(w),
FitHeight(_) => None,
Fit(w, _) => Some(w),
Fill(w, _) => Some(w),
}
}
pub fn height(self) -> Option<u32> {
use ResizeOp::*;
match self {
Scale(_, h) => Some(h),
FitWidth(_) => None,
FitHeight(h) => Some(h),
Fit(_, h) => Some(h),
Fill(_, h) => Some(h),
}
}
}
impl From<ResizeOp> for u8 {
fn from(op: ResizeOp) -> u8 {
use ResizeOp::*;
match op {
Scale(_, _) => 1,
FitWidth(_) => 2,
FitHeight(_) => 3,
Fit(_, _) => 4,
Fill(_, _) => 5,
}
}
}
// `PartialEq` is derived while `Hash` is written by hand, which is what the
// silenced lint complains about.
#[allow(clippy::derive_hash_xor_eq)]
impl Hash for ResizeOp {
    /// Feeds the operation's numeric tag, then its width (if it has one),
    /// then its height (if it has one) into `hasher`.
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        hasher.write_u8(u8::from(*self));
        // Width always precedes height; absent dimensions contribute nothing.
        for dimension in self.width().into_iter().chain(self.height()) {
            hasher.write_u32(dimension);
        }
    }
}
/// Output encoding of a processed (thumbnail) image.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// JPEG; the `u8` is the JPEG quality, in percent.
    Jpeg(u8),
    /// PNG.
    Png,
    /// AVIF; the `u8` is the quality value handed to the AVIF encoder.
    Avif(u8),
}
impl Format {
    /// Resolves the output format from its textual name.
    ///
    /// * `source` - path of the source image; only consulted when `format` is
    ///   `"auto"`, in which case lossy sources map to JPEG and lossless ones to PNG.
    /// * `format` - one of `"auto"`, `"jpeg"`, `"jpg"`, `"png"` or `"avif"`.
    /// * `quality` - encoder quality, attached to the JPEG and AVIF variants.
    ///
    /// # Errors
    ///
    /// Returns an error when `quality` is outside `1..=100`, when `format` is
    /// not a recognized name, or when `format` is `"auto"` and the extension of
    /// `source` is not a supported image type.
    pub fn from_args(source: &str, format: &str, quality: u8) -> Result<Format> {
        use Format::*;

        // The quality ultimately comes from user-supplied arguments, so an
        // out-of-range value is reported as an error rather than a panic.
        if quality == 0 || quality > 100 {
            return Err(format!(
                "Image quality must be within the range [1; 100], got {}",
                quality
            )
            .into());
        }

        match format {
            "auto" => match Self::is_lossy(source) {
                Some(true) => Ok(Jpeg(quality)),
                Some(false) => Ok(Png),
                None => Err(format!("Unsupported image file: {}", source).into()),
            },
            "jpeg" | "jpg" => Ok(Jpeg(quality)),
            "png" => Ok(Png),
            "avif" => Ok(Avif(quality)),
            _ => Err(format!("Invalid image format: {}", format).into()),
        }
    }

    /// Looks at the file's extension (case-insensitively) and, if it is a
    /// supported image format, returns whether that format is lossy.
    ///
    /// Returns `None` when there is no extension, when it is not valid UTF-8,
    /// or when it is not one of `jpg`, `jpeg`, `png`, `gif`, `bmp`.
    pub fn is_lossy<P: AsRef<Path>>(p: P) -> Option<bool> {
        p.as_ref()
            .extension()
            .and_then(std::ffi::OsStr::to_str)
            .and_then(|ext| match ext.to_lowercase().as_str() {
                "jpg" | "jpeg" => Some(true),
                "png" | "gif" | "bmp" => Some(false),
                _ => None,
            })
    }

    /// File extension (without the dot) used for outputs of this format.
    fn extension(&self) -> &str {
        // Keep in sync with RESIZED_FILENAME and op_filename
        match *self {
            Format::Png => "png",
            Format::Jpeg(_) => "jpg",
            Format::Avif(_) => "avif",
        }
    }
}
#[allow(clippy::derive_hash_xor_eq)]
impl Hash for Format {
fn hash<H: Hasher>(&self, hasher: &mut H) {
use Format::*;
let q = match *self {
Png => 0,
Jpeg(q) => q,
Avif(q) => 101 + q,
};
hasher.write_u8(q);
}
}
/// Everything required to carry out one resize operation on one source image.
#[derive(Debug, PartialEq, Eq)]
pub struct ImageOp {
    /// Path of the source image, joined onto the content path when the
    /// operation is performed.
    source: String,
    /// How the image is resized.
    op: ResizeOp,
    /// Encoding of the output file.
    format: Format,
    /// Hash computed from `source`, `op` and `format`.
    hash: u64,
    /// `0` unless another, different `ImageOp` produced the same `hash`.
    /// When that happens the colliding operations receive sequential IDs,
    /// starting at 1, in the order they are encountered (which is essentially
    /// random). Outputs with a non-zero ID are therefore always considered
    /// out of date. Collisions are very unlikely in practice.
    collision_id: u32,
}
impl ImageOp {
    /// Creates an operation and computes its hash from `source`, `op` and `format`.
    ///
    /// The collision ID starts at `0`; it is only changed by `Processor` when
    /// it detects a hash collision.
    pub fn new(source: String, op: ResizeOp, format: Format) -> ImageOp {
        let mut hasher = DefaultHasher::new();
        hasher.write(source.as_ref());
        op.hash(&mut hasher);
        format.hash(&mut hasher);
        let hash = hasher.finish();

        ImageOp { source, op, format, hash, collision_id: 0 }
    }

    /// Convenience constructor that parses the textual `op` and `format`
    /// arguments before delegating to [`ImageOp::new`].
    ///
    /// # Errors
    ///
    /// Propagates any error from `ResizeOp::from_args` or `Format::from_args`.
    pub fn from_args(
        source: String,
        op: &str,
        width: Option<u32>,
        height: Option<u32>,
        format: &str,
        quality: u8,
    ) -> Result<ImageOp> {
        let op = ResizeOp::from_args(op, width, height)?;
        let format = Format::from_args(&source, format, quality)?;
        Ok(Self::new(source, op, format))
    }

    /// Resizes `content_path/source` and writes the result to `target_path`.
    ///
    /// Nothing is done when `ufs::file_stale` reports that the target is not
    /// stale with respect to the source.
    ///
    /// # Errors
    ///
    /// Returns an error when the source cannot be opened/decoded, when encoding
    /// fails, or when the target file cannot be written.
    fn perform(&self, content_path: &Path, target_path: &Path) -> Result<()> {
        use ResizeOp::*;

        let src_path = content_path.join(&self.source);
        if !ufs::file_stale(&src_path, target_path) {
            return Ok(());
        }

        let mut img = image::open(&src_path)?;
        let (img_w, img_h) = img.dimensions();

        const RESIZE_FILTER: FilterType = FilterType::Lanczos3;
        const RATIO_EPSILON: f32 = 0.1;

        let img = match self.op {
            Scale(w, h) => img.resize_exact(w, h, RESIZE_FILTER),
            FitWidth(w) => img.resize(w, u32::max_value(), RESIZE_FILTER),
            FitHeight(h) => img.resize(u32::max_value(), h, RESIZE_FILTER),
            Fit(w, h) => {
                // Only ever shrink: images already inside the box are left untouched.
                if img_w > w || img_h > h {
                    img.resize(w, h, RESIZE_FILTER)
                } else {
                    img
                }
            }
            Fill(w, h) => {
                let factor_w = img_w as f32 / w as f32;
                let factor_h = img_h as f32 / h as f32;

                if (factor_w - factor_h).abs() <= RATIO_EPSILON {
                    // If the horizontal and vertical factors are very similar,
                    // the aspect ratios are close enough that there is not much
                    // point in cropping, so just perform a simple scale.
                    img.resize_exact(w, h, RESIZE_FILTER)
                } else {
                    // Crop first and then resize_exact, which should be cheaper
                    // than resizing and then cropping (fewer pixels to resize).
                    // The crop is centered along the dimension being cut, and
                    // its size is clamped to the source size so that float
                    // rounding can never make the offset subtraction underflow.
                    let (offset_w, offset_h, crop_w, crop_h) = if factor_w < factor_h {
                        let crop_h = ((factor_w * h as f32).round() as u32).min(img_h);
                        (0, (img_h - crop_h) / 2, img_w, crop_h)
                    } else {
                        let crop_w = ((factor_h * w as f32).round() as u32).min(img_w);
                        ((img_w - crop_w) / 2, 0, crop_w, img_h)
                    };

                    img.crop(offset_w, offset_h, crop_w, crop_h).resize_exact(w, h, RESIZE_FILTER)
                }
            }
        };

        // Encode into memory first: the target file is only created once
        // encoding has succeeded, so a failed encode does not leave an empty
        // file behind at `target_path`.
        let mut encoded = std::io::Cursor::new(Vec::new());
        match self.format {
            Format::Png => img.write_to(&mut encoded, ImageOutputFormat::Png)?,
            Format::Jpeg(q) => img.write_to(&mut encoded, ImageOutputFormat::Jpeg(q))?,
            Format::Avif(q) => {
                let (out_w, out_h) = img.dimensions();
                // Describe the buffer with the image's real color type; the raw
                // bytes are only RGB8 when the decoded image happens to be RGB8.
                AvifEncoder::new_with_speed_quality(&mut encoded, 1, q).write_image(
                    img.as_bytes(),
                    out_w,
                    out_h,
                    img.color(),
                )?;
            }
        }

        let mut f = File::create(target_path)?;
        std::io::Write::write_all(&mut f, encoded.get_ref())?;

        Ok(())
    }
}
/// A structure into which image operations can be enqueued and then performed.
///
/// All output is written to a sub-directory of the static path given to
/// `Processor::new`, taking care of file stale status based on timestamps
/// and of possible hash collisions.
#[derive(Debug)]
pub struct Processor {
    /// Directory against which the `source` of every operation is resolved.
    content_path: PathBuf,
    /// Directory the processed images are written to.
    resized_path: PathBuf,
    /// URL prefix under which the processed images are served.
    resized_url: String,
    /// The enqueued operations, keyed by their stored hash.
    /// This deliberately is not a `HashSet`: a set would resolve collisions
    /// internally, whereas collisions have to be detected and handled here.
    img_ops: HashMap<u64, ImageOp>,
    /// Operations whose hash collided with a different operation already
    /// present in `img_ops`.
    img_ops_collisions: Vec<ImageOp>,
}
impl Processor {
pub fn new(content_path: PathBuf, static_path: &Path, base_url: &str) -> Processor {
Processor {
content_path,
resized_path: static_path.join(RESIZED_SUBDIR),
resized_url: Self::resized_url(base_url),
img_ops: HashMap::new(),
img_ops_collisions: Vec::new(),
}
}
fn resized_url(base_url: &str) -> String {
if base_url.ends_with('/') {
format!("{}{}", base_url, RESIZED_SUBDIR)
} else {
format!("{}/{}", base_url, RESIZED_SUBDIR)
}
}
pub fn set_base_url(&mut self, base_url: &str) {
self.resized_url = Self::resized_url(base_url);
}
pub fn source_exists(&self, source: &str) -> bool {
self.content_path.join(source).exists()
}
pub fn num_img_ops(&self) -> usize {
self.img_ops.len() + self.img_ops_collisions.len()
}
fn insert_with_collisions(&mut self, mut img_op: ImageOp) -> u32 {
match self.img_ops.entry(img_op.hash) {
HEntry::Occupied(entry) => {
if *entry.get() == img_op {
return 0;
}
}
HEntry::Vacant(entry) => {
entry.insert(img_op);
return 0;
}
}
// If we get here, that means a hash collision.
// This is detected when there is an ImageOp with the same hash in the `img_ops`
// map but which is not equal to this one.
// To deal with this, all collisions get a (random) sequential ID number.
// First try to look up this ImageOp in `img_ops_collisions`, maybe we've
// already seen the same ImageOp.
// At the same time, count IDs to figure out the next free one.
// Start with the ID of 2, because we'll need to use 1 for the ImageOp
// already present in the map:
let mut collision_id = 2;
for op in self.img_ops_collisions.iter().filter(|op| op.hash == img_op.hash) {
if *op == img_op {
// This is a colliding ImageOp, but we've already seen an equal one
// (not just by hash, but by content too), so just return its ID:
return collision_id;
} else {
collision_id += 1;
}
}
// If we get here, that means this is a new colliding ImageOp and
// `collision_id` is the next free ID
if collision_id == 2 {
// This is the first collision found with this hash, update the ID
// of the matching ImageOp in the map.
self.img_ops.get_mut(&img_op.hash).unwrap().collision_id = 1;
}
img_op.collision_id = collision_id;
self.img_ops_collisions.push(img_op);
collision_id
}
fn op_filename(hash: u64, collision_id: u32, format: Format) -> String {
// Please keep this in sync with RESIZED_FILENAME
assert!(collision_id < 256, "Unexpectedly large number of collisions: {}", collision_id);
format!("{:016x}{:02x}.{}", hash, collision_id, format.extension())
}
fn op_url(&self, hash: u64, collision_id: u32, format: Format) -> String {
format!("{}/{}", &self.resized_url, Self::op_filename(hash, collision_id, format))
}
pub fn insert(&mut self, img_op: ImageOp) -> String {
let hash = img_op.hash;
let format = img_op.format;
let collision_id = self.insert_with_collisions(img_op);
self.op_url(hash, collision_id, format)
}
pub fn prune(&self) -> Result<()> {
// Do not create folders if they don't exist
if !self.resized_path.exists() {
return Ok(());
}
ufs::ensure_directory_exists(&self.resized_path)?;
let entries = fs::read_dir(&self.resized_path)?;
for entry in entries {
let entry_path = entry?.path();
if entry_path.is_file() {
let filename = entry_path.file_name().unwrap().to_string_lossy();
if let Some(capts) = RESIZED_FILENAME.captures(filename.as_ref()) {
let hash = u64::from_str_radix(capts.get(1).unwrap().as_str(), 16).unwrap();
let collision_id =
u32::from_str_radix(capts.get(2).unwrap().as_str(), 16).unwrap();
if collision_id > 0 || !self.img_ops.contains_key(&hash) {
fs::remove_file(&entry_path)?;
}
}
}
}
Ok(())
}
pub fn do_process(&mut self) -> Result<()> {
if !self.img_ops.is_empty() {
ufs::ensure_directory_exists(&self.resized_path)?;
}
self.img_ops
.par_iter()
.map(|(hash, op)| {
let target =
self.resized_path.join(Self::op_filename(*hash, op.collision_id, op.format));
op.perform(&self.content_path, &target)
.map_err(|e| Error::chain(format!("Failed to process image: {}", op.source), e))
})
.collect::<Result<()>>()
}
}