/
main.cc
1903 lines (1730 loc) · 62.3 KB
/
main.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2021 The Fuse-Archive Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------
// fuse-archive read-only mounts an archive or compressed file (e.g. foo.tar,
// foo.tar.gz, foo.xz, foo.zip) as a FUSE file system
// (https://en.wikipedia.org/wiki/Filesystem_in_Userspace).
//
// To build:
// g++ -O3 main.cc `pkg-config libarchive fuse --cflags --libs` -o example
//
// To use:
// ./example ../test/data/archive.zip the/path/to/the/mountpoint
// ls -l the/path/to/the/mountpoint
// fusermount -u the/path/to/the/mountpoint
//
// Pass the "-f" flag to "./example" for foreground operation.
// ---- Preprocessor
#define FUSE_USE_VERSION 26
#include <archive.h>
#include <archive_entry.h>
#include <errno.h>
#include <fcntl.h>
#include <fuse.h>
#include <langinfo.h>
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <atomic>
#include <chrono>
#include <climits>
#include <memory>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
// fuse_file_info.fh is a uint64_t. Check that it can hold a pointer.
#if UINTPTR_MAX > UINT64_MAX
#error "fuse-archive requires that casting a uintptr_t to uint64_t is lossless"
#endif
#define TRY(operation) \
do { \
int try_status_code = operation; \
if (try_status_code) { \
return try_status_code; \
} \
} while (false)
// TRY_EXIT_CODE is like TRY but is used in the main function where Linux exit
// codes range from 0 to 255. In contrast, TRY is used for other functions
// where e.g. it's valid to return a negative value like -ENOENT.
#define TRY_EXIT_CODE(operation) \
do { \
int try_status_code = operation; \
if (try_status_code < 0) { \
return EXIT_CODE_GENERIC_FAILURE; \
} else if (try_status_code > 0) { \
return try_status_code; \
} \
} while (false)
// ---- Exit Codes
// These are values passed to the exit function, or returned by main. These are
// (Linux or Linux-like) application exit codes, not library error codes.
//
// Note that, unless the -f command line option was passed for foreground
// operation, the parent process may very well ignore the exit code value after
// daemonization succeeds.
#define EXIT_CODE_GENERIC_FAILURE 1
// Exit code 2 is skipped: https://tldp.org/LDP/abs/html/exitcodes.html
#define EXIT_CODE_CANNOT_OPEN_ARCHIVE 11
#define EXIT_CODE_PASSPHRASE_REQUIRED 20
#define EXIT_CODE_PASSPHRASE_INCORRECT 21
#define EXIT_CODE_PASSPHRASE_NOT_SUPPORTED 22
#define EXIT_CODE_INVALID_RAW_ARCHIVE 30
#define EXIT_CODE_INVALID_ARCHIVE_HEADER 31
#define EXIT_CODE_INVALID_ARCHIVE_CONTENTS 32
// Duplicate entries can be explicit (where "/foo/bar" appears twice in the
// archive's entry list) or implicit (where "/foo/bar" might appear as a
// regular *file* in the entry list, but "/foo/bar/baz.txt" also being present
// implies that "/foo/bar" is a *directory*).
#define EXIT_CODE_INVALID_EXPLICIT_DUPLICATE_ENTRY 33
#define EXIT_CODE_INVALID_IMPLICIT_DUPLICATE_ENTRY 34
// ---- Compile-time Configuration
#define PROGRAM_NAME "fuse-archive"
#ifndef FUSE_ARCHIVE_VERSION
#define FUSE_ARCHIVE_VERSION "0.1.14"
#endif
#ifndef BLOCK_SIZE
#define BLOCK_SIZE 16384
#endif
#ifndef NUM_SAVED_READERS
#define NUM_SAVED_READERS 8
#endif
#ifndef NUM_SIDE_BUFFERS
#define NUM_SIDE_BUFFERS 8
#elif NUM_SIDE_BUFFERS <= 0
#error "invalid NUM_SIDE_BUFFERS"
#endif
// This defaults to 128 KiB (0x20000 bytes) because, on a vanilla x86_64 Debian
// Linux, that seems to be the largest buffer size passed to my_read.
#ifndef SIDE_BUFFER_SIZE
#define SIDE_BUFFER_SIZE 131072
#elif SIDE_BUFFER_SIZE <= 0
#error "invalid SIDE_BUFFER_SIZE"
#endif
// ---- Platform specifics
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
#define lseek64 lseek
#endif
// ---- Globals
static struct options {
bool help;
bool version;
bool passphrase;
bool quiet;
bool redact;
const char* asyncprogress;
} g_options = {};
enum {
// Options shared with libfuse's helper.c.
MY_KEY_HELP = 100,
MY_KEY_VERSION = 101,
// Options exclusive to fuse-archive.
MY_KEY_ASYNCPROGRESS = 200,
MY_KEY_EAGER = 201,
MY_KEY_PASSPHRASE = 202,
MY_KEY_QUIET = 203,
MY_KEY_REDACT = 204,
};
static struct fuse_opt g_fuse_opts[] = {
FUSE_OPT_KEY("-h", MY_KEY_HELP), //
FUSE_OPT_KEY("--help", MY_KEY_HELP), //
FUSE_OPT_KEY("-V", MY_KEY_VERSION), //
FUSE_OPT_KEY("--version", MY_KEY_VERSION), //
FUSE_OPT_KEY("--asyncprogress=%s", MY_KEY_ASYNCPROGRESS), //
FUSE_OPT_KEY("asyncprogress=%s", MY_KEY_ASYNCPROGRESS), //
FUSE_OPT_KEY("--passphrase", MY_KEY_PASSPHRASE), //
FUSE_OPT_KEY("passphrase", MY_KEY_PASSPHRASE), //
FUSE_OPT_KEY("--quiet", MY_KEY_QUIET), //
FUSE_OPT_KEY("-q", MY_KEY_QUIET), //
FUSE_OPT_KEY("--redact", MY_KEY_REDACT), //
FUSE_OPT_KEY("redact", MY_KEY_REDACT), //
// The remaining options are listed for e.g. "-o formatraw" command line
// compatibility with the https://github.com/cybernoid/archivemount program
// but are otherwise ignored. For example, this program detects 'raw'
// archives automatically and only supports read-only, not read-write.
FUSE_OPT_KEY("formatraw", FUSE_OPT_KEY_DISCARD), //
FUSE_OPT_KEY("nobackup", FUSE_OPT_KEY_DISCARD), //
FUSE_OPT_KEY("nosave", FUSE_OPT_KEY_DISCARD), //
FUSE_OPT_KEY("readonly", FUSE_OPT_KEY_DISCARD), //
FUSE_OPT_END,
};
// g_archive_filename is the command line argument naming the archive file.
static const char* g_archive_filename = NULL;
// g_archive_innername is the base name of g_archive_filename, minus the file
// extension suffix. For example, if g_archive_filename is "/foo/bar.ext0.ext1"
// then g_archive_innername is "bar.ext0".
static const char* g_archive_innername = NULL;
// g_archive_fd is the file descriptor returned by opening g_archive_filename.
static int g_archive_fd = -1;
// g_archive_file_size is the size of the g_archive_filename file.
static int64_t g_archive_file_size = 0;
// g_archive_fd_position_current is the read position of g_archive_fd.
//
// etc_hwm is the etc_current high water mark (the largest value seen). When
// compared to g_archive_file_size, it proxies what proportion of the archive
// has been processed. This matters for 'raw' archives that need a complete
// decompression pass (as they do not have a table of contents within to
// explicitly record the decompressed file size).
static int64_t g_archive_fd_position_current = 0;
static std::atomic<int64_t> g_archive_fd_position_hwm;
// g_archive_realpath holds the canonicalised absolute path of the archive
// file. The command line argument may give a relative filename (one that
// doesn't start with a slash) and the fuse_main function may change the
// current working directory, so subsequent archive_read_open_filename calls
// use this absolute filepath instead. g_archive_filename is still used for
// logging. g_archive_realpath is allocated in pre_initialize and never freed.
static const char* g_archive_realpath = NULL;
// g_passphrase_buffer and g_passphrase_length combine to hold the passphrase,
// if given. The buffer is NUL-terminated but g_passphrase_length excludes the
// final '\0'. If no passphrase was given, g_passphrase_length is zero.
#define PASSPHRASE_BUFFER_LENGTH 1024
static char g_passphrase_buffer[PASSPHRASE_BUFFER_LENGTH] = {};
static int g_passphrase_length = 0;
// g_archive_is_raw is whether the archive file is 'cooked' or 'raw'.
//
// We support 'cooked' archive files (e.g. foo.tar.gz or foo.zip) but also what
// libarchive calls 'raw' files (e.g. foo.gz), which are compressed but not
// explicitly an archive (a collection of files). libarchive can still present
// it as an implicit archive containing 1 file.
static bool g_archive_is_raw = false;
// g_uid and g_gid are the user/group IDs for the files we serve. They're the
// same as the current uid/gid.
//
// libfuse will override my_getattr's use of these variables if the "-o uid=N"
// or "-o gid=N" command line options are set.
static uid_t g_uid = 0;
static gid_t g_gid = 0;
// We serve ls and stat requests from an in-memory directory tree of nodes.
// Building that tree is one of the first things that we do.
//
// Building is split into two parts and the bulk of it is done in the second
// part, lazily (unless the --asyncprogress flag is set), so that the main
// function can call fuse_main (to bind the mountpoint and daemonize) as fast as
// possible (although the main function still does a preliminary check that the
// archive_filename command line argument actually names an existing file that
// looks like a valid archive).
//
// These global variables connect those two parts.
static int g_initialize_status_code = 0;
static struct archive* g_initialize_archive = nullptr;
static struct archive_entry* g_initialize_archive_entry = nullptr;
static int64_t g_initialize_index_within_archive = -1;
// g_displayed_progress is whether we have printed a progress message.
static bool g_displayed_progress = false;
// These global variables are the in-memory directory tree of nodes.
//
// g_root_node being non-nullptr means that initialization is complete (when
// the --asyncprogress option is not used).
//
// g_asyncprogress_complete being true means that initialization is complete
// (when the --asyncprogress option is used).
//
// Building the directory tree can take minutes, for archive file formats like
// .tar.gz that are compressed but also do not contain an explicit on-disk
// directory of archive entries. Using the --asyncprogress option makes serving
// the first FUSE request (such as a top-level "ls") instantaneous, at the
// expense of making it harder (at the file system level) to determine when the
// initialization is complete and the actual directory tree is being served.
static std::unordered_map<std::string, struct node*> g_nodes_by_name;
static std::vector<struct node*> g_nodes_by_index;
static struct node* g_root_node = nullptr;
static std::atomic<bool> g_asyncprogress_complete;
// g_shutting_down (when the --asyncprogress option is used) communicates to
// the worker thread that the main thread is shutting down.
//
// Normally, the worker thread populates the g_nodes_by_etc containers, then
// flips g_asyncprogress_complete, then the main thread reads those containers.
// All of the container reads happen after all of the container writes, even
// though reads and writes happen in different threads.
//
// However, if the mountpoint is unmounted (e.g. by "fusermount -u") before the
// worker thread is done, the main thread's main function's return can trigger
// those containers' destructors and we now have concurrent and racy writes on
// those global variables.
//
// To avoid a segfault (or other undefined behavior), g_shutting_down tells the
// worker thread to stop early and the main thread then joins it before
// implicitly running the g_nodes_by_etc destructors.
static std::atomic<bool> g_shutting_down;
// g_saved_readers is a cache of warm readers. libarchive is designed for
// streaming access, not random access, and generally does not support seeking
// backwards. For example, if some other program reads "/foo", "/bar" and then
// "/baz" sequentially from an archive (via this program) and those correspond
// to the 60th, 40th and 50th archive entries in that archive, then:
//
// - A naive implementation (calling archive_read_free when each FUSE file is
// closed) would have to start iterating from the first archive entry each
// time a FUSE file is opened, for 150 iterations (60 + 40 + 50) in total.
// - Saving readers in an LRU (Least Recently Used) cache (calling
// release_reader when each FUSE file is closed) allows just 110 iterations
// (60 + 40 + 10) in total. The reader for "/bar" can be re-used for "/baz".
//
// Re-use eligibility is based on the archive entries' sequential numerical
// indexes within the archive, not on their string pathnames.
//
// When copying all of the files out of an archive (e.g. "cp -r" from the
// command line) and the files are accessed in the natural order, caching
// readers means that the overall time can be linear instead of quadratic.
//
// Each array element is a pair. The first half of the pair is a unique_ptr for
// the reader. The second half of the pair is a uint64_t LRU priority value.
// Higher/lower values are more/less recently used and the release_reader
// function evicts the array element with the lowest LRU priority value.
static std::pair<std::unique_ptr<struct reader>, uint64_t>
g_saved_readers[NUM_SAVED_READERS] = {};
// g_side_buffer_data and g_side_buffer_metadata combine to hold side buffers:
// statically allocated buffers used as a destination for decompressed bytes
// when reader::advance_offset isn't a no-op. These buffers are roughly
// equivalent to Unix's /dev/null or Go's io.Discard as a first approximation.
// However, since we are already producing valid decompressed bytes, by saving
// them (and their metadata), we may be able to serve some subsequent my_read
// requests cheaply, without having to spin up another libarchive decompressor
// to walk forward from the start of the archive entry.
//
// In particular (https://crbug.com/1245925#c18), even when libfuse is single-
// threaded, we have seen kernel readahead causing the offset arguments in a
// sequence of my_read calls to sometimes arrive out-of-order, where
// conceptually consecutive reads are swapped. With side buffers, we can serve
// the second-to-arrive request by a cheap memcpy instead of an expensive
// "re-do decompression from the start". That side-buffer was filled by a
// reader::advance_offset side-effect from serving the first-to-arrive request.
static uint8_t g_side_buffer_data[NUM_SIDE_BUFFERS][SIDE_BUFFER_SIZE] = {};
static struct side_buffer_metadata {
int64_t index_within_archive;
int64_t offset_within_entry;
int64_t length;
uint64_t lru_priority;
static uint64_t next_lru_priority;
bool contains(int64_t index_within_archive,
int64_t offset_within_entry,
uint64_t length) {
if ((this->index_within_archive >= 0) &&
(this->index_within_archive == index_within_archive) &&
(this->offset_within_entry <= offset_within_entry)) {
int64_t o = offset_within_entry - this->offset_within_entry;
return (this->length >= o) &&
(static_cast<uint64_t>(this->length - o) >= length);
}
return false;
}
} g_side_buffer_metadata[NUM_SIDE_BUFFERS] = {};
uint64_t side_buffer_metadata::next_lru_priority = 0;
// The side buffers are also repurposed as source (compressed) and destination
// (decompressed) buffers during the initial pass over the archive file.
#define SIDE_BUFFER_INDEX_COMPRESSED 0
#define SIDE_BUFFER_INDEX_DECOMPRESSED 1
#if NUM_SIDE_BUFFERS <= 1
#error "invalid NUM_SIDE_BUFFERS"
#endif
// ---- Libarchive Error Codes
static bool //
starts_with(const char* s, const char* prefix) {
if (!s || !prefix) {
return false;
}
size_t ns = strlen(s);
size_t np = strlen(prefix);
return (ns >= np) && (strncmp(s, prefix, np) == 0);
}
// determine_passphrase_exit_code converts libarchive errors to fuse-archive
// exit codes. libarchive doesn't have designated passphrase-related error
// numbers. As for whether a particular archive file's encryption is supported,
// libarchive isn't consistent in archive_read_has_encrypted_entries returning
// ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED. Instead, we do a string
// comparison on the various possible error messages.
static int //
determine_passphrase_exit_code(const char* const e) {
if (starts_with(e, "Incorrect passphrase")) {
return EXIT_CODE_PASSPHRASE_INCORRECT;
}
if (starts_with(e, "Passphrase required")) {
return EXIT_CODE_PASSPHRASE_REQUIRED;
}
static const char* const not_supported_prefixes[] = {
"Crypto codec not supported",
"Decryption is unsupported",
"Encrypted file is unsupported",
"Encryption is not supported",
"RAR encryption support unavailable",
"The archive header is encrypted, but currently not supported",
"The file content is encrypted, but currently not supported",
"Unsupported encryption format",
};
for (const char* const prefix : not_supported_prefixes) {
if (starts_with(e, prefix)) {
return EXIT_CODE_PASSPHRASE_NOT_SUPPORTED;
}
}
return EXIT_CODE_INVALID_ARCHIVE_CONTENTS;
}
// ---- Logging
// redact replaces s with a placeholder string when the "--redact" command line
// option was given. This may prevent Personally Identifiable Information (PII)
// such as archive filenames or archive entry pathnames from being logged.
static const char* //
redact(const char* s) {
return g_options.redact ? "[REDACTED]" : s;
}
// ---- Libarchive Read Callbacks
static uint32_t //
initialization_progress_out_of_1000000() {
int64_t m = g_archive_fd_position_hwm.load();
int64_t n = g_archive_file_size;
if ((m <= 0) || (n <= 0)) {
return 0;
} else if (m >= n) {
return 1000000;
}
double x = ((double)m) / ((double)n);
return ((uint32_t)(1000000 * x));
}
static void //
update_g_archive_fd_position_hwm() {
int64_t h = g_archive_fd_position_hwm.load();
if (h < g_archive_fd_position_current) {
g_archive_fd_position_hwm.store(g_archive_fd_position_current);
}
const auto period = std::chrono::seconds(1);
static auto next = std::chrono::steady_clock::now() + period;
const auto now = std::chrono::steady_clock::now();
if (!g_options.quiet && (now >= next)) {
next = now + period;
const int percent = initialization_progress_out_of_1000000() / 10000;
if (isatty(STDERR_FILENO)) {
if (g_displayed_progress) {
fprintf(stderr, "\e[F\e[K");
}
fprintf(stderr, "Loading %d%%\n", percent);
fflush(stderr);
} else {
syslog(LOG_INFO, "Loading %d%%", percent);
}
g_displayed_progress = true;
}
}
// The callbacks below are only used during start-up, for the initial pass
// through the archive to build the node tree, based on the g_archive_fd file
// descriptor that stays open for the lifetime of the process. They are like
// libarchive's built-in "read from a file" callbacks but also update
// g_archive_fd_position_etc. The callback_data arguments are ignored in favor
// of global variables.
static int //
my_file_close(struct archive* a, void* callback_data) {
return ARCHIVE_OK;
}
static int //
my_file_open(struct archive* a, void* callback_data) {
g_archive_fd_position_current = 0;
g_archive_fd_position_hwm.store(0);
g_asyncprogress_complete.store(false);
return ARCHIVE_OK;
}
static ssize_t //
my_file_read(struct archive* a, void* callback_data, const void** out_dst_ptr) {
if (g_options.asyncprogress && g_shutting_down.load()) {
archive_set_error(a, ECANCELED, "shutting down");
return ARCHIVE_FATAL;
} else if (g_archive_fd < 0) {
archive_set_error(a, EIO, "invalid g_archive_fd");
return ARCHIVE_FATAL;
}
uint8_t* dst_ptr = &g_side_buffer_data[SIDE_BUFFER_INDEX_COMPRESSED][0];
while (true) {
ssize_t n = read(g_archive_fd, dst_ptr, SIDE_BUFFER_SIZE);
if (n >= 0) {
g_archive_fd_position_current += n;
update_g_archive_fd_position_hwm();
*out_dst_ptr = dst_ptr;
return n;
} else if (errno == EINTR) {
continue;
}
archive_set_error(a, errno, "could not read archive file: %s",
strerror(errno));
break;
}
return ARCHIVE_FATAL;
}
static int64_t //
my_file_seek(struct archive* a,
void* callback_data,
int64_t offset,
int whence) {
if (g_options.asyncprogress && g_shutting_down.load()) {
archive_set_error(a, ECANCELED, "shutting down");
return ARCHIVE_FATAL;
} else if (g_archive_fd < 0) {
archive_set_error(a, EIO, "invalid g_archive_fd");
return ARCHIVE_FATAL;
}
int64_t o = lseek64(g_archive_fd, offset, whence);
if (o >= 0) {
g_archive_fd_position_current = o;
update_g_archive_fd_position_hwm();
return o;
}
archive_set_error(a, errno, "could not seek in archive file: %s",
strerror(errno));
return ARCHIVE_FATAL;
}
static int64_t //
my_file_skip(struct archive* a, void* callback_data, int64_t delta) {
if (g_options.asyncprogress && g_shutting_down.load()) {
archive_set_error(a, ECANCELED, "shutting down");
return ARCHIVE_FATAL;
} else if (g_archive_fd < 0) {
archive_set_error(a, EIO, "invalid g_archive_fd");
return ARCHIVE_FATAL;
}
int64_t o0 = lseek64(g_archive_fd, 0, SEEK_CUR);
int64_t o1 = lseek64(g_archive_fd, delta, SEEK_CUR);
if ((o1 >= 0) && (o0 >= 0)) {
g_archive_fd_position_current = o1;
update_g_archive_fd_position_hwm();
return o1 - o0;
}
archive_set_error(a, errno, "could not seek in archive file: %s",
strerror(errno));
return ARCHIVE_FATAL;
}
static int //
my_file_switch(struct archive* a, void* callback_data0, void* callback_data1) {
return ARCHIVE_OK;
}
static int //
my_archive_read_open(struct archive* a) {
TRY(archive_read_set_callback_data(a, nullptr));
TRY(archive_read_set_close_callback(a, my_file_close));
TRY(archive_read_set_open_callback(a, my_file_open));
TRY(archive_read_set_read_callback(a, my_file_read));
TRY(archive_read_set_seek_callback(a, my_file_seek));
TRY(archive_read_set_skip_callback(a, my_file_skip));
TRY(archive_read_set_switch_callback(a, my_file_switch));
return archive_read_open1(a);
}
// ---- Side Buffer
// acquire_side_buffer returns the index of the least recently used side
// buffer. This indexes g_side_buffer_data and g_side_buffer_metadata.
static int //
acquire_side_buffer() {
// The preprocessor already checks "#elif NUM_SIDE_BUFFERS <= 0".
int oldest_i = 0;
uint64_t oldest_lru_priority = g_side_buffer_metadata[0].lru_priority;
for (int i = 1; i < NUM_SIDE_BUFFERS; i++) {
if (oldest_lru_priority > g_side_buffer_metadata[i].lru_priority) {
oldest_lru_priority = g_side_buffer_metadata[i].lru_priority;
oldest_i = i;
}
}
g_side_buffer_metadata[oldest_i].index_within_archive = -1;
g_side_buffer_metadata[oldest_i].offset_within_entry = -1;
g_side_buffer_metadata[oldest_i].length = -1;
g_side_buffer_metadata[oldest_i].lru_priority = UINT64_MAX;
return oldest_i;
}
static bool //
read_from_side_buffer(int64_t index_within_archive,
char* dst_ptr,
size_t dst_len,
int64_t offset_within_entry) {
// Find the longest side buffer that contains (index_within_archive,
// offset_within_entry, dst_len).
int best_i = -1;
int64_t best_length = -1;
for (int i = 0; i < NUM_SIDE_BUFFERS; i++) {
struct side_buffer_metadata* meta = &g_side_buffer_metadata[i];
if ((meta->length > best_length) &&
meta->contains(index_within_archive, offset_within_entry, dst_len)) {
best_i = i;
best_length = meta->length;
}
}
if (best_i >= 0) {
struct side_buffer_metadata* meta = &g_side_buffer_metadata[best_i];
meta->lru_priority = ++side_buffer_metadata::next_lru_priority;
int64_t o = offset_within_entry - meta->offset_within_entry;
memcpy(dst_ptr, g_side_buffer_data[best_i] + o, dst_len);
return true;
}
return false;
}
// ---- Reader
// reader bundles libarchive concepts (an archive and an archive entry) and
// other state to point to a particular offset (in decompressed space) of a
// particular archive entry (identified by its index) in an archive.
//
// A reader is backed by its own archive_read_open_filename call, managed by
// libarchive, so each can be positioned independently.
struct reader {
struct archive* archive;
struct archive_entry* archive_entry;
int64_t index_within_archive;
int64_t offset_within_entry;
reader(struct archive* _archive)
: archive(_archive),
archive_entry(nullptr),
index_within_archive(-1),
offset_within_entry(0) {}
~reader() {
if (this->archive) {
archive_read_free(this->archive);
}
}
// advance_index walks forward until positioned at the want'th index. An
// index identifies an archive entry. If this reader wasn't already
// positioned at that index, it also resets the reader's offset to zero.
//
// It returns success (true) or failure (false).
bool advance_index(int64_t want) {
if (!this->archive) {
return false;
}
while (this->index_within_archive < want) {
int status =
archive_read_next_header(this->archive, &this->archive_entry);
if (status == ARCHIVE_EOF) {
syslog(LOG_ERR, "inconsistent archive %s", redact(g_archive_filename));
return false;
} else if ((status != ARCHIVE_OK) && (status != ARCHIVE_WARN)) {
syslog(LOG_ERR, "invalid archive %s: %s", redact(g_archive_filename),
archive_error_string(this->archive));
return false;
}
this->index_within_archive++;
this->offset_within_entry = 0;
}
return true;
}
// advance_offset walks forward until positioned at the want'th offset. An
// offset identifies a byte position relative to the start of an archive
// entry's decompressed contents.
//
// The pathname is used for log messages.
//
// It returns success (true) or failure (false).
bool advance_offset(int64_t want, const char* pathname) {
if (!this->archive || !this->archive_entry) {
return false;
} else if (want < this->offset_within_entry) {
// We can't walk backwards.
return false;
} else if (want == this->offset_within_entry) {
// We are exactly where we want to be.
return true;
}
// We are behind where we want to be. Advance (decompressing from the
// archive entry into a side buffer) until we get there.
int sb = acquire_side_buffer();
if ((sb < 0) || (NUM_SIDE_BUFFERS <= sb)) {
return false;
}
uint8_t* dst_ptr = g_side_buffer_data[sb];
struct side_buffer_metadata* meta = &g_side_buffer_metadata[sb];
while (want > this->offset_within_entry) {
int64_t original_owe = this->offset_within_entry;
int64_t dst_len = want - original_owe;
// If the amount we need to advance is greater than the SIDE_BUFFER_SIZE,
// we need multiple this->read calls, but the total advance might not be
// an exact multiple of SIDE_BUFFER_SIZE. Read that remainder amount
// first, not last. For example, if advancing 260KiB with a 128KiB
// SIDE_BUFFER_SIZE then read 4+128+128 instead of 128+128+4. This leaves
// a full side buffer when we've finished advancing, maximizing later
// requests' chances of side-buffer-as-cache hits.
if (dst_len > SIDE_BUFFER_SIZE) {
dst_len %= SIDE_BUFFER_SIZE;
if (dst_len == 0) {
dst_len = SIDE_BUFFER_SIZE;
}
}
ssize_t n = this->read(dst_ptr, dst_len, pathname);
if (n < 0) {
meta->index_within_archive = -1;
meta->offset_within_entry = -1;
meta->length = -1;
meta->lru_priority = 0;
return false;
}
meta->index_within_archive = this->index_within_archive;
meta->offset_within_entry = original_owe;
meta->length = n;
meta->lru_priority = ++side_buffer_metadata::next_lru_priority;
}
return true;
}
// read copies from the archive entry's decompressed contents to the
// destination buffer. It also advances the reader's offset_within_entry.
//
// The pathname is used for log messages.
ssize_t read(void* dst_ptr, size_t dst_len, const char* pathname) {
ssize_t n = archive_read_data(this->archive, dst_ptr, dst_len);
if (n < 0) {
syslog(LOG_ERR, "could not serve %s from %s: %s", redact(pathname),
redact(g_archive_filename), archive_error_string(this->archive));
return -EIO;
} else if (static_cast<size_t>(n) > dst_len) {
syslog(LOG_ERR, "too much data serving %s from %s", redact(pathname),
redact(g_archive_filename));
// Something has gone wrong, possibly a buffer overflow, so abort.
abort();
}
this->offset_within_entry += n;
return n;
}
// swap swaps fields with another reader.
void swap(struct reader* that) {
std::swap(this->archive, that->archive);
std::swap(this->archive_entry, that->archive_entry);
std::swap(this->index_within_archive, that->index_within_archive);
std::swap(this->offset_within_entry, that->offset_within_entry);
}
};
// compare does a lexicographic comparison of the pairs (i0, o0) and (i1, o1).
static int //
compare(int64_t index_within_archive0,
int64_t offset_within_entry0,
int64_t index_within_archive1,
int64_t offset_within_entry1) {
if (index_within_archive0 < index_within_archive1) {
return -1;
} else if (index_within_archive0 > index_within_archive1) {
return +1;
} else if (offset_within_entry0 < offset_within_entry1) {
return -1;
} else if (offset_within_entry0 > offset_within_entry1) {
return +1;
}
return 0;
}
// acquire_reader returns a reader positioned at the start (offset == 0) of the
// given index'th entry of the archive.
static std::unique_ptr<struct reader> //
acquire_reader(int64_t want_index_within_archive) {
if (want_index_within_archive < 0) {
syslog(LOG_ERR, "negative index_within_archive");
return nullptr;
}
int best_i = -1;
int64_t best_index_within_archive = -1;
int64_t best_offset_within_entry = -1;
for (int i = 0; i < NUM_SAVED_READERS; i++) {
struct reader* sri = g_saved_readers[i].first.get();
if (sri &&
(compare(best_index_within_archive, best_offset_within_entry,
sri->index_within_archive, sri->offset_within_entry) < 0) &&
(compare(sri->index_within_archive, sri->offset_within_entry,
want_index_within_archive, 0) <= 0)) {
best_i = i;
best_index_within_archive = sri->index_within_archive;
best_offset_within_entry = sri->offset_within_entry;
}
}
std::unique_ptr<struct reader> r;
if (best_i >= 0) {
r = std::move(g_saved_readers[best_i].first);
g_saved_readers[best_i].second = 0;
} else {
struct archive* a = archive_read_new();
if (!a) {
syslog(LOG_ERR, "out of memory");
return nullptr;
}
if (g_passphrase_length > 0) {
archive_read_add_passphrase(a, g_passphrase_buffer);
}
archive_read_support_filter_all(a);
archive_read_support_format_all(a);
archive_read_support_format_raw(a);
if (archive_read_open_filename(a, g_archive_realpath, BLOCK_SIZE) !=
ARCHIVE_OK) {
syslog(LOG_ERR, "could not read %s: %s", redact(g_archive_filename),
archive_error_string(a));
archive_read_free(a);
return nullptr;
}
r = std::make_unique<struct reader>(a);
}
if (!r->advance_index(want_index_within_archive)) {
return nullptr;
}
return r;
}
// release_reader returns r to the reader cache.
static void //
release_reader(std::unique_ptr<struct reader> r) {
if (NUM_SAVED_READERS <= 0) {
return;
}
int oldest_i = 0;
uint64_t oldest_lru_priority = g_saved_readers[0].second;
for (int i = 1; i < NUM_SAVED_READERS; i++) {
if (oldest_lru_priority > g_saved_readers[i].second) {
oldest_lru_priority = g_saved_readers[i].second;
oldest_i = i;
}
}
static uint64_t next_lru_priority = 0;
g_saved_readers[oldest_i].first = std::move(r);
g_saved_readers[oldest_i].second = ++next_lru_priority;
}
// ---- In-Memory Directory Tree
struct node {
std::string rel_name; // Relative (not absolute) pathname.
std::string symlink;
int64_t index_within_archive;
int64_t size;
time_t mtime;
mode_t mode;
node* last_child;
node* first_child;
node* next_sibling;
node(std::string&& _rel_name,
std::string&& _symlink,
int64_t _index_within_archive,
int64_t _size,
time_t _mtime,
mode_t _mode)
: rel_name(_rel_name),
symlink(_symlink),
index_within_archive(_index_within_archive),
size(_size),
mtime(_mtime),
mode(_mode),
last_child(nullptr),
first_child(nullptr),
next_sibling(nullptr) {}
void add_child(node* n) {
if (this->last_child == nullptr) {
this->last_child = n;
this->first_child = n;
} else {
this->last_child->next_sibling = n;
this->last_child = n;
}
}
void fill_stat(struct stat* z) const {
if (!z) {
return;
}
memset(z, 0, sizeof(*z));
z->st_mode = this->mode;
z->st_nlink = 1;
z->st_uid = g_uid;
z->st_gid = g_gid;
z->st_size = this->size;
z->st_mtime = this->mtime;
}
};
// valid_pathname returns whether the C string p is neither "", "./" or "/"
// and, when splitting on '/' into pathname fragments, no fragment is "", "."
// or ".." other than a possibly leading "" or "." fragment when p starts with
// "/" or "./".
//
// If allow_slashes is false then p must not contain "/".
//
// When iterating over fragments, the p pointer does not move but the q and r
// pointers bracket each fragment:
// "/an/example/pathname"
// pq-r| || |
// p q------r| |
// p q-------r
static bool //
valid_pathname(const char* const p, bool allow_slashes) {
if (!p) {
return false;
}
const char* q = p;
if ((q[0] == '.') && (q[1] == '/')) {
if (!allow_slashes) {
return false;
}
q += 2;
} else if (*q == '/') {
if (!allow_slashes) {
return false;
}
q++;
}
if (*q == 0) {
return false;
}
while (true) {
const char* r = q;
while (*r != 0) {
if (*r != '/') {
r++;
} else if (!allow_slashes) {
return false;
} else {
break;
}
}
size_t len = r - q;
if (len == 0) {
return false;
} else if ((len == 1) && (q[0] == '.')) {
return false;
} else if ((len == 2) && (q[0] == '.') && (q[1] == '.')) {
return false;
}
if (*r == 0) {
break;
}
q = r + 1;
}
return true;
}
// normalize_pathname validates and returns e's pathname, prepending a leading
// "/" if it didn't already have one.
static std::string //
normalize_pathname(struct archive_entry* e) {
const char* s = archive_entry_pathname_utf8(e);
if (!s) {
s = archive_entry_pathname(e);
if (!s) {
syslog(LOG_ERR, "archive entry in %s has empty pathname",
redact(g_archive_filename));
return "";
}
}