This repository has been archived by the owner on Jun 11, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 77
/
packfile.rs
201 lines (182 loc) · 5.96 KB
/
packfile.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
//! packfile format
//!
//! A pack file is a collection of blobs, each prefixed by its 32-bit size in big-endian (BE):
//!
//! SIZE (4 bytes BE)
//! DATA (SIZE bytes)
//! OPTIONAL ALIGNMENT (0 to 3 bytes, depending on SIZE, so that the next blob is 4-byte aligned)
//!
use cryptoxide::blake2b;
use cryptoxide::digest::Digest;
use hash::{BlockHash, PackHash, HASH_SIZE};
use indexfile;
use std::fs;
use std::io;
use std::io::{Read, Seek, SeekFrom};
use std::iter::repeat;
use std::path::Path;
use utils::error::Result;
use utils::magic;
use utils::serialize::{io::write_length_prefixed, offset_align4, read_size, Offset, SIZE_SIZE};
use utils::tmpfile::TmpFile;
/// Magic file-type tag for pack files; the four bytes spell "PACK" in ASCII.
const FILE_TYPE: magic::FileType = 0x5041434b; // = PACK
/// Pack format version: written into the header by `Writer::init` and passed as
/// both version arguments of `magic::check_header` by the readers (presumably the
/// accepted minimum and maximum -- confirm against `utils::magic`).
const VERSION: magic::Version = 1;
/// A stream reader over a pack file that also accumulates a hash of all the
/// block data it has read.
pub struct Reader<R> {
// underlying byte source the blocks are read from
reader: R,
// position counter: set to 0 in `init` and advanced in `next_block` by 4 plus
// `offset_align4(len)` for every block returned
pos: Offset,
hash_context: blake2b::Blake2b, // hash of all the content of blocks without length or padding
}
/// A pack reader that can seek in a packfile.
///
/// Unlike `Reader`, it keeps no position counter and no running hash.
pub struct Seeker<R> {
// underlying handle that blocks are read from (and seeked on in `block_at_offset`)
handle: R,
}
impl Reader<fs::File> {
    /// Opens the file at `path` and wraps it in a `Reader`.
    ///
    /// # Errors
    /// Fails if the file cannot be opened, or if `Reader::init` rejects it
    /// (it runs `magic::check_header` on the freshly opened file).
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
        Self::init(fs::File::open(path)?)
    }
}
impl<R> Reader<R> {
/// Returns the reader's current position counter.
///
/// The counter is set to 0 by `init` and only changes when `next_block`
/// returns a block.
pub fn pos(&self) -> Offset {
self.pos
}
}
impl<R: Read> Reader<R> {
    /// Builds a `Reader` on top of `r` after validating the pack header.
    ///
    /// `magic::check_header` is run on `r` with this module's `FILE_TYPE` and
    /// `VERSION`; on success the reader starts with a position counter of 0 and
    /// a fresh Blake2b context sized to `HASH_SIZE`.
    ///
    /// NOTE(review): the counter starts at 0 even though the header check has
    /// presumably already consumed bytes from `r`, whereas `Writer` starts its
    /// own counter at `magic::HEADER_SIZE` -- confirm this is intended.
    ///
    /// # Errors
    /// Returns whatever error `magic::check_header` reports.
    pub fn init(mut r: R) -> Result<Self> {
        magic::check_header(&mut r, FILE_TYPE, VERSION, VERSION)?;
        Ok(Reader {
            reader: r,
            pos: 0,
            hash_context: blake2b::Blake2b::new(HASH_SIZE),
        })
    }
}
impl Seeker<fs::File> {
pub fn init<P: AsRef<Path>>(path: P) -> Result<Self> {
let mut file = fs::File::open(path)?;
magic::check_header(&mut file, FILE_TYPE, VERSION, VERSION)?;
Ok(Seeker::from(file))
}
}
impl<R: Seek> From<R> for Seeker<R> {
fn from(handle: R) -> Self {
Seeker { handle }
}
}
/// Reads one block from `file` and returns its data.
///
/// A block in a pack file is:
/// * a 32 bit size in big endian
/// * data of the size above
/// * 0 to 3 bytes of 0-alignment to make sure the next block is aligned
///
/// The alignment bytes are consumed but not returned.
///
/// # Errors
/// * `io::ErrorKind::InvalidData` if the size prefix is 20000000 or more; such
///   a value is treated as corruption instead of being allocated.
/// * `io::ErrorKind::UnexpectedEof` (from `read_exact`) if the source ends
///   before the size, the data or the alignment bytes are complete.
/// * Any other I/O error reported by the underlying reader.
pub fn read_next_block<R: Read>(mut file: R) -> io::Result<Vec<u8>> {
    let mut sz_buf = [0u8; SIZE_SIZE];
    file.read_exact(&mut sz_buf)?;
    let sz = read_size(&sz_buf);

    // Don't potentially consume all memory when reading a corrupt file. This
    // is a property of the input, not a programming error, so report it
    // through the `io::Result` instead of panicking.
    if sz >= 20_000_000 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("read block of size: {}", sz),
        ));
    }

    let mut data: Vec<u8> = repeat(0).take(sz as usize).collect();
    file.read_exact(data.as_mut_slice())?;

    // Skip the padding that brings the data up to a multiple of 4 bytes.
    let remainder = data.len() % 4;
    if remainder != 0 {
        let mut padding = [0u8; 4];
        file.read_exact(&mut padding[..4 - remainder])?;
    }
    Ok(data)
}
// same as read_next_block, but when receiving EOF it will wrapped into returning None
pub fn read_next_block_or_eof<R: Read>(file: R) -> io::Result<Option<Vec<u8>>> {
match read_next_block(file) {
Err(err) => {
if err.kind() == io::ErrorKind::UnexpectedEof {
Ok(None)
} else {
Err(err)
}
}
Ok(data) => Ok(Some(data)),
}
}
impl<R: Read> Reader<R> {
    /// Reads the next data block, if the source has one.
    ///
    /// Returns `Ok(None)` when `read_next_block_or_eof` reports end of file.
    /// When a block is returned, its bytes are fed to the running hash and
    /// the position counter grows by 4 (the size prefix) plus
    /// `offset_align4` of the data length.
    ///
    /// # Errors
    /// I/O errors are returned in an `Err` value.
    ///
    /// # Panics
    /// Panics if advancing the position counter overflows.
    pub fn next_block(&mut self) -> io::Result<Option<Vec<u8>>> {
        let block = read_next_block_or_eof(&mut self.reader)?;
        if let Some(ref bytes) = block {
            self.hash_context.input(bytes);
            let aligned_len = offset_align4(bytes.len() as u64);
            self.pos = self
                .pos
                .checked_add(4)
                .and_then(|after_prefix| after_prefix.checked_add(aligned_len))
                .unwrap();
        }
        Ok(block)
    }
}
impl<S: Read + Seek> Seeker<S> {
    /// Reads the block at the handle's current position.
    ///
    /// Returns `Ok(None)` when `read_next_block_or_eof` reports end of file.
    ///
    /// # Errors
    /// Any other I/O error is returned in an `Err` value.
    pub fn next_block(&mut self) -> io::Result<Option<Vec<u8>>> {
        let handle = &mut self.handle;
        read_next_block_or_eof(handle)
    }

    /// Seeks to `ofs`, measured from the start of the underlying handle, and
    /// reads the block found there.
    ///
    /// # Errors
    /// Seek failures and every error from `read_next_block` are returned.
    /// Unlike `next_block`, hitting EOF here is an ordinary error and is not
    /// mapped to `None`.
    pub fn block_at_offset(&mut self, ofs: Offset) -> io::Result<Vec<u8>> {
        let handle = &mut self.handle;
        handle.seek(SeekFrom::Start(ofs))?;
        read_next_block(handle)
    }
}
impl<R> Reader<R> {
    /// Returns the hash of all block data fed to this reader so far.
    ///
    /// The digest is written into a fresh `HASH_SIZE`-byte buffer.
    /// NOTE(review): whether the hash context may be used again after this
    /// call depends on the Blake2b implementation -- confirm before calling
    /// it more than once.
    pub fn finalize(&mut self) -> PackHash {
        let mut digest: PackHash = [0u8; HASH_SIZE];
        self.hash_context.result(&mut digest);
        digest
    }
}
// A Writer for a specific pack that accumulates some numbers for reporting,
// plus the index entries (block hash -> offset) handed back by `finalize`.
pub struct Writer {
// temporary file that receives the header and the length-prefixed blocks
tmpfile: TmpFile,
// index entries recorded by `append`: one (block hash, starting offset) per block
index: indexfile::Index,
// number of blocks appended so far
nb_blobs: u32,
pos: Offset, // offset in bytes of the current position (double as the current size of the pack)
hash_context: blake2b::Blake2b, // hash of all the content of blocks without length or padding
}
impl Writer {
    /// Starts a new pack in `tmpfile`.
    ///
    /// Writes the pack header (`FILE_TYPE`, `VERSION`) and returns a writer
    /// with an empty index, a block count of 0 and a position of
    /// `magic::HEADER_SIZE`.
    ///
    /// # Errors
    /// Returns whatever error `magic::write_header` reports.
    pub fn init(mut tmpfile: TmpFile) -> Result<Self> {
        magic::write_header(&mut tmpfile, FILE_TYPE, VERSION)?;
        Ok(Writer {
            tmpfile,
            index: indexfile::Index::new(),
            nb_blobs: 0,
            pos: magic::HEADER_SIZE as u64,
            // Size the digest with HASH_SIZE, like `Reader::init` does, so it
            // always matches the `[0u8; HASH_SIZE]` buffer filled in `finalize`.
            hash_context: blake2b::Blake2b::new(HASH_SIZE),
        })
    }

    /// Returns the current write offset in bytes, which is also the current
    /// size of the pack.
    pub fn pos(&self) -> Offset {
        self.pos
    }

    /// Appends `block` to the pack and records it in the index under
    /// `blockhash`.
    ///
    /// The block is written length-prefixed, its bytes are fed to the running
    /// hash, and the index entry points at the offset where the block starts.
    ///
    /// # Errors
    /// Returns the I/O error from writing the block; in that case the hash,
    /// index, position and block count are left untouched.
    ///
    /// # Panics
    /// Panics if advancing the position overflows.
    pub fn append(&mut self, blockhash: &BlockHash, block: &[u8]) -> io::Result<()> {
        let bytes_written = write_length_prefixed(&mut self.tmpfile, block)?;
        self.hash_context.input(block);
        // Record the offset at which this block starts, i.e. before advancing.
        self.index.append(blockhash, self.pos);
        self.pos = self.pos.checked_add(bytes_written).unwrap();
        self.nb_blobs += 1;
        Ok(())
    }

    /// Consumes the writer and returns the temporary file, the hash of all
    /// appended block data, and the accumulated index.
    ///
    /// # Errors
    /// The current implementation always returns `Ok`.
    pub fn finalize(mut self) -> io::Result<(TmpFile, PackHash, indexfile::Index)> {
        let mut packhash: PackHash = [0u8; HASH_SIZE];
        self.hash_context.result(&mut packhash);
        Ok((self.tmpfile, packhash, self.index))
    }
}