Skip to content

Commit

Permalink
Read/write optimizations (ccache#551)
Browse files Browse the repository at this point in the history
* Avoid one extra read call after the final bytes are read

If read returns less then the requested number of bytes, the file is at
EOF. This can be used in all places where a read is done, but it makes extra
impact in read_file where if the buffer is made one byte bigger than needed,
EOF can be detected before a unnecessary memory reallocation is done.

* Use write instead of fwrite to write the result file

This avoids the caching in stdio where a write is split into two (at least on
my system): first a small 4k one and then one with the remaining 60k.
  • Loading branch information
erijo committed Mar 1, 2020
1 parent df96135 commit ed45b52
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 23 deletions.
9 changes: 6 additions & 3 deletions src/hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ hash_int(struct hash* hash, int x)
}

bool
hash_fd(struct hash* hash, int fd)
hash_fd(struct hash* hash, int fd, bool fd_is_file)
{
char buf[READ_BUFFER_SIZE];
ssize_t n;
Expand All @@ -187,9 +187,12 @@ hash_fd(struct hash* hash, int fd)
if (n > 0) {
do_hash_buffer(hash, buf, n);
do_debug_text(hash, buf, n);
if (fd_is_file && static_cast<size_t>(n) < sizeof(buf)) {
break;
}
}
}
return n == 0;
return n >= 0;
}

bool
Expand All @@ -201,7 +204,7 @@ hash_file(struct hash* hash, const char* fname)
return false;
}

bool ret = hash_fd(hash, fd);
bool ret = hash_fd(hash, fd, true);
close(fd);
return ret;
}
2 changes: 1 addition & 1 deletion src/hash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ void hash_int(struct hash* hash, int x);
// file.
//
// Returns true on success, otherwise false.
bool hash_fd(struct hash* hash, int fd);
bool hash_fd(struct hash* hash, int fd, bool fd_is_file = false);

// Add contents of a file to the hash.
//
Expand Down
54 changes: 37 additions & 17 deletions src/legacy_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,24 +67,39 @@ fatal(const char* format, ...)
throw FatalError(msg);
}

bool
write_fd(int fd, const void* buf, size_t size)
{
ssize_t written = 0;
do {
ssize_t count =
write(fd, static_cast<const uint8_t*>(buf) + written, size - written);
if (count == -1) {
if (errno != EAGAIN && errno != EINTR) {
return false;
}
} else {
written += count;
}
} while (static_cast<size_t>(written) < size);

return true;
}

// Copy all data from fd_in to fd_out.
bool
copy_fd(int fd_in, int fd_out)
copy_fd(int fd_in, int fd_out, bool fd_in_is_file)
{
int n;
ssize_t n;
char buf[READ_BUFFER_SIZE];
while ((n = read(fd_in, buf, sizeof(buf))) > 0) {
ssize_t written = 0;
do {
ssize_t count = write(fd_out, buf + written, n - written);
if (count == -1) {
if (errno != EAGAIN && errno != EINTR) {
return false;
}
} else {
written += count;
}
} while (written < n);
if (!write_fd(fd_out, buf, n)) {
return false;
}

if (fd_in_is_file && static_cast<size_t>(n) < sizeof(buf)) {
break;
}
}

return true;
Expand Down Expand Up @@ -215,7 +230,7 @@ copy_file(const char* src, const char* dest, bool via_tmp_file)
}
}

if (copy_fd(src_fd, dest_fd)) {
if (copy_fd(src_fd, dest_fd, true)) {
result = true;
}

Expand Down Expand Up @@ -866,27 +881,32 @@ read_file(const char* path, size_t size_hint, char** data, size_t* size)
if (size_hint == 0) {
size_hint = Stat::stat(path, Stat::OnError::log).size();
}
size_hint = (size_hint < 1024) ? 1024 : size_hint;
// +1 to be able to detect EOF in the first read call
size_hint = (size_hint < 1024) ? 1024 : size_hint + 1;

int fd = open(path, O_RDONLY | O_BINARY);
if (fd == -1) {
return false;
}
size_t allocated = size_hint;
*data = static_cast<char*>(x_malloc(allocated));
int ret;
ssize_t ret;
size_t pos = 0;
while (true) {
if (pos > allocated / 2) {
allocated *= 2;
*data = static_cast<char*>(x_realloc(*data, allocated));
}
ret = read(fd, *data + pos, allocated - pos);
const size_t max_read = allocated - pos;
ret = read(fd, *data + pos, max_read);
if (ret == 0 || (ret == -1 && errno != EINTR)) {
break;
}
if (ret > 0) {
pos += ret;
if (static_cast<size_t>(ret) < max_read) {
break;
}
}
}
close(fd);
Expand Down
3 changes: 2 additions & 1 deletion src/legacy_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@

void fatal(const char* format, ...) ATTR_FORMAT(printf, 1, 2) ATTR_NORETURN;

bool copy_fd(int fd_in, int fd_out);
bool write_fd(int fd, const void* buf, size_t size);
bool copy_fd(int fd_in, int fd_out, bool fd_in_is_file = false);
bool clone_file(const char* src, const char* dest, bool via_tmp_file);
bool copy_file(const char* src, const char* dest, bool via_tmp_file);
bool move_file(const char* src, const char* dest);
Expand Down
6 changes: 5 additions & 1 deletion src/result.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,12 +176,16 @@ read_embedded_file_entry(const Context&,
throw Error(fmt::format(
"Failed to open {} for writing: {}", path, strerror(errno)));
}
int subfile_fd = fileno(subfile.get());

uint8_t buf[READ_BUFFER_SIZE];
size_t remain = file_len;
while (remain > 0) {
size_t n = std::min(remain, sizeof(buf));
reader.read(buf, n);
if (fwrite(buf, n, 1, subfile.get()) != 1) {

// Write directly to the file descriptor to avoid stdio caching.
if (!write_fd(subfile_fd, buf, n)) {
throw Error(fmt::format("Failed to write to {}", path));
}
remain -= n;
Expand Down

0 comments on commit ed45b52

Please sign in to comment.