Permalink
Browse files

readFile reads an entire file into a string, vector<char>, or similar

Test Plan: unittest

Reviewed By: lucian@fb.com

FB internal diff: D1129497
  • Loading branch information...
Andrei Alexandrescu authored and jdelong committed Jan 15, 2014
1 parent 5a7d0d7 commit 52ba96edd198f7985f0315b034f84c6dd29234c5
View
@@ -1,5 +1,5 @@
/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
View
@@ -1,5 +1,5 @@
/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,7 +18,10 @@
#define FOLLY_FILEUTIL_H_
#include "folly/Portability.h"
+#include "folly/ScopeGuard.h"
+#include <cassert>
+#include <cerrno>
+#include <limits>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>
@@ -102,7 +105,66 @@ ssize_t writevFull(int fd, iovec* iov, int count);
ssize_t pwritevFull(int fd, iovec* iov, int count, off_t offset);
#endif
+/**
+ * Read entire file (if num_bytes is defaulted) or no more than
+ * num_bytes (otherwise) into container out. The container is assumed
+ * to be contiguous, with element size equal to 1, and offer size(),
+ * resize(), and random access (e.g. std::vector<char>, std::string,
+ * fbstring).
+ *
+ * Once the file has been opened, out is always resized to exactly the
+ * number of bytes read so far, whether the call succeeds or fails. If
+ * the file cannot be opened, out is left untouched.
+ *
+ * Returns: true on success or false on failure. In the latter case
+ * errno will be set appropriately by the failing system primitive;
+ * the cleanup performed on exit does not overwrite it.
+ */
+template <class Container>
+bool readFile(const char* file_name, Container& out,
+              size_t num_bytes = std::numeric_limits<size_t>::max()) {
+  static_assert(sizeof(out[0]) == 1,
+                "readFile: only containers with byte-sized elements accepted");
+  assert(file_name);
+
+  // O_CLOEXEC keeps the descriptor from leaking into a child process if
+  // another thread forks and execs while the file is being read.
+  const auto fd = open(file_name, O_RDONLY | O_CLOEXEC);
+  if (fd == -1) return false;
+
+  size_t soFar = 0; // number of bytes successfully read
+  SCOPE_EXIT {
+    // Callers are told to inspect errno after a failure, so the cleanup
+    // below (in particular close()) must not replace it.
+    const int savedErrno = errno;
+    // out never holds fewer than soFar elements, so this resize only
+    // shrinks and is not expected to throw.
+    assert(out.size() >= soFar);
+    out.resize(soFar);
+    // Ignore errors when closing the file
+    close(fd);
+    errno = savedErrno;
+  };
+
+  // Obtain file size:
+  struct stat buf;
+  if (fstat(fd, &buf) == -1) return false;
+  // Some files (notably under /proc and /sys on Linux) lie about
+  // their size, so take the size advertised by fstat under advisement
+  // but don't rely on it. In particular, if the size is zero, we
+  // should still attempt to read. If it is not zero, we ask for one
+  // extra byte so that a short read tells us the file is exhausted.
+  constexpr size_t initialAlloc = 1024 * 4;
+  // off_t and size_t may differ in width and signedness, so do the
+  // arithmetic in unsigned long long and narrow only after capping
+  // the value to num_bytes.
+  const unsigned long long wanted = buf.st_size > 0
+      ? static_cast<unsigned long long>(buf.st_size) + 1
+      : initialAlloc;
+  out.resize(wanted < num_bytes ? static_cast<size_t>(wanted) : num_bytes);
+
+  while (soFar < out.size()) {
+    const auto actual = readFull(fd, &out[soFar], out.size() - soFar);
+    if (actual == -1) {
+      return false;
+    }
+    soFar += actual;
+    if (soFar < out.size()) {
+      // File exhausted
+      break;
+    }
+    // The buffer is full, so allocate more memory. Use exponential
+    // growth (a factor of 1.5) to avoid quadratic behavior, and cap the
+    // size to num_bytes. n + n / 2 equals n * 3 / 2 but cannot overflow
+    // in the multiplication.
+    const size_t current = out.size();
+    out.resize(std::min(current + current / 2, num_bytes));
+  }
+
+  return true;
+}
+
} // namespaces
#endif /* FOLLY_FILEUTIL_H_ */
-
View
@@ -1,5 +1,5 @@
/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -648,4 +648,3 @@ void hexDump(const void* ptr, size_t size, OutIt out) {
} // namespace folly
#endif /* FOLLY_STRING_INL_H_ */
-
View
@@ -1,5 +1,5 @@
/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1,5 +1,5 @@
/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -135,4 +135,12 @@ inline auto byLine(File file, char delim = '\n')
| resplit(delim);
}
+/**
+ * Overload of byLine() taking a file name instead of a File: a File is
+ * constructed from fileName and forwarded, together with delim, to the
+ * File-based overload, whose result is returned.
+ */
+inline auto byLine(const char* fileName, char delim = '\n')
+ -> decltype(byLine(File(fileName))) {
+ return byLine(File(fileName), delim);
+}
+
}} // !folly::gen
@@ -1,5 +1,5 @@
/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -360,4 +360,3 @@ HugePages::File HugePages::create(ByteRange data,
}
} // namespace folly
-
@@ -1,5 +1,5 @@
/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -598,7 +598,7 @@ BENCHMARK(ByLine_Pipes, iters) {
PCHECK(::read(rfd, &buf, 1) == 1); // wait for startup
}
- auto s = byLine(rfd) | eachTo<int64_t>() | sum;
+ auto s = byLine(File(rfd)) | eachTo<int64_t>() | sum;
folly::doNotOptimizeAway(s);
BENCHMARK_SUSPEND {
@@ -1,5 +1,5 @@
/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1349,7 +1349,7 @@ TEST_P(FileGenBufferedTest, FileWriter) {
auto collect = eachTo<std::string>() | as<vector>();
auto expected = src | resplit('\n') | collect;
- src | eachAs<StringPiece>() | toFile(file.fd(), bufferSize);
+ src | eachAs<StringPiece>() | toFile(File(file.fd()), bufferSize);
auto found = byLine(file.path().c_str()) | collect;
EXPECT_TRUE(expected == found);
@@ -1,5 +1,5 @@
/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -54,12 +54,12 @@ std::unique_ptr<IOBuf> iobufs(std::initializer_list<T> ranges) {
TEST(RecordIOTest, Simple) {
TemporaryFile file;
{
- RecordIOWriter writer(file.fd());
+ RecordIOWriter writer(File(file.fd()));
writer.write(iobufs({"hello ", "world"}));
writer.write(iobufs({"goodbye"}));
}
{
- RecordIOReader reader(file.fd());
+ RecordIOReader reader(File(file.fd()));
auto it = reader.begin();
ASSERT_FALSE(it == reader.end());
EXPECT_EQ("hello world", sp((it++)->first));
@@ -68,12 +68,12 @@ TEST(RecordIOTest, Simple) {
EXPECT_TRUE(it == reader.end());
}
{
- RecordIOWriter writer(file.fd());
+ RecordIOWriter writer(File(file.fd()));
writer.write(iobufs({"meow"}));
writer.write(iobufs({"woof"}));
}
{
- RecordIOReader reader(file.fd());
+ RecordIOReader reader(File(file.fd()));
auto it = reader.begin();
ASSERT_FALSE(it == reader.end());
EXPECT_EQ("hello world", sp((it++)->first));
@@ -93,13 +93,13 @@ TEST(RecordIOTest, SmallRecords) {
memset(tmp, 'x', kSize);
TemporaryFile file;
{
- RecordIOWriter writer(file.fd());
+ RecordIOWriter writer(File(file.fd()));
for (int i = 0; i < kSize; ++i) { // record of size 0 should be ignored
writer.write(IOBuf::wrapBuffer(tmp, i));
}
}
{
- RecordIOReader reader(file.fd());
+ RecordIOReader reader(File(file.fd()));
auto it = reader.begin();
for (int i = 1; i < kSize; ++i) {
ASSERT_FALSE(it == reader.end());
@@ -112,19 +112,19 @@ TEST(RecordIOTest, SmallRecords) {
TEST(RecordIOTest, MultipleFileIds) {
TemporaryFile file;
{
- RecordIOWriter writer(file.fd(), 1);
+ RecordIOWriter writer(File(file.fd()), 1);
writer.write(iobufs({"hello"}));
}
{
- RecordIOWriter writer(file.fd(), 2);
+ RecordIOWriter writer(File(file.fd()), 2);
writer.write(iobufs({"world"}));
}
{
- RecordIOWriter writer(file.fd(), 1);
+ RecordIOWriter writer(File(file.fd()), 1);
writer.write(iobufs({"goodbye"}));
}
{
- RecordIOReader reader(file.fd(), 0); // return all
+ RecordIOReader reader(File(file.fd()), 0); // return all
auto it = reader.begin();
ASSERT_FALSE(it == reader.end());
EXPECT_EQ("hello", sp((it++)->first));
@@ -135,7 +135,7 @@ TEST(RecordIOTest, MultipleFileIds) {
EXPECT_TRUE(it == reader.end());
}
{
- RecordIOReader reader(file.fd(), 1);
+ RecordIOReader reader(File(file.fd()), 1);
auto it = reader.begin();
ASSERT_FALSE(it == reader.end());
EXPECT_EQ("hello", sp((it++)->first));
@@ -144,14 +144,14 @@ TEST(RecordIOTest, MultipleFileIds) {
EXPECT_TRUE(it == reader.end());
}
{
- RecordIOReader reader(file.fd(), 2);
+ RecordIOReader reader(File(file.fd()), 2);
auto it = reader.begin();
ASSERT_FALSE(it == reader.end());
EXPECT_EQ("world", sp((it++)->first));
EXPECT_TRUE(it == reader.end());
}
{
- RecordIOReader reader(file.fd(), 3);
+ RecordIOReader reader(File(file.fd()), 3);
auto it = reader.begin();
EXPECT_TRUE(it == reader.end());
}
@@ -160,7 +160,7 @@ TEST(RecordIOTest, MultipleFileIds) {
TEST(RecordIOTest, ExtraMagic) {
TemporaryFile file;
{
- RecordIOWriter writer(file.fd());
+ RecordIOWriter writer(File(file.fd()));
writer.write(iobufs({"hello"}));
}
uint8_t buf[recordio_helpers::headerSize() + 5];
@@ -172,7 +172,7 @@ TEST(RecordIOTest, ExtraMagic) {
// and an extra record
EXPECT_EQ(sizeof(buf), write(file.fd(), buf, sizeof(buf)));
{
- RecordIOReader reader(file.fd());
+ RecordIOReader reader(File(file.fd()));
auto it = reader.begin();
ASSERT_FALSE(it == reader.end());
EXPECT_EQ("hello", sp((it++)->first));
@@ -213,7 +213,7 @@ TEST(RecordIOTest, Randomized) {
// Recreate the writer multiple times so we test that we create a
// continuous stream
for (size_t i = 0; i < 3; ++i) {
- RecordIOWriter writer(file.fd());
+ RecordIOWriter writer(File(file.fd()));
for (size_t j = 0; j < recordCount; ++j) {
off_t beginPos = writer.filePos();
record.clear();
@@ -251,7 +251,7 @@ TEST(RecordIOTest, Randomized) {
{
size_t i = 0;
- RecordIOReader reader(file.fd());
+ RecordIOReader reader(File(file.fd()));
for (auto& r : reader) {
SCOPED_TRACE(i);
ASSERT_LT(i, records.size());
@@ -270,4 +270,3 @@ int main(int argc, char *argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
return RUN_ALL_TESTS();
}
-
@@ -1,5 +1,5 @@
/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -30,6 +30,7 @@
namespace folly { namespace test {
using namespace fileutil_detail;
+using namespace std;
namespace {
@@ -238,6 +239,49 @@ TEST_F(FileUtilTest, preadv) {
}
#endif
+// Exercises readFile() against an empty file and a three-byte file,
+// reading into both std::string and std::vector<unsigned char>, with
+// the default byte limit as well as limits of 0 and 2.
+TEST(String, readFile) {
+  srand(time(nullptr));
+  const string tmpPrefix = to<string>("/tmp/folly-file-util-test-",
+                                      getpid(), "-", rand(), "-");
+  const string afile = tmpPrefix + "myfile";
+  const string emptyFile = tmpPrefix + "myfile2";
+
+  // Remove both files however the test ends, including an early return
+  // from a failed ASSERT_* below.
+  SCOPE_EXIT {
+    unlink(afile.c_str());
+    unlink(emptyFile.c_str());
+  };
+
+  // ASSERT (not EXPECT) on the fopen results: continuing with a null
+  // FILE* would pass it straight to fclose/fwrite.
+  auto f = fopen(emptyFile.c_str(), "wb");
+  ASSERT_NE(nullptr, f);
+  EXPECT_EQ(0, fclose(f));
+  f = fopen(afile.c_str(), "wb");
+  ASSERT_NE(nullptr, f);
+  EXPECT_EQ(3, fwrite("bar", 1, 3, f));
+  EXPECT_EQ(0, fclose(f));
+
+  {
+    string contents;
+    EXPECT_TRUE(readFile(emptyFile.c_str(), contents));
+    EXPECT_EQ(contents, "");
+    EXPECT_TRUE(readFile(afile.c_str(), contents, 0));
+    EXPECT_EQ("", contents);
+    EXPECT_TRUE(readFile(afile.c_str(), contents, 2));
+    EXPECT_EQ("ba", contents);
+    EXPECT_TRUE(readFile(afile.c_str(), contents));
+    EXPECT_EQ("bar", contents);
+  }
+  {
+    vector<unsigned char> contents;
+    EXPECT_TRUE(readFile(emptyFile.c_str(), contents));
+    EXPECT_EQ(vector<unsigned char>(), contents);
+    EXPECT_TRUE(readFile(afile.c_str(), contents, 0));
+    EXPECT_EQ(vector<unsigned char>(), contents);
+    EXPECT_TRUE(readFile(afile.c_str(), contents, 2));
+    EXPECT_EQ(vector<unsigned char>({'b', 'a'}), contents);
+    EXPECT_TRUE(readFile(afile.c_str(), contents));
+    EXPECT_EQ(vector<unsigned char>({'b', 'a', 'r'}), contents);
+  }
+}
}} // namespaces
@@ -246,4 +290,3 @@ int main(int argc, char *argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
return RUN_ALL_TESTS();
}
-
Oops, something went wrong.

0 comments on commit 52ba96e

Please sign in to comment.