Skip to content

Commit c94b4fc

Browse files
committed
Make LocalStore::addToStore(srcPath) run in constant memory
This reduces memory consumption of `nix-instantiate -E 'with import <nixpkgs> {}; runCommand "foo" { src = ./blender; } "echo foo"' --option nar-buffer-size 10000` (where ./blender is a 1.1 GiB tree) from 1716 MiB to 36 MiB, while still ensuring that we don't do any write I/O for small source paths (up to 'nar-buffer-size' bytes). The downside is that large paths are now always written to a temporary location in the store, even if they produce an already valid store path. Thus, adding large paths might be slower and run out of disk space. ¯\_(ツ)_/¯ Of course, you can always restore the old behaviour by setting 'nar-buffer-size' to a very high value.
1 parent f4156dd commit c94b4fc

File tree

2 files changed

+116
-10
lines changed

2 files changed

+116
-10
lines changed

src/libstore/globals.hh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,9 @@ public:
325325

326326
Setting<Paths> pluginFiles{this, {}, "plugin-files",
327327
"Plugins to dynamically load at nix initialization time."};
328+
329+
Setting<size_t> narBufferSize{this, 8 * 1024 * 1024, "nar-buffer-size",
330+
"Maximum size of NARs before spilling them to disk."};
328331
};
329332

330333

src/libstore/local-store.cc

Lines changed: 113 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,16 +1096,119 @@ Path LocalStore::addToStore(const string & name, const Path & _srcPath,
10961096
{
10971097
Path srcPath(absPath(_srcPath));
10981098

1099-
/* Read the whole path into memory. This is not a very scalable
1100-
method for very large paths, but `copyPath' is mainly used for
1101-
small files. */
1102-
StringSink sink;
1103-
if (recursive)
1104-
dumpPath(srcPath, sink, filter);
1105-
else
1106-
sink.s = make_ref<std::string>(readFile(srcPath));
1107-
1108-
return addToStoreFromDump(*sink.s, name, recursive, hashAlgo, repair);
1099+
assert(recursive); // FIXME
1100+
1101+
/* For computing the NAR hash. */
1102+
auto sha256Sink = std::make_unique<HashSink>(htSHA256);
1103+
1104+
/* For computing the store path. In recursive SHA-256 mode, this
1105+
is the same as the NAR hash, so no need to do it again. */
1106+
std::unique_ptr<HashSink> hashSink =
1107+
recursive && hashAlgo == htSHA256
1108+
? nullptr
1109+
: std::make_unique<HashSink>(hashAlgo);
1110+
1111+
/* Read the source path into memory, but only if it's up to
1112+
narBufferSize bytes. If it's larger, write it to a temporary
1113+
location in the Nix store. If the subsequently computed
1114+
destination store path is already valid, we just delete the
1115+
temporary path. Otherwise, we move it to the destination store
1116+
path. */
1117+
bool inMemory = true;
1118+
std::string nar;
1119+
1120+
auto source = sinkToSource([&](Sink & sink) {
1121+
1122+
LambdaSink sink2([&](const unsigned char * buf, size_t len) {
1123+
(*sha256Sink)(buf, len);
1124+
if (hashSink) (*hashSink)(buf, len);
1125+
1126+
if (inMemory) {
1127+
if (nar.size() + len > settings.narBufferSize) {
1128+
inMemory = false;
1129+
sink << 1;
1130+
sink((const unsigned char *) nar.data(), nar.size());
1131+
nar.clear();
1132+
} else {
1133+
nar.append((const char *) buf, len);
1134+
}
1135+
}
1136+
1137+
if (!inMemory) sink(buf, len);
1138+
});
1139+
1140+
dumpPath(srcPath, sink2, filter);
1141+
});
1142+
1143+
std::unique_ptr<AutoDelete> delTempDir;
1144+
Path tempPath;
1145+
1146+
try {
1147+
/* Wait for the source coroutine to give us some dummy
1148+
data. This is so that we don't create the temporary
1149+
directory if the NAR fits in memory. */
1150+
readInt(*source);
1151+
1152+
auto tempDir = createTempDir(realStoreDir, "add");
1153+
delTempDir = std::make_unique<AutoDelete>(tempDir);
1154+
tempPath = tempDir + "/x";
1155+
1156+
restorePath(tempPath, *source);
1157+
1158+
} catch (EndOfFile &) {
1159+
if (!inMemory) throw;
1160+
/* The NAR fits in memory, so we didn't do restorePath(). */
1161+
}
1162+
1163+
auto sha256 = sha256Sink->finish();
1164+
1165+
Hash hash = hashSink ? hashSink->finish().first : sha256.first;
1166+
1167+
Path dstPath = makeFixedOutputPath(recursive, hash, name);
1168+
1169+
addTempRoot(dstPath);
1170+
1171+
if (repair || !isValidPath(dstPath)) {
1172+
1173+
/* The first check above is an optimisation to prevent
1174+
unnecessary lock acquisition. */
1175+
1176+
Path realPath = realStoreDir + "/" + baseNameOf(dstPath);
1177+
1178+
PathLocks outputLock({realPath});
1179+
1180+
if (repair || !isValidPath(dstPath)) {
1181+
1182+
deletePath(realPath);
1183+
1184+
autoGC();
1185+
1186+
if (inMemory) {
1187+
/* Restore from the NAR in memory. */
1188+
StringSource source(nar);
1189+
restorePath(realPath, source);
1190+
} else {
1191+
/* Move the temporary path we restored above. */
1192+
if (rename(tempPath.c_str(), realPath.c_str()))
1193+
throw Error("renaming '%s' to '%s'", tempPath, realPath);
1194+
}
1195+
1196+
canonicalisePathMetaData(realPath, -1); // FIXME: merge into restorePath
1197+
1198+
optimisePath(realPath);
1199+
1200+
ValidPathInfo info;
1201+
info.path = dstPath;
1202+
info.narHash = sha256.first;
1203+
info.narSize = sha256.second;
1204+
info.ca = makeFixedOutputCA(true, hash);
1205+
registerValidPath(info);
1206+
}
1207+
1208+
outputLock.setDeletion(true);
1209+
}
1210+
1211+
return dstPath;
11091212
}
11101213

11111214

0 commit comments

Comments
 (0)