
Implement distributed caching for Bazel

This patch implements distributed caching for Bazel using Hazelcast.
Hazelcast is used as a key-value store that stores the content of each file
indexed by the file's digest. The cache also stores the list of output files
for an action; in that case the key is a digest computed from the action's
key and its list of files.
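Roughly, the keying scheme looks like the sketch below (a minimal, self-contained
illustration only: a plain in-memory map and MD5 stand in for the Hazelcast map and
for Bazel's digest and protobuf machinery, and all names are made up):

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.HashMap;
import java.util.Map;

public class CacheKeySketch {
  static String md5Hex(byte[] data) throws Exception {
    StringBuilder sb = new StringBuilder();
    for (byte b : MessageDigest.getInstance("MD5").digest(data)) {
      sb.append(String.format("%02x", b));
    }
    return sb.toString();
  }

  public static void main(String[] args) throws Exception {
    Map<String, byte[]> cache = new HashMap<>();

    // File entry: content is keyed by the digest of the file content.
    byte[] fileContent = "compiled output bytes".getBytes(StandardCharsets.UTF_8);
    String contentKey = md5Hex(fileContent);
    cache.put(contentKey, fileContent);

    // Action entry: keyed by a digest over the action key and its output list;
    // the value records which content keys make up the action's outputs.
    String actionKey = md5Hex("action-key|bazel-out/foo.o".getBytes(StandardCharsets.UTF_8));
    cache.put(actionKey, ("bazel-out/foo.o=" + contentKey).getBytes(StandardCharsets.UTF_8));

    System.out.println("file key: " + contentKey + ", action key: " + actionKey);
  }
}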

In this change I also added the interface for remote execution. The
implementation will be added in a subsequent patch.

This change is only the first in a series of changes related to distributed
caching and remote execution. I plan to revise the APIs and implementation
in subsequent changes.

--
Change-Id: I569285d6149a4e9f8ba2362682c07a9f1e1943b7
Reviewed-on: https://bazel-review.googlesource.com/#/c/2760/
MOS_MIGRATED_REVID=114325038
1 parent 63b856f · commit 79adf59e2973754c8c0415fcab45cd58c7c34697 · @hhclam committed with dslomov on Feb 10, 2016
@@ -26,18 +26,19 @@ java_library(
filegroup(
name = "srcs",
srcs = glob(["**"]) + [
+ "//src/main/java/com/google/devtools/build/docgen:srcs",
+ "//src/main/java/com/google/devtools/build/lib/bazel/dash:srcs",
+ "//src/main/java/com/google/devtools/build/lib/query2:srcs",
+ "//src/main/java/com/google/devtools/build/lib/remote:srcs",
"//src/main/java/com/google/devtools/build/lib/rules/apple:srcs",
"//src/main/java/com/google/devtools/build/lib/rules/cpp:srcs",
"//src/main/java/com/google/devtools/build/lib/rules/genquery:srcs",
"//src/main/java/com/google/devtools/build/lib/rules/objc:srcs",
- "//src/main/java/com/google/devtools/common/options:srcs",
- "//src/main/java/com/google/devtools/build/lib/bazel/dash:srcs",
"//src/main/java/com/google/devtools/build/lib/sandbox:srcs",
- "//src/main/java/com/google/devtools/build/skyframe:srcs",
"//src/main/java/com/google/devtools/build/lib/standalone:srcs",
"//src/main/java/com/google/devtools/build/lib/worker:srcs",
- "//src/main/java/com/google/devtools/build/lib/query2:srcs",
- "//src/main/java/com/google/devtools/build/docgen:srcs",
+ "//src/main/java/com/google/devtools/build/skyframe:srcs",
+ "//src/main/java/com/google/devtools/common/options:srcs",
],
visibility = ["//src/test/shell/bazel:__pkg__"],
)
@@ -535,6 +536,7 @@ java_library(
":vfs",
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/bazel/dash",
+ "//src/main/java/com/google/devtools/build/lib/remote",
"//src/main/java/com/google/devtools/build/lib/sandbox",
"//src/main/java/com/google/devtools/build/lib/standalone",
"//src/main/java/com/google/devtools/build/lib/worker",
@@ -46,6 +46,7 @@
com.google.devtools.build.lib.bazel.dash.DashModule.class,
com.google.devtools.build.lib.bazel.rules.BazelRulesModule.class,
com.google.devtools.build.lib.worker.WorkerModule.class,
+ com.google.devtools.build.lib.remote.RemoteModule.class,
com.google.devtools.build.lib.standalone.StandaloneModule.class,
com.google.devtools.build.lib.sandbox.SandboxModule.class,
com.google.devtools.build.lib.runtime.BuildSummaryStatsModule.class);
@@ -0,0 +1,33 @@
+package(
+ default_visibility = ["//src:__subpackages__"],
+)
+
+java_library(
+ name = "remote",
+ srcs = glob(["*.java"]),
+ deps = [
+ "//src/main/java/com/google/devtools/build/lib:build-base",
+ "//src/main/java/com/google/devtools/build/lib:concurrent",
+ "//src/main/java/com/google/devtools/build/lib:events",
+ "//src/main/java/com/google/devtools/build/lib:io",
+ "//src/main/java/com/google/devtools/build/lib:packages-internal",
+ "//src/main/java/com/google/devtools/build/lib:runtime",
+ "//src/main/java/com/google/devtools/build/lib:util",
+ "//src/main/java/com/google/devtools/build/lib:vfs",
+ "//src/main/java/com/google/devtools/build/lib/actions",
+ "//src/main/java/com/google/devtools/build/lib/standalone",
+ "//src/main/java/com/google/devtools/common/options",
+ "//src/main/protobuf:remote_protocol_proto",
+ "//third_party:apache_httpclient",
+ "//third_party:apache_httpcore",
+ "//third_party:gson",
+ "//third_party:guava",
+ "//third_party:hazelcast",
+ "//third_party:protobuf",
+ ],
+)
+
+filegroup(
+ name = "srcs",
+ srcs = glob(["**"]),
+)
@@ -0,0 +1,33 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.remote;
+
+/**
+ * An exception to indicate that a requested cache entry could not be found.
+ */
+final class CacheNotFoundException extends RuntimeException {
+ CacheNotFoundException() {
+ super();
+ }
+
+ CacheNotFoundException(String message) {
+ super(message);
+ }
+
+ CacheNotFoundException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
@@ -0,0 +1,48 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.remote;
+
+import com.hazelcast.client.HazelcastClient;
+import com.hazelcast.client.config.ClientConfig;
+import com.hazelcast.client.config.ClientNetworkConfig;
+import com.hazelcast.core.Hazelcast;
+import com.hazelcast.core.HazelcastInstance;
+
+import java.util.concurrent.ConcurrentMap;
+
+/**
+ * A factory class that provides a {@link ConcurrentMap} backed by Hazelcast.
+ * Hazelcast serves as a distributed in-memory cache.
+ */
+final class HazelcastCacheFactory {
+
+ private static final String CACHE_NAME = "hazelcast-build-cache";
+
+ static ConcurrentMap<String, byte[]> create(RemoteOptions options) {
+ HazelcastInstance instance;
+ if (options.hazelcastNode != null) {
+ // If --hazelcast_node is set, create a client instance that connects to the given node(s).
+ ClientConfig config = new ClientConfig();
+ ClientNetworkConfig net = config.getNetworkConfig();
+ net.addAddress(options.hazelcastNode.split(","));
+ instance = HazelcastClient.newHazelcastClient(config);
+ } else {
+ // Otherwise create a default instance. This is going to look at
+ // -Dhazelcast.config=some-hazelcast.xml for configuration.
+ instance = Hazelcast.newHazelcastInstance();
+ }
+ return instance.getMap(CACHE_NAME);
+ }
+}
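For reference, a minimal standalone example of the Hazelcast API used by this factory
(an in-process member with the default configuration; the map name mirrors CACHE_NAME,
everything else is illustrative):

import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;

import java.nio.charset.StandardCharsets;
import java.util.concurrent.ConcurrentMap;

public class HazelcastMapDemo {
  public static void main(String[] args) {
    // Start an in-process Hazelcast member with the default configuration.
    HazelcastInstance instance = Hazelcast.newHazelcastInstance();
    // getMap() returns a distributed map that also implements ConcurrentMap.
    ConcurrentMap<String, byte[]> cache = instance.getMap("hazelcast-build-cache");
    cache.putIfAbsent("content-key", "file bytes".getBytes(StandardCharsets.UTF_8));
    System.out.println(new String(cache.get("content-key"), StandardCharsets.UTF_8));
    instance.shutdown();
  }
}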
@@ -0,0 +1,169 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.remote;
+
+import com.google.common.hash.HashCode;
+import com.google.devtools.build.lib.actions.ActionInput;
+import com.google.devtools.build.lib.actions.ActionInputFileCache;
+import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
+import com.google.devtools.build.lib.remote.RemoteProtocol.CacheEntry;
+import com.google.devtools.build.lib.remote.RemoteProtocol.FileEntry;
+import com.google.devtools.build.lib.util.Preconditions;
+import com.google.devtools.build.lib.vfs.Path;
+import com.google.protobuf.ByteString;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collection;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.Semaphore;
+
+/**
+ * A RemoteActionCache implementation that uses memcache as distributed storage for files
+ * and action outputs. The memcache is accessed through the {@link ConcurrentMap} interface.
+ *
+ * <p>Thread safety is guaranteed by the underlying memcache client.
+ */
+@ThreadSafe
+final class MemcacheActionCache implements RemoteActionCache {
+ private final Path execRoot;
+ private final ConcurrentMap<String, byte[]> cache;
+ private static final int MAX_MEMORY_KBYTES = 512 * 1024;
+ private final Semaphore uploadMemoryAvailable = new Semaphore(MAX_MEMORY_KBYTES, true);
+
+ /**
+ * Constructs an action cache backed by the given {@link ConcurrentMap} (e.g. a Hazelcast map).
+ */
+ MemcacheActionCache(
+ Path execRoot, RemoteOptions options, ConcurrentMap<String, byte[]> cache) {
+ this.execRoot = execRoot;
+ this.cache = cache;
+ }
+
+ @Override
+ public String putFileIfNotExist(Path file) throws IOException {
+ String contentKey = HashCode.fromBytes(file.getMD5Digest()).toString();
+ if (containsFile(contentKey)) {
+ return contentKey;
+ }
+ putFile(contentKey, file);
+ return contentKey;
+ }
+
+ @Override
+ public String putFileIfNotExist(ActionInputFileCache cache, ActionInput file) throws IOException {
+ // PerActionFileCache already converted this to a lowercase ASCII string; it's not consistent!
+ String contentKey = new String(cache.getDigest(file).toByteArray());
+ if (containsFile(contentKey)) {
+ return contentKey;
+ }
+ putFile(contentKey, execRoot.getRelative(file.getExecPathString()));
+ return contentKey;
+ }
+
+ private void putFile(String key, Path file) throws IOException {
+ int fileSizeKBytes = (int) (file.getFileSize() / 1024);
+ Preconditions.checkArgument(fileSizeKBytes < MAX_MEMORY_KBYTES);
+ try {
+ uploadMemoryAvailable.acquire(fileSizeKBytes);
+ // TODO(alpha): Upload the file content in chunks to avoid reading the entire
+ // file into memory.
+ try (InputStream stream = file.getInputStream()) {
+ cache.put(
+ key,
+ CacheEntry.newBuilder()
+ .setFileContent(ByteString.readFrom(stream))
+ .build()
+ .toByteArray());
+ }
+ } catch (InterruptedException e) {
+ throw new IOException("Failed to put file to memory cache.", e);
+ } finally {
+ uploadMemoryAvailable.release(fileSizeKBytes);
+ }
+ }
+
+ @Override
+ public void writeFile(String key, Path dest, boolean executable)
+ throws IOException, CacheNotFoundException {
+ byte[] data = cache.get(key);
+ if (data == null) {
+ throw new CacheNotFoundException("File content cannot be found with key: " + key);
+ }
+ try (OutputStream stream = dest.getOutputStream()) {
+ CacheEntry.parseFrom(data).getFileContent().writeTo(stream);
+ dest.setExecutable(executable);
+ }
+ }
+
+ private boolean containsFile(String key) {
+ return cache.containsKey(key);
+ }
+
+ @Override
+ public void writeActionOutput(String key, Path execRoot)
+ throws IOException, CacheNotFoundException {
+ byte[] data = cache.get(key);
+ if (data == null) {
+ throw new CacheNotFoundException("Action output cannot be found with key: " + key);
+ }
+ CacheEntry cacheEntry = CacheEntry.parseFrom(data);
+ for (FileEntry file : cacheEntry.getFilesList()) {
+ writeFile(file.getContentKey(), execRoot.getRelative(file.getPath()), file.getExecutable());
+ }
+ }
+
+ @Override
+ public void putActionOutput(String key, Collection<? extends ActionInput> outputs)
+ throws IOException {
+ CacheEntry.Builder actionOutput = CacheEntry.newBuilder();
+ for (ActionInput output : outputs) {
+ Path file = execRoot.getRelative(output.getExecPathString());
+ addToActionOutput(file, output.getExecPathString(), actionOutput);
+ }
+ cache.put(key, actionOutput.build().toByteArray());
+ }
+
+ @Override
+ public void putActionOutput(String key, Path execRoot, Collection<Path> files)
+ throws IOException {
+ CacheEntry.Builder actionOutput = CacheEntry.newBuilder();
+ for (Path file : files) {
+ addToActionOutput(file, file.relativeTo(execRoot).getPathString(), actionOutput);
+ }
+ cache.put(key, actionOutput.build().toByteArray());
+ }
+
+ /**
+ * Adds the file to the action output cache entry, uploading the file content to the cache if necessary.
+ */
+ private void addToActionOutput(Path file, String execPathString, CacheEntry.Builder actionOutput)
+ throws IOException {
+ if (file.isDirectory()) {
+ // TODO(alpha): Implement this for directories.
+ throw new UnsupportedOperationException("Storing a directory is not yet supported.");
+ }
+ // First put the file content to cache.
+ String contentKey = putFileIfNotExist(file);
+ // Add to protobuf.
+ actionOutput
+ .addFilesBuilder()
+ .setPath(execPathString)
+ .setContentKey(contentKey)
+ .setExecutable(file.isExecutable());
+ }
+}
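For context, the RemoteActionCache interface implemented above is not part of this excerpt.
Inferred from the @Override methods, a sketch of it would look roughly like this (not the
actual declaration from this change):

package com.google.devtools.build.lib.remote;

import com.google.devtools.build.lib.actions.ActionInput;
import com.google.devtools.build.lib.actions.ActionInputFileCache;
import com.google.devtools.build.lib.vfs.Path;

import java.io.IOException;
import java.util.Collection;

interface RemoteActionCache {
  // Uploads the file content keyed by its digest; returns the content key.
  String putFileIfNotExist(Path file) throws IOException;

  String putFileIfNotExist(ActionInputFileCache cache, ActionInput file) throws IOException;

  // Writes the cached content for the given key to dest, setting the executable bit.
  void writeFile(String key, Path dest, boolean executable)
      throws IOException, CacheNotFoundException;

  // Restores all files recorded for an action output entry under execRoot.
  void writeActionOutput(String key, Path execRoot) throws IOException, CacheNotFoundException;

  void putActionOutput(String key, Collection<? extends ActionInput> outputs) throws IOException;

  void putActionOutput(String key, Path execRoot, Collection<Path> files) throws IOException;
}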
@@ -0,0 +1,15 @@
+How to run a standalone Hazelcast server for testing the distributed cache.
+
+* First you need to run a standalone Hazelcast server with the JCache API on the
+classpath. The following command starts Hazelcast with the default configuration:
+
+java -cp third_party/hazelcast/hazelcast-3.5.4.jar \
+ com.hazelcast.core.server.StartServer
+
+* Then run Bazel, pointing it at the Hazelcast server:
+
+bazel build --hazelcast_node=127.0.0.1:5701 --spawn_strategy=remote \
+ src/tools/generate_workspace:all
+
+The above command builds generate_workspace with the remote spawn strategy, using
+Hazelcast as the distributed caching backend.
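* If --hazelcast_node is not given, HazelcastCacheFactory starts an embedded Hazelcast
instance that reads -Dhazelcast.config for its configuration. One possible way to pass
that property is via Bazel's --host_jvm_args startup option (the config path shown is
illustrative):

bazel --host_jvm_args=-Dhazelcast.config=/path/to/hazelcast.xml build \
    --spawn_strategy=remote src/tools/generate_workspace:all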