Skip to content

Commit

Permalink
Implement distributed caching for Bazel
Browse files Browse the repository at this point in the history
This patch implements distributed caching for Bazel using Hazelcast.
Hazelcast is used as a key-value store that stores the content of files,
indexed by the digest of each file. The cache also stores the list of files
for an action; in that case the key is a digest computed from the key of the
action and the list of files.

In this change I also added the interface for remote execution. The
implementation will be added in a subsequent patch.

This change is only the first in a series of changes related to distributed
caching and remote execution. I plan to revise the APIs and implementation
in subsequent changes.

--
Change-Id: I569285d6149a4e9f8ba2362682c07a9f1e1943b7
Reviewed-on: https://bazel-review.googlesource.com/#/c/2760/
MOS_MIGRATED_REVID=114325038
  • Loading branch information
hhclam authored and dslomov committed Feb 10, 2016
1 parent 63b856f commit 79adf59
Show file tree
Hide file tree
Showing 18 changed files with 1,032 additions and 5 deletions.
12 changes: 7 additions & 5 deletions src/main/java/com/google/devtools/build/lib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,19 @@ java_library(
# All files in this package plus the ":srcs" filegroups of the listed packages.
# Each label is listed exactly once: Bazel rejects a label that is duplicated
# within a single attribute.
filegroup(
    name = "srcs",
    srcs = glob(["**"]) + [
        "//src/main/java/com/google/devtools/build/docgen:srcs",
        "//src/main/java/com/google/devtools/build/lib/bazel/dash:srcs",
        "//src/main/java/com/google/devtools/build/lib/query2:srcs",
        "//src/main/java/com/google/devtools/build/lib/remote:srcs",
        "//src/main/java/com/google/devtools/build/lib/rules/apple:srcs",
        "//src/main/java/com/google/devtools/build/lib/rules/cpp:srcs",
        "//src/main/java/com/google/devtools/build/lib/rules/genquery:srcs",
        "//src/main/java/com/google/devtools/build/lib/rules/objc:srcs",
        "//src/main/java/com/google/devtools/common/options:srcs",
        "//src/main/java/com/google/devtools/build/lib/sandbox:srcs",
        "//src/main/java/com/google/devtools/build/skyframe:srcs",
        "//src/main/java/com/google/devtools/build/lib/standalone:srcs",
        "//src/main/java/com/google/devtools/build/lib/worker:srcs",
    ],
    visibility = ["//src/test/shell/bazel:__pkg__"],
)
Expand Down Expand Up @@ -535,6 +536,7 @@ java_library(
":vfs",
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/bazel/dash",
"//src/main/java/com/google/devtools/build/lib/remote",
"//src/main/java/com/google/devtools/build/lib/sandbox",
"//src/main/java/com/google/devtools/build/lib/standalone",
"//src/main/java/com/google/devtools/build/lib/worker",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public final class BazelMain {
com.google.devtools.build.lib.bazel.dash.DashModule.class,
com.google.devtools.build.lib.bazel.rules.BazelRulesModule.class,
com.google.devtools.build.lib.worker.WorkerModule.class,
com.google.devtools.build.lib.remote.RemoteModule.class,
com.google.devtools.build.lib.standalone.StandaloneModule.class,
com.google.devtools.build.lib.sandbox.SandboxModule.class,
com.google.devtools.build.lib.runtime.BuildSummaryStatsModule.class);
Expand Down
33 changes: 33 additions & 0 deletions src/main/java/com/google/devtools/build/lib/remote/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Package-wide default: targets here that do not set their own visibility are
# visible to every package under //src.
package(
default_visibility = ["//src:__subpackages__"],
)

# Java library built from the .java files that sit directly in this package
# (the "*.java" glob does not descend into subdirectories).
java_library(
name = "remote",
srcs = glob(["*.java"]),
deps = [
"//src/main/java/com/google/devtools/build/lib:build-base",
"//src/main/java/com/google/devtools/build/lib:concurrent",
"//src/main/java/com/google/devtools/build/lib:events",
"//src/main/java/com/google/devtools/build/lib:io",
"//src/main/java/com/google/devtools/build/lib:packages-internal",
"//src/main/java/com/google/devtools/build/lib:runtime",
"//src/main/java/com/google/devtools/build/lib:util",
"//src/main/java/com/google/devtools/build/lib:vfs",
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/standalone",
"//src/main/java/com/google/devtools/common/options",
"//src/main/protobuf:remote_protocol_proto",
"//third_party:apache_httpclient",
"//third_party:apache_httpcore",
"//third_party:gson",
"//third_party:guava",
"//third_party:hazelcast",
"//third_party:protobuf",
],
)

# Every file in this package, exposed under the label ":srcs".
filegroup(
name = "srcs",
srcs = glob(["**"]),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2016 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.lib.remote;

/**
 * Unchecked exception signalling that a requested entry could not be found in the cache.
 *
 * <p>All constructors simply forward their arguments to {@link RuntimeException}.
 */
final class CacheNotFoundException extends RuntimeException {

  /**
   * Creates an exception with a detail message and the underlying cause.
   *
   * @param message description of what could not be found
   * @param cause the lower-level failure that led to this exception
   */
  CacheNotFoundException(String message, Throwable cause) {
    super(message, cause);
  }

  /**
   * Creates an exception with a detail message and no cause.
   *
   * @param message description of what could not be found
   */
  CacheNotFoundException(String message) {
    super(message);
  }

  /** Creates an exception with neither a detail message nor a cause. */
  CacheNotFoundException() {
    // The superclass no-argument constructor is invoked implicitly.
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2016 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.lib.remote;

import com.hazelcast.client.HazelcastClient;
import com.hazelcast.client.config.ClientConfig;
import com.hazelcast.client.config.ClientNetworkConfig;
import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;

import java.util.concurrent.ConcurrentMap;

/**
 * A factory class for providing a {@link ConcurrentMap} object implemented by Hazelcast.
 * Hazelcast will work as a distributed memory cache.
 */
final class HazelcastCacheFactory {

  /** Name of the Hazelcast map that holds the build cache entries. */
  private static final String CACHE_NAME = "hazelcast-build-cache";

  // Static factory only; never instantiated.
  private HazelcastCacheFactory() {}

  /**
   * Returns the Hazelcast-backed map named {@code hazelcast-build-cache}.
   *
   * <p>When {@code options.hazelcastNode} is set, it is split on commas and the resulting
   * addresses are used to create a Hazelcast client. Otherwise a default Hazelcast instance
   * is created.
   *
   * @param options remote options; only {@code hazelcastNode} is read here
   * @return the map used as the cache, keyed by string with byte-array values
   */
  static ConcurrentMap<String, byte[]> create(RemoteOptions options) {
    HazelcastInstance instance;
    if (options.hazelcastNode != null) {
      // If --hazelcast_node is set, create a client instance that connects to the
      // given comma-separated addresses.
      ClientConfig config = new ClientConfig();
      ClientNetworkConfig net = config.getNetworkConfig();
      net.addAddress(options.hazelcastNode.split(","));
      instance = HazelcastClient.newHazelcastClient(config);
    } else {
      // Otherwise create a default instance. This is going to look at
      // -Dhazelcast.config=some-hazelcast.xml for configuration.
      instance = Hazelcast.newHazelcastInstance();
    }
    return instance.getMap(CACHE_NAME);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
// Copyright 2016 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.lib.remote;

import com.google.common.hash.HashCode;
import com.google.devtools.build.lib.actions.ActionInput;
import com.google.devtools.build.lib.actions.ActionInputFileCache;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.remote.RemoteProtocol.CacheEntry;
import com.google.devtools.build.lib.remote.RemoteProtocol.FileEntry;
import com.google.devtools.build.lib.util.Preconditions;
import com.google.devtools.build.lib.vfs.Path;
import com.google.protobuf.ByteString;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.Semaphore;

/**
 * A RemoteActionCache implementation that uses memcache as a distributed storage
 * for files and action output. The memcache is accessed through the {@link ConcurrentMap}
 * interface.
 *
 * <p>Thread safety is delegated to the underlying memcache client.
 */
@ThreadSafe
final class MemcacheActionCache implements RemoteActionCache {
  /** Number of permits (one per kilobyte of file size) shared by all uploads: 512 * 1024. */
  private static final int MAX_MEMORY_KBYTES = 512 * 1024;

  private final Path execRoot;
  private final ConcurrentMap<String, byte[]> cache;
  // Fair semaphore; each upload holds permits equal to its file size in kilobytes.
  private final Semaphore uploadMemoryAvailable = new Semaphore(MAX_MEMORY_KBYTES, true);

  /**
   * Constructs an action cache backed by the given map.
   *
   * @param execRoot root against which exec paths of action inputs/outputs are resolved
   * @param options remote options; not read by this constructor
   * @param cache the key-value store holding serialized {@link CacheEntry} messages
   */
  MemcacheActionCache(
      Path execRoot, RemoteOptions options, ConcurrentMap<String, byte[]> cache) {
    this.execRoot = execRoot;
    this.cache = cache;
  }

  /**
   * Uploads the file unless an entry with its content key already exists.
   *
   * @return the content key, i.e. the string form of the file's MD5 digest
   */
  @Override
  public String putFileIfNotExist(Path file) throws IOException {
    String contentKey = HashCode.fromBytes(file.getMD5Digest()).toString();
    if (containsFile(contentKey)) {
      return contentKey;
    }
    putFile(contentKey, file);
    return contentKey;
  }

  /**
   * Uploads the action input unless an entry with its content key already exists.
   *
   * @return the content key taken from the digest reported by {@code cache}
   */
  @Override
  public String putFileIfNotExist(ActionInputFileCache cache, ActionInput file) throws IOException {
    // PerActionFileCache already converted this to a lowercase ascii string.. it's not consistent!
    // Decode with an explicit charset so the key does not depend on the platform default.
    String contentKey = new String(cache.getDigest(file).toByteArray(), StandardCharsets.UTF_8);
    if (containsFile(contentKey)) {
      return contentKey;
    }
    putFile(contentKey, execRoot.getRelative(file.getExecPathString()));
    return contentKey;
  }

  /**
   * Reads the whole file and stores it under {@code key} as a serialized {@link CacheEntry}.
   * Permits equal to the file size in kilobytes are held for the duration of the upload.
   *
   * @throws IOException if reading the file fails or the thread is interrupted while waiting
   *     for upload permits
   * @throws IllegalArgumentException if the file size in kilobytes is not below
   *     {@code MAX_MEMORY_KBYTES}
   */
  private void putFile(String key, Path file) throws IOException {
    // Check the bound on the long value first; narrowing before the check could wrap around
    // for very large files and let them slip through.
    long fileSizeKBytesLong = file.getFileSize() / 1024;
    Preconditions.checkArgument(fileSizeKBytesLong < MAX_MEMORY_KBYTES);
    int fileSizeKBytes = (int) fileSizeKBytesLong;

    // Acquire outside the try/finally below so permits are only released when they were
    // actually obtained.
    try {
      uploadMemoryAvailable.acquire(fileSizeKBytes);
    } catch (InterruptedException e) {
      // Restore the interrupt status for callers further up the stack.
      Thread.currentThread().interrupt();
      throw new IOException("Failed to put file to memory cache.", e);
    }
    // TODO(alpha): I should put the file content as chunks to avoid reading the entire
    // file into memory.
    try (InputStream stream = file.getInputStream()) {
      cache.put(
          key,
          CacheEntry.newBuilder()
              .setFileContent(ByteString.readFrom(stream))
              .build()
              .toByteArray());
    } finally {
      uploadMemoryAvailable.release(fileSizeKBytes);
    }
  }

  /**
   * Writes the file content stored under {@code key} to {@code dest} and sets its executable
   * bit to {@code executable}.
   *
   * @throws CacheNotFoundException if there is no entry for {@code key}
   */
  @Override
  public void writeFile(String key, Path dest, boolean executable)
      throws IOException, CacheNotFoundException {
    byte[] data = cache.get(key);
    if (data == null) {
      throw new CacheNotFoundException("File content cannot be found with key: " + key);
    }
    try (OutputStream stream = dest.getOutputStream()) {
      CacheEntry.parseFrom(data).getFileContent().writeTo(stream);
      dest.setExecutable(executable);
    }
  }

  /** Returns whether the cache has an entry for {@code key}. */
  private boolean containsFile(String key) {
    return cache.containsKey(key);
  }

  /**
   * Looks up the action output entry stored under {@code key} and writes each listed file
   * to its path relative to {@code execRoot}.
   *
   * @throws CacheNotFoundException if the action entry or any referenced file is missing
   */
  @Override
  public void writeActionOutput(String key, Path execRoot)
      throws IOException, CacheNotFoundException {
    byte[] data = cache.get(key);
    if (data == null) {
      throw new CacheNotFoundException("Action output cannot be found with key: " + key);
    }
    CacheEntry cacheEntry = CacheEntry.parseFrom(data);
    for (FileEntry file : cacheEntry.getFilesList()) {
      writeFile(file.getContentKey(), execRoot.getRelative(file.getPath()), file.getExecutable());
    }
  }

  /**
   * Stores the given outputs (resolved against this cache's exec root) and records the list
   * of files under {@code key}.
   */
  @Override
  public void putActionOutput(String key, Collection<? extends ActionInput> outputs)
      throws IOException {
    CacheEntry.Builder actionOutput = CacheEntry.newBuilder();
    for (ActionInput output : outputs) {
      Path file = execRoot.getRelative(output.getExecPathString());
      addToActionOutput(file, output.getExecPathString(), actionOutput);
    }
    cache.put(key, actionOutput.build().toByteArray());
  }

  /**
   * Stores the given files and records them under {@code key}, with each path recorded
   * relative to {@code execRoot}.
   */
  @Override
  public void putActionOutput(String key, Path execRoot, Collection<Path> files)
      throws IOException {
    CacheEntry.Builder actionOutput = CacheEntry.newBuilder();
    for (Path file : files) {
      addToActionOutput(file, file.relativeTo(execRoot).getPathString(), actionOutput);
    }
    cache.put(key, actionOutput.build().toByteArray());
  }

  /**
   * Add the file to action output cache entry. Put the file to cache if necessary.
   *
   * @throws UnsupportedOperationException if {@code file} is a directory
   */
  private void addToActionOutput(Path file, String execPathString, CacheEntry.Builder actionOutput)
      throws IOException {
    if (file.isDirectory()) {
      // TODO(alpha): Implement this for directory.
      throw new UnsupportedOperationException("Storing a directory is not yet supported.");
    }
    // First put the file content to cache.
    String contentKey = putFileIfNotExist(file);
    // Add to protobuf.
    actionOutput
        .addFilesBuilder()
        .setPath(execPathString)
        .setContentKey(contentKey)
        .setExecutable(file.isExecutable());
  }
}
15 changes: 15 additions & 0 deletions src/main/java/com/google/devtools/build/lib/remote/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
How to run a standalone Hazelcast server for testing the distributed cache.

* First you need to run a standalone Hazelcast server with JCache API in the
classpath. This will start Hazelcast with the default configuration.

java -cp third_party/hazelcast/hazelcast-3.5.4.jar \
com.hazelcast.core.server.StartServer

* Then you run Bazel pointing to the Hazelcast server.

bazel build --hazelcast_node=127.0.0.1:5701 --spawn_strategy=remote \
src/tools/generate_workspace:all

The above command builds generate_workspace with the remote spawn strategy, which
uses Hazelcast as the distributed caching backend.
Loading

0 comments on commit 79adf59

Please sign in to comment.