Skip to content

Commit

Permalink
Move shared cache pre-allocation and support macOS (#70331)
Browse files Browse the repository at this point in the history
This commit moves the shared cache pre-allocation code out of bootstrap,
and instead to the searchable snapshots code. We go out of our way to
only grant permissions to a specific library used for the
pre-allocation.

Additionally, to ensure our interfaces are sound, we add a macOS
implementation based on fcntl and ftruncate.

Co-authored-by: Yannick Welsch <yannick@welsch.lu>
  • Loading branch information
jasontedor and ywelsch committed Mar 11, 2021
1 parent 13946ea commit 77b968c
Show file tree
Hide file tree
Showing 26 changed files with 429 additions and 230 deletions.
13 changes: 0 additions & 13 deletions server/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
import org.elasticsearch.node.InternalSettingsPreparer;
import org.elasticsearch.node.Node;
import org.elasticsearch.node.NodeValidationException;
import org.elasticsearch.snapshots.SnapshotsService;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
Expand Down Expand Up @@ -172,18 +171,6 @@ private void setup(boolean addShutdownHook, Environment environment) throws Boot
BootstrapSettings.SYSTEM_CALL_FILTER_SETTING.get(settings),
BootstrapSettings.CTRLHANDLER_SETTING.get(settings));

final long cacheSize = SnapshotsService.SNAPSHOT_CACHE_SIZE_SETTING.get(settings).getBytes();
final long regionSize = SnapshotsService.SNAPSHOT_CACHE_REGION_SIZE_SETTING.get(settings).getBytes();
final int numRegions = Math.toIntExact(cacheSize / regionSize);
final long fileSize = numRegions * regionSize;
if (fileSize > 0) {
try {
Natives.tryCreateCacheFile(environment, fileSize);
} catch (Exception e) {
throw new BootstrapException(e);
}
}

// initialize probes before the security manager is installed
initializeProbes();

Expand Down
57 changes: 0 additions & 57 deletions server/src/main/java/org/elasticsearch/bootstrap/JNAFalloc.java

This file was deleted.

44 changes: 0 additions & 44 deletions server/src/main/java/org/elasticsearch/bootstrap/JNANatives.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,11 @@
import com.sun.jna.Native;
import com.sun.jna.Pointer;
import com.sun.jna.WString;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.util.Constants;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.env.Environment;
import org.elasticsearch.monitor.jvm.JvmInfo;
import org.elasticsearch.snapshots.SnapshotUtils;

import java.io.FileOutputStream;
import java.io.IOException;
import java.lang.reflect.Field;
import java.nio.file.Files;
import java.nio.file.Path;

import static org.elasticsearch.bootstrap.JNAKernel32Library.SizeT;
Expand Down Expand Up @@ -269,39 +260,4 @@ static void tryInstallSystemCallFilter(Path tmpFile) {
}
}

@SuppressForbidden(reason = "need access to fd on FileOutputStream")
static void fallocateSnapshotCacheFile(Environment environment, long fileSize) throws IOException {
final JNAFalloc falloc = JNAFalloc.falloc();
if (falloc == null) {
logger.debug("not trying to create a shared cache file using fallocate because native fallocate library could not be loaded.");
return;
}

Path cacheFile = SnapshotUtils.findCacheSnapshotCacheFilePath(environment, fileSize);
if (cacheFile == null) {
throw new IOException("could not find a directory with adequate free space for cache file");
}
boolean success = false;
try (FileOutputStream fileChannel = new FileOutputStream(cacheFile.toFile())) {
long currentSize = fileChannel.getChannel().size();
if (currentSize < fileSize) {
final Field field = fileChannel.getFD().getClass().getDeclaredField("fd");
field.setAccessible(true);
final int result = falloc.fallocate((int) field.get(fileChannel.getFD()), currentSize, fileSize - currentSize);
if (result == 0) {
success = true;
logger.info("allocated cache file [{}] using fallocate", cacheFile);
} else {
logger.warn("failed to initialize cache file [{}] using fallocate errno [{}]", cacheFile, result);
}
}
} catch (Exception e) {
logger.warn(new ParameterizedMessage("failed to initialize cache file [{}] using fallocate", cacheFile), e);
} finally {
if (success == false) {
// if anything goes wrong, delete the potentially created file to not waste disk space
Files.deleteIfExists(cacheFile);
}
}
}
}
17 changes: 0 additions & 17 deletions server/src/main/java/org/elasticsearch/bootstrap/Natives.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.env.Environment;

import java.io.IOException;
import java.nio.file.Path;

/**
Expand Down Expand Up @@ -135,19 +133,4 @@ static boolean isSystemCallFilterInstalled() {
return JNANatives.LOCAL_SYSTEM_CALL_FILTER;
}

/**
* On Linux, this method tries to create the searchable snapshot frozen cache file using fallocate if JNA is available. This enables
* a much faster creation of the file than the fallback mechanism in the searchable snapshots plugin that will pre-allocate the cache
* file by writing zeros to the file.
*
* @throws IOException on failure to determine free disk space for a data path
*/
public static void tryCreateCacheFile(Environment environment, long fileSize) throws IOException {
if (JNA_AVAILABLE == false) {
logger.warn("cannot use fallocate to create cache file because JNA is not available");
return;
}
JNANatives.fallocateSnapshotCacheFile(environment, fileSize);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,10 @@

import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexNotFoundException;

import java.util.ArrayList;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
Expand Down Expand Up @@ -115,28 +110,4 @@ public static List<String> filterIndices(List<String> availableIndices, String[]
return Collections.unmodifiableList(new ArrayList<>(result));
}

/**
* Tries to find a suitable path to a searchable snapshots shared cache file in the data paths founds in the environment.
*
* @return path for the cache file or {@code null} if none could be found
*/
@Nullable
public static Path findCacheSnapshotCacheFilePath(Environment environment, long fileSize) throws IOException {
Path cacheFile = null;
for (Path path : environment.dataFiles()) {
Files.createDirectories(path);
// TODO: be resilient to this check failing and try next path?
long usableSpace = Environment.getUsableSpace(path);
Path p = path.resolve(SnapshotsService.CACHE_FILE_NAME);
if (Files.exists(p)) {
usableSpace += Files.size(p);
}
// TODO: leave some margin for error here
if (usableSpace > fileSize) {
cacheFile = p;
break;
}
}
return cacheFile;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,6 @@
import org.elasticsearch.cluster.RestoreInProgress;
import org.elasticsearch.cluster.SnapshotDeletionsInProgress;
import org.elasticsearch.cluster.SnapshotsInProgress;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.repositories.RepositoryShardId;
import org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus;
import org.elasticsearch.cluster.SnapshotsInProgress.ShardState;
import org.elasticsearch.cluster.SnapshotsInProgress.State;
Expand Down Expand Up @@ -68,6 +65,7 @@
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.SystemIndices;
Expand All @@ -77,6 +75,7 @@
import org.elasticsearch.repositories.RepositoryData;
import org.elasticsearch.repositories.RepositoryException;
import org.elasticsearch.repositories.RepositoryMissingException;
import org.elasticsearch.repositories.RepositoryShardId;
import org.elasticsearch.repositories.ShardGenerations;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
Expand Down Expand Up @@ -143,26 +142,6 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus

public static final String UPDATE_SNAPSHOT_STATUS_ACTION_NAME = "internal:cluster/snapshot/update_snapshot_status";

public static final String SHARED_CACHE_SETTINGS_PREFIX = "xpack.searchable.snapshot.shared_cache.";

public static final Setting<ByteSizeValue> SHARED_CACHE_RANGE_SIZE_SETTING = Setting.byteSizeSetting(
SHARED_CACHE_SETTINGS_PREFIX + "range_size",
ByteSizeValue.ofMb(16), // default
Setting.Property.NodeScope
);
public static final Setting<ByteSizeValue> SNAPSHOT_CACHE_REGION_SIZE_SETTING = Setting.byteSizeSetting(
SHARED_CACHE_SETTINGS_PREFIX + "region_size",
SHARED_CACHE_RANGE_SIZE_SETTING,
Setting.Property.NodeScope
);
public static final Setting<ByteSizeValue> SNAPSHOT_CACHE_SIZE_SETTING = Setting.byteSizeSetting(
SHARED_CACHE_SETTINGS_PREFIX + "size",
ByteSizeValue.ZERO,
Setting.Property.NodeScope
);

public static final String CACHE_FILE_NAME = "shared_snapshot_cache";

public static final String NO_FEATURE_STATES_VALUE = "none";

private final ClusterService clusterService;
Expand Down
3 changes: 2 additions & 1 deletion x-pack/plugin/searchable-snapshots/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ archivesBaseName = 'x-pack-searchable-snapshots'

dependencies {
compileOnly project(path: xpackModule('core'))
implementation project(path: 'preallocate')
internalClusterTestImplementation(testArtifact(project(xpackModule('core'))))
}

addQaCheckDependencies()
addQaCheckDependencies()
16 changes: 16 additions & 0 deletions x-pack/plugin/searchable-snapshots/preallocate/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

apply plugin: 'elasticsearch.build'

archivesBaseName = 'preallocate'

dependencies {
compileOnly project(":server")
}

tasks.named("testingConventions").configure { enabled = false }
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.searchablesnapshots.preallocate;

import com.sun.jna.Native;
import com.sun.jna.Platform;

import java.security.AccessController;
import java.security.PrivilegedAction;

final class LinuxPreallocator implements Preallocator {

@Override
public boolean available() {
return Natives.NATIVES_AVAILABLE;
}

@Override
public int preallocate(final int fd, final long currentSize, final long fileSize) {
final int rc = Natives.fallocate(fd, 0, currentSize, fileSize - currentSize);
return rc == 0 ? 0 : Native.getLastError();
}

@Override
public String error(int errno) {
return Natives.strerror(errno);
}

private static class Natives {

public static final boolean NATIVES_AVAILABLE;

static {
NATIVES_AVAILABLE = AccessController.doPrivileged((PrivilegedAction<Boolean>) () -> {
try {
Native.register(Natives.class, Platform.C_LIBRARY_NAME);
} catch (final UnsatisfiedLinkError e) {
return false;
}
return true;
});
}

static native int fallocate(int fd, int mode, long offset, long length);

static native String strerror(int errno);

}

}

0 comments on commit 77b968c

Please sign in to comment.