Skip to content

Commit

Permalink
Fix / Git filesystem permissions on Windows (#154)
Browse files Browse the repository at this point in the history
* Add scratch dir options to the runtime CLI

* Set scratch dir in the job mgmt service when launching jobs

* Fix sandbox directory permissions in local executor

* Work around Windows issues in Git checkout

* Add utility function for cleaning up temp directories

* Use new utility functions in repos module

* Make model loader use the global scratch dir, and create a sub-dir for each loader scope

* Handle create/destroy of scratch dir in the top level runtime class

* Update CLI arg name for scratch dir persist

* Use new model loader in dev mode for scanning models

* Update model loader test

* Fix file mode for temp dirs created by models/repos modules

* Limit path length to 259 before using UNC paths, to allow for the fact Python string length does not include a nul terminator

* Update repos API and model loader to handle conflicted checkouts
- Where identical model components are loaded, the model class is cached
- Where models are loaded from the same checkout, the model code is cached
- Checkout keys vary by repo type, to allow for different packaging schemes

* Include E2E tests using models from a Git repo when running in CI

* Do not clean up model scope scratch dirs if --scratch-dir-persist is set

* Config options for local batch executor - batch dir, location and persist flag
  • Loading branch information
martin-traverse committed Jul 15, 2022
1 parent ef06176 commit 14e705c
Show file tree
Hide file tree
Showing 16 changed files with 519 additions and 131 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,19 @@
import java.nio.file.*;
import java.time.Duration;
import java.util.*;
import java.util.concurrent.CompletionException;
import java.util.stream.Collectors;


public class LocalBatchExecutor implements IBatchExecutor {

public static final String CONFIG_VENV_PATH = "venvPath";
public static final String CONFIG_BATCH_DIR = "batchDir";
public static final String CONFIG_BATCH_PERSIST = "batchPersist";

private static final String JOB_DIR_TEMPLATE = "trac-job-%s";
public static final String PROCESS_USERNAME_PROPERTY = "user.name";

private static final String JOB_DIR_TEMPLATE = "tracdap_%s_";
private static final String JOB_LOG_SUBDIR = "log";

private static final boolean IS_WINDOWS = System.getProperty("os.name").toLowerCase().contains("windows");
Expand All @@ -49,11 +54,21 @@ public class LocalBatchExecutor implements IBatchExecutor {

private final Path tracRuntimeVenv;
private final Path batchRootDir;
private final boolean batchPersist;
private final String batchUser;

private final Map<Long, Process> processMap = new HashMap<>();

public LocalBatchExecutor(Properties properties) {

this.tracRuntimeVenv = prepareVenvPath(properties);
this.batchRootDir = prepareBatchRootDir(properties);
this.batchPersist = prepareBatchPersist(properties);
this.batchUser = lookupBatchUser();
}

private Path prepareVenvPath(Properties properties) {

var venvPath = properties.getProperty(CONFIG_VENV_PATH);

if (venvPath == null) {
Expand All @@ -63,7 +78,7 @@ public LocalBatchExecutor(Properties properties) {
throw new EStartup(err);
}

this.tracRuntimeVenv = Paths.get(venvPath)
var tracRuntimeVenv = Paths.get(venvPath)
.toAbsolutePath()
.normalize();

Expand All @@ -73,7 +88,49 @@ public LocalBatchExecutor(Properties properties) {
throw new EStartup(err);
}

this.batchRootDir = null;
return tracRuntimeVenv;
}

private Path prepareBatchRootDir(Properties properties) {

var batchDir = properties.getProperty(CONFIG_BATCH_DIR);

if (batchDir == null)
return null;

var batchRootDir = Paths.get(batchDir)
.toAbsolutePath()
.normalize();

if (!Files.exists(batchRootDir) || !Files.isDirectory(batchRootDir) || !Files.isWritable(batchRootDir)) {
var err = String.format("Local executor batch dir is not a writable directory: [%s]", batchRootDir);
log.error(err);
throw new EStartup(err);
}

return batchRootDir;
}

private boolean prepareBatchPersist(Properties properties) {

var batchPersist = properties.getProperty(CONFIG_BATCH_PERSIST);

if (batchPersist == null)
return false;

return Boolean.parseBoolean(batchPersist);
}

private String lookupBatchUser() {

var processUser = System.getProperty(PROCESS_USERNAME_PROPERTY);

if (processUser == null || processUser.isBlank()) {
log.warn("Local executor batch user could not be determined: Process property [{}] is not set", PROCESS_USERNAME_PROPERTY);
return null;
}

return processUser;
}

private LocalBatchState validState(ExecutorState jobState) {
Expand All @@ -94,44 +151,63 @@ public ExecutorState createBatch(String jobKey) {

try {

var batchDirPrefix = String.format(JOB_DIR_TEMPLATE, jobKey);
var batchDirPrefix = String.format(JOB_DIR_TEMPLATE, jobKey.toLowerCase());
var batchDir = (batchRootDir != null)
? Files.createTempDirectory(batchRootDir, batchDirPrefix)
: Files.createTempDirectory(batchDirPrefix);

var batchDirOwner = Files.getOwner(batchDir);

if (batchUser != null && batchDirOwner != null && !batchUser.equals(batchDirOwner.getName())) {

fixSandboxPermissions(batchDir);
batchDirOwner = Files.getOwner(batchDir);
}

var batchState = new LocalBatchState(jobKey);
batchState.setBatchDir(batchDir.toString());

log.info("Job [{}] sandbox directory created: [{}]", jobKey, batchDir);
log.info("Job [{}] sandbox directory created: [{}], owner = [{}]", jobKey, batchDir, batchDirOwner);

return batchState;
}
catch (AccessDeniedException e) {

// Permissions errors reported as executor access error
catch (IOException e) {

var errorMessage = String.format(
"Job [%s] failed to create sandbox directory: %s",
jobKey, e.getMessage());

log.error(errorMessage, e);
throw new EExecutorAccess(errorMessage, e);
throw new EExecutorFailure(errorMessage, e);
}
catch (IOException e) {
}

var errorMessage = String.format(
"Job [%s] failed to create sandbox directory: %s",
jobKey, e.getMessage());
private void fixSandboxPermissions(Path batchDir) {

log.error(errorMessage, e);
throw new EExecutorFailure(errorMessage, e);
try {

log.info("Sandbox directory [{}] has the wrong owner, attempting to fix...", batchDir.getFileName());

var fs = FileSystems.getDefault();
var userLookup = fs.getUserPrincipalLookupService();
var batchUserPrincipal = userLookup.lookupPrincipalByName(batchUser);

Files.setOwner(batchDir, batchUserPrincipal);

log.info("Sandbox directory [{}] permissions have been repaired", batchDir.getFileName());
}
catch (IOException | UnsupportedOperationException e) {

log.warn("Sandbox directory [{}] permissions could not be repaired: {}",
batchDir.getFileName(), e.getMessage(), e);
}
}

@Override
public void destroyBatch(String jobKey, ExecutorState state) {

var batchState = validState(state);
var batchDir = Paths.get(batchState.getBatchDir());
var process = processMap.get(batchState.getPid());

if (process == null)
Expand All @@ -142,7 +218,29 @@ public void destroyBatch(String jobKey, ExecutorState state) {
process.destroyForcibly();
}

// TODO: Remove files depending on config
if (!batchPersist) {

log.info("Cleaning up sandbox directory [{}]...", batchDir);

try (var files = Files.walk(batchDir)) {

files.sorted(Comparator.reverseOrder()).forEach(f -> {
try { Files.delete(f); }
catch (IOException e) { throw new CompletionException(e); }
});
}
catch (IOException e) {
log.warn("Failed to clean up sandbox directory [{}]: {}", batchDir, e.getMessage(), e);
}
catch (CompletionException e) {
var cause = e.getCause();
log.warn("Failed to clean up sandbox directory [{}]: {}", batchDir, cause.getMessage(), cause);
}
}
else {

log.info("Sandbox directory [{}] will be retained", batchDir);
}

processMap.remove(batchState.getPid());
}
Expand Down Expand Up @@ -237,13 +335,10 @@ public ExecutorState writeFile(ExecutorState state, String volumeName, String fi

return batchState;
}
catch (FileAlreadyExistsException e) {

// TODO
throw new RuntimeException("TODO", e);
}
catch (IOException e) {

// Includes FileAlreadyExistsException

// TODO
throw new RuntimeException("TODO", e);
}
Expand Down Expand Up @@ -274,13 +369,10 @@ public byte[] readFile(ExecutorState state, String volumeName, String fileName)

return Files.readAllBytes(filePath);
}
catch (NoSuchFileException e) {

// TODO
throw new RuntimeException("TODO", e);
}
catch (IOException e) {

// Includes NoSuchFileException

// TODO
throw new RuntimeException("TODO", e);
}
Expand All @@ -304,6 +396,9 @@ public ExecutorState startBatch(ExecutorState jobState, LaunchCmd launchCmd, Lis
processArgs.addAll(TRAC_CMD_ARGS);
processArgs.addAll(decodedArgs);

if (batchPersist)
processArgs.add("--scratch-dir-persist");

var pb = new ProcessBuilder();
pb.command(processArgs);
pb.directory(batchDir.toFile());
Expand All @@ -326,7 +421,7 @@ public ExecutorState startBatch(ExecutorState jobState, LaunchCmd launchCmd, Lis
batchState.setPid(process.pid());
processMap.put(process.pid(), process);

logBatchStart(jobState.getJobKey(), process, batchDir);
logBatchStart(jobState.getJobKey(), pb, process);

// If used, this should go on the main service executor
// process.onExit().whenComplete((proc, err) ->
Expand Down Expand Up @@ -362,6 +457,7 @@ private String decodeLaunchArg(LaunchArg arg, LocalBatchState batchState) {
return batchDir
.resolve(volume)
.resolve(arg.getPathArg())
.normalize()
.toString();

default:
Expand All @@ -370,15 +466,13 @@ private String decodeLaunchArg(LaunchArg arg, LocalBatchState batchState) {
}
}

private void logBatchStart(String jobKey, Process batchProcess, Path batchDir) {

var procInfo = batchProcess.info();
private void logBatchStart(String jobKey, ProcessBuilder processBuilder, Process process) {

log.info("Starting batch process [{}]", jobKey);
log.info("Command: {}", procInfo.commandLine().orElse("unknown"));
log.info("Working dir: {}", batchDir);
log.info("User: [{}]", procInfo.user().orElse("unknown"));
log.info("PID: [{}]", batchProcess.pid());
log.info("Command: [{}]", String.join(" ", processBuilder.command()));
log.info("Working dir: [{}]", processBuilder.directory());
log.info("User: [{}]", process.info().user().orElse("unknown"));
log.info("PID: [{}]", process.pid());
}

private void logBatchComplete(String jobKey, Process batchProcess, LocalBatchState batchState) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand Down Expand Up @@ -225,14 +226,22 @@ void prepareConfig() throws Exception {

log.info("Prepare config for platform testing...");

// Git is not available in CI for tests run inside containers
// So, only look up the current repo if it is needed by the orchestrator
// To run orchestrator tests in a container, we'd need to pass the repo URL in, e.g. with an env var from CI
String currentGitOrigin = startOrch
? getCurrentGitOrigin()
: "git_repo_not_configured";

// Substitutions are used by template config files in test resources
// But it is harmless to apply them to fully defined config files as well
var substitutions = Map.of(
"${TRAC_DIR}", tracDir.toString().replace("\\", "\\\\"),
"${TRAC_STORAGE_DIR}", tracStorageDir.toString().replace("\\", "\\\\"),
"${STORAGE_FORMAT}", storageFormat,
"${TRAC_EXEC_DIR}", tracExecDir.toString().replace("\\", "\\\\"),
"${TRAC_REPO_DIR}", tracRepoDir.toString(),
"${STORAGE_FORMAT}", storageFormat);
"${TRAC_LOCAL_REPO}", tracRepoDir.toString(),
"${TRAC_GIT_REPO}", currentGitOrigin);

platformConfigUrl = ConfigHelpers.prepareConfig(
testConfig, tracDir,
Expand All @@ -248,6 +257,28 @@ void prepareConfig() throws Exception {
platformConfig = config.loadRootConfigObject(PlatformConfig.class);
}

private String getCurrentGitOrigin() throws Exception {

var pb = new ProcessBuilder();
pb.command("git", "config", "--get", "remote.origin.url");

var proc = pb.start();

try {
proc.waitFor(10, TimeUnit.SECONDS);

var procResult = proc.getInputStream().readAllBytes();
var origin = new String(procResult, StandardCharsets.UTF_8).strip();

log.info("Using Git origin: {}", origin);

return origin;
}
finally {
proc.destroy();
}
}

void prepareDatabase() {

log.info("Deploy database schema...");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,14 @@ services:
port: 8083

repositories:
UNIT_TEST_REPO:

TRAC_LOCAL_REPO:
repoType: local
repoUrl: ${TRAC_REPO_DIR}
repoUrl: ${TRAC_LOCAL_REPO}

TRAC_GIT_REPO:
repoType: git
repoUrl: ${TRAC_GIT_REPO}

executor:
executorType: LOCAL
Expand Down
Loading

0 comments on commit 14e705c

Please sign in to comment.