Skip to content

Commit

Permalink
GH-15972: Add Configuration Option Filtering File System For Reading …
Browse files Browse the repository at this point in the history
…and Writing
  • Loading branch information
krasinski committed Feb 5, 2024
1 parent aedac69 commit d5b554e
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 6 deletions.
17 changes: 16 additions & 1 deletion h2o-core/src/main/java/water/H2O.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.*;
import java.nio.file.FileSystems;
import java.nio.file.PathMatcher;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
Expand Down Expand Up @@ -323,6 +325,9 @@ public static void printHelp() {
public String context_path = "";

public KeyValueArg[] extra_headers = new KeyValueArg[0];

public PathMatcher file_deny_glob = FileSystems.getDefault().getPathMatcher("glob:{/bin/*,/etc/*,/var/*,/usr/*,/proc/*,**/.**}");

}

public static class KeyValueArg {
Expand Down Expand Up @@ -843,7 +848,17 @@ else if(s.matches(("client_disconnect_timeout"))){
i = s.incrementAndCheck(i, args);
String value = args[i];
trgt.extra_headers = ArrayUtils.append(trgt.extra_headers, new KeyValueArg(key, value));
} else if(s.matches("embedded")) {
} else if (s.matches("file_deny_glob")) {
i = s.incrementAndCheck(i, args);
String key = args[i];
try {
trgt.file_deny_glob = FileSystems.getDefault().getPathMatcher("glob:" + key);
}
catch (Exception e) {
throw new IllegalArgumentException("Error parsing file_deny_glob parameter");
}
}
else if(s.matches("embedded")) {
trgt.embedded = true;
} else {
parseFailed("Unknown argument (" + s + ")");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package water.exceptions;

/**
 * Signals that access to a file was refused because its path matches file_deny_glob.
 */
public class H2OFileAccessDeniedException extends H2OAbstractRuntimeException {

  /**
   * Builds the exception from a single text that is used for both superclass arguments.
   *
   * @param message text forwarded to the superclass as both of its constructor arguments
   */
  public H2OFileAccessDeniedException(String message) {
    this(message, message);
  }

  /**
   * Builds the exception from two separate texts.
   *
   * @param message first argument forwarded to the superclass constructor
   * @param dev_message second argument forwarded to the superclass constructor
   */
  public H2OFileAccessDeniedException(String message, String dev_message) {
    super(message, dev_message);
  }

}
8 changes: 7 additions & 1 deletion h2o-core/src/main/java/water/fvec/Frame.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import water.*;
import water.api.FramesHandler;
import water.api.schemas3.KeyV3;
import water.exceptions.H2OFileAccessDeniedException;
import water.exceptions.H2OIllegalArgumentException;
import water.parser.BinaryFormatExporter;
import water.parser.BufferedString;
Expand All @@ -14,7 +15,6 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** A collection of named {@link Vec}s, essentially an R-like Distributed Data Frame.
Expand Down Expand Up @@ -1590,6 +1590,9 @@ public static Job export(Frame fr, String path, String frameName, boolean overwr
String compression, CSVStreamParams csvParms) {
boolean forceSingle = nParts == 1;
// Validate input
if (H2O.getPM().isFileAccessDenied(path)) {
throw new H2OFileAccessDeniedException("File " + path + " access denied");
}
if (forceSingle) {
boolean fileExists = H2O.getPM().exists(path);
if (overwrite && fileExists) {
Expand All @@ -1613,6 +1616,9 @@ public static Job export(Frame fr, String path, String frameName, boolean overwr

public static Job exportParquet(Frame fr, String path, boolean overwrite, String compression, boolean writeChecksum) {
// Validate input
if (H2O.getPM().isFileAccessDenied(path)) {
throw new H2OFileAccessDeniedException("File " + path + " access denied");
}
if (! H2O.getPM().isEmptyDirectoryAllNodes(path)) {
throw new H2OIllegalArgumentException(path, "exportFrame", "Cannot use path " + path +
" to store part files! The target needs to be either an existing empty directory or not exist yet.");
Expand Down
1 change: 1 addition & 0 deletions h2o-core/src/main/java/water/persist/Persist.java
Original file line number Diff line number Diff line change
Expand Up @@ -302,4 +302,5 @@ public boolean delete(String path) {
/**
 * Default implementation: unconditionally fails with a {@link RuntimeException}.
 *
 * NOTE(review): presumably concrete persistence backends override this to report whether they
 * can serve the given path - confirm against the subclasses.
 *
 * @param path path to be checked (unused here)
 * @return never returns normally
 * @throws RuntimeException always, with the message "Not implemented"
 */
public boolean canHandle(String path) {
  final String reason = "Not implemented";
  throw new RuntimeException(reason);
}

}
16 changes: 16 additions & 0 deletions h2o-core/src/main/java/water/persist/PersistManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
Expand Down Expand Up @@ -841,6 +842,21 @@ public Persist getPersistForURI(URI uri) {
}
}

/**
 * Returns true when path matches file_deny_glob input argument.
 *
 * <p>The deny matcher is obtained from the default file system, so it is only applied to local
 * paths. A path whose URI carries a scheme other than {@code file} is reported as not denied
 * rather than being handed to {@link File#File(URI)}, which rejects every non-{@code file} URI
 * with an {@link IllegalArgumentException}.
 *
 * @param path path to a file
 * @return true if path is a local path matching the deny glob, false otherwise
 */
public boolean isFileAccessDenied(String path) {
  final URI uri = FileUtils.getURI(path);
  final String scheme = uri.getScheme();
  if (scheme != null && !"file".equalsIgnoreCase(scheme)) {
    // NOTE(review): assumes FileUtils.getURI can yield non-file URIs for remote stores and that
    // the deny glob is meant for the local file system only - confirm with the option's owner.
    return false;
  }
  // Local path: same check as before, delegated to the Path overload (which normalizes).
  return isFileAccessDenied(new File(uri).toPath());
}

/**
 * Tests a path against the matcher stored in {@code H2O.ARGS.file_deny_glob}.
 *
 * @param path path to test; it is normalized before being matched
 * @return the matcher's result for the normalized path
 */
public boolean isFileAccessDenied(Path path) {
  final Path normalized = path.normalize();
  return H2O.ARGS.file_deny_glob.matches(normalized);
}

/**
* Finds all entries in the list that matches the regex
* @param prefix The substring to extract before pattern matching
Expand Down
4 changes: 4 additions & 0 deletions h2o-core/src/main/java/water/persist/PersistNFS.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.ArrayList;

import water.*;
import water.exceptions.H2OFileAccessDeniedException;
import water.exceptions.H2ONotFoundArgumentException;
import water.fvec.NFSFileVec;
import water.util.FileIntegrityChecker;
Expand Down Expand Up @@ -124,6 +125,9 @@ public ArrayList<String> calcTypeaheadMatches(String filter, int limit) {
/**
 * Imports the file or directory denoted by {@code path}.
 *
 * The path is refused when its normalized form matches {@code H2O.ARGS.file_deny_glob}, and
 * rejected when it does not exist; otherwise the work is handed to {@link FileIntegrityChecker}.
 *
 * NOTE(review): only the requested path itself is tested against the deny glob here; whether
 * entries found underneath a directory are filtered as well is not visible from this method.
 * {@code normalize()} is purely syntactic, so symbolic links are not resolved before matching.
 *
 * @param path    location to import
 * @param pattern not used by this method
 * @param files   passed through to {@code syncDirectory}
 * @param keys    passed through to {@code syncDirectory}
 * @param fails   passed through to {@code syncDirectory}
 * @param dels    passed through to {@code syncDirectory}
 * @throws H2OFileAccessDeniedException if the normalized path matches the deny glob
 * @throws H2ONotFoundArgumentException if the path does not exist
 */
@Override
public void importFiles(String path, String pattern, ArrayList<String> files, ArrayList<String> keys, ArrayList<String> fails, ArrayList<String> dels) {
  final File target = new File(FileUtils.getURI(path));
  final boolean denied = H2O.ARGS.file_deny_glob.matches(target.toPath().normalize());
  if (denied) {
    throw new H2OFileAccessDeniedException("File " + path + " access denied");
  }
  if (!target.exists()) {
    throw new H2ONotFoundArgumentException("File " + path + " does not exist");
  }
  FileIntegrityChecker.check(target).syncDirectory(files, keys, fails, dels);
}
Expand Down
14 changes: 10 additions & 4 deletions h2o-core/src/test/java/water/persist/PersistManagerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import water.*;
import water.exceptions.H2OFileAccessDeniedException;
import water.fvec.*;

import java.io.*;
Expand Down Expand Up @@ -33,15 +34,15 @@ public void calcTypeaheadMatches_emptyPath() {
List<String> matches = persistManager.calcTypeaheadMatches("", 100);
assertNotNull(matches);
assertEquals(0, matches.size());

// Path with spaces (testing trim is being done)
matches = persistManager.calcTypeaheadMatches(" ", 100);
assertNotNull(matches);
assertEquals(0, matches.size());
}

@Test
public void createReturnsBufferedOutputStreamForFiles() throws IOException {
public void createReturnsBufferedOutputStreamForFiles() throws IOException {
File target = new File(tmp.getRoot(), "target.txt");
try (OutputStream os = persistManager.create(target.getAbsolutePath(), false)) {
assertTrue(os instanceof BufferedOutputStream);
Expand Down Expand Up @@ -150,7 +151,7 @@ public void testCreateHexPath() throws IOException {
}
}


@Test
public void testDeltaLakeMetadataFilter() {
PersistManager.DeltaLakeMetadataFilter filter = new PersistManager.DeltaLakeMetadataFilter();
Expand All @@ -169,5 +170,10 @@ public void testDeltaLakeMetadataFilter() {
"dbfs:///_delta_log/b/fileB.parquet"
), result);
}


// Importing /etc/hosts is expected to be refused with H2OFileAccessDeniedException.
// NOTE(review): relies on the active file_deny_glob covering /etc/* - confirm the test setup
// does not override it.
@Test(expected = H2OFileAccessDeniedException.class)
public void testImportFileMatchingDenyList() {
  final ArrayList<String> files = new ArrayList<>();
  final ArrayList<String> keys = new ArrayList<>();
  final ArrayList<String> fails = new ArrayList<>();
  final ArrayList<String> dels = new ArrayList<>();
  persistManager.importFiles("/etc/hosts", null, files, keys, fails, dels);
}

}

0 comments on commit d5b554e

Please sign in to comment.