Skip to content

Commit

Permalink
[PUBDEV-3321] Allow arbitrary S3 connection end point/region. (#164)
Browse files Browse the repository at this point in the history
* [PUBDEV-3321] Allow arbitrary S3 connection end point.

The fix:
  - exposes system property "sys.ai.h2o.persist.s3.endPoint" which can override default
  S3 connection end point. For example, `java -Dsys.ai.h2o.persist.s3.endPoint="https://localhost:9000" -jar h2o.jar`

* Allow to specify region and path access style.

The property "sys.ai.h2o.persist.s3.region" can specify S3 region.
The property "sys.ai.h2o.persist.s3.enable.path.style" can force path-style access.

* Fix for S3 Minio support

Minio does not fill in the bucket name in the returned object summaries.
The bucket name has to be read from the object listing instead.
  • Loading branch information
mmalohlava authored and vpatryshev committed Oct 11, 2016
1 parent 44c2169 commit 677b663
Showing 1 changed file with 40 additions and 4 deletions.
44 changes: 40 additions & 4 deletions h2o-persist-s3/src/main/java/water/persist/PersistS3.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@
import com.amazonaws.*;
import com.amazonaws.auth.*;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.regions.*;
import com.amazonaws.regions.Region;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.S3ClientOptions;
import com.amazonaws.services.s3.model.*;

import com.google.common.io.ByteStreams;
Expand All @@ -39,7 +42,7 @@ public static AmazonS3 getClient() {
try {
H2OAWSCredentialsProviderChain c = new H2OAWSCredentialsProviderChain();
ClientConfiguration cc = s3ClientCfg();
_s3 = new AmazonS3Client(c, cc);
_s3 = configureClient(new AmazonS3Client(c, cc));
} catch( Throwable e ) {
e.printStackTrace();
StringBuilder msg = new StringBuilder();
Expand Down Expand Up @@ -117,16 +120,19 @@ public static InputStream openStream(Key k, RIStream.ProgressMonitor pmon) throw
return new H2SO3InputStream(k, pmon);
}

public static Key loadKey(S3ObjectSummary obj) throws IOException {
return S3FileVec.make(encodePath(obj.getBucketName(), obj.getKey()),obj.getSize());
/**
 * Creates the key of an S3-backed file vector for one object of a bucket listing.
 *
 * @param listing listing the object summary was taken from; consulted for the bucket name
 *                only when the summary itself does not carry one
 * @param obj     summary of the object to load
 * @return key produced by {@code S3FileVec.make} for the encoded bucket/object path
 * @throws IOException declared for callers; propagated from the underlying calls
 */
public static Key loadKey(ObjectListing listing, S3ObjectSummary obj) throws IOException {
  String bucketName = obj.getBucketName();
  if (bucketName == null) {
    // Some S3 implementations (Minio, for example) do not fill in the bucket name
    // of the returned object summary, so fall back to the one of the listing.
    bucketName = listing.getBucketName();
  }
  final String path = encodePath(bucketName, obj.getKey());
  return S3FileVec.make(path, obj.getSize());
}


private static void processListing(ObjectListing listing, ArrayList<String> succ, ArrayList<String> fail, boolean doImport){
for( S3ObjectSummary obj : listing.getObjectSummaries() ) {
try {
if (doImport) {
Key k = loadKey(obj);
Key k = loadKey(listing, obj);
succ.add(k.toString());
} else {
succ.add(obj.getKey());
Expand Down Expand Up @@ -299,6 +305,15 @@ private static ObjectMetadata getObjectMetadataForKey(Key k) {
public final static String S3_MAX_HTTP_CONNECTIONS_PROP = SYSTEM_PROP_PREFIX + "persist.s3.maxHttpConnections";
/** S3 force HTTP traffic */
public final static String S3_FORCE_HTTP = SYSTEM_PROP_PREFIX + "persist.s3.force.http";
/** S3 end-point, for example: "https://localhost:9000".
 * When the property is set, its value is passed to the client's {@code setEndpoint}. */
public final static String S3_END_POINT = SYSTEM_PROP_PREFIX + "persist.s3.endPoint";
/** S3 region, for example "us-east-1",
 * see {@link com.amazonaws.regions.Region#getRegion(com.amazonaws.regions.Regions)} for region list.
 * The value is a region name that is looked up via {@code RegionUtils.getRegion(String)}. */
public final static String S3_REGION = SYSTEM_PROP_PREFIX + "persist.s3.region";
/** Enable S3 path style access via setting the property to true.
 * See: {@link com.amazonaws.services.s3.S3ClientOptions#setPathStyleAccess(boolean)} */
public final static String S3_ENABLE_PATH_STYLE = SYSTEM_PROP_PREFIX + "persist.s3.enable.path.style";


static ClientConfiguration s3ClientCfg() {
ClientConfiguration cfg = new ClientConfiguration();
Expand All @@ -312,6 +327,27 @@ static ClientConfiguration s3ClientCfg() {
return cfg;
}

/**
 * Applies the optional, system-property driven connection settings to an S3 client.
 *
 * <ul>
 *   <li>{@link #S3_REGION} - region name (for example "us-east-1") that is resolved and
 *       set on the client.</li>
 *   <li>{@link #S3_END_POINT} - explicit end-point URL. It is applied after the region,
 *       so when both properties are given the end-point is the setting that was made last.</li>
 *   <li>{@link #S3_ENABLE_PATH_STYLE} - enables path style access when set to "true"
 *       (case-insensitive).</li>
 * </ul>
 *
 * Properties that are not set leave the corresponding client setting untouched.
 *
 * @param s3Client client to configure
 * @return the same client instance, to allow call chaining
 * @throws IllegalArgumentException if {@link #S3_REGION} names a region that cannot be resolved
 */
static AmazonS3Client configureClient(AmazonS3Client s3Client) {
  final String region = System.getProperty(S3_REGION);
  if (region != null) {
    Log.debug("S3 region specified: ", region);
    final Region awsRegion = RegionUtils.getRegion(region);
    if (awsRegion == null) {
      // Fail with a message that names the offending value and property instead of
      // handing a null region to the client.
      throw new IllegalArgumentException(
          "Unknown S3 region '" + region + "' specified via system property " + S3_REGION);
    }
    s3Client.setRegion(awsRegion);
  }
  // The end-point is set after the region on purpose: an explicitly given end-point
  // (for example a local S3-compatible server) must not be replaced by the region's default one.
  final String endPoint = System.getProperty(S3_END_POINT);
  if (endPoint != null) {
    Log.debug("S3 endpoint specified: ", endPoint);
    s3Client.setEndpoint(endPoint);
  }
  // Boolean.getBoolean is true only if the property exists and equals "true" ignoring case.
  if (Boolean.getBoolean(S3_ENABLE_PATH_STYLE)) {
    Log.debug("S3 path style access enabled");
    S3ClientOptions sco = new S3ClientOptions();
    sco.setPathStyleAccess(true);
    s3Client.setS3ClientOptions(sco);
  }
  return s3Client;
}

/**
 * Deleting a value is not supported here.
 *
 * @param v value that was requested to be deleted; not used
 * @throws UnsupportedOperationException always
 */
@Override
public void delete(Value v) {
  throw new UnsupportedOperationException();
}
Expand Down

0 comments on commit 677b663

Please sign in to comment.