From 276434f388380294f5c7df72ebbd71ffccb2a8b6 Mon Sep 17 00:00:00 2001 From: Mike Lambert Date: Tue, 19 Oct 2021 09:44:48 -0500 Subject: [PATCH] Support DefaultAWSCredentialsProviderChain for passing in AWS S3 credentials (#292) * Support DefaultCredentialsProviderChain for AWS SDK * Sync for rollback * Sync for rollback * Retest with minio, slight refactoring, turn down logging * Fix a bug where failed file uploads would incorrectly decrement the files/byte counts * Updated CHANGELOG.md Co-authored-by: Luigi Marini --- CHANGELOG.md | 7 +- app/services/mongodb/MongoDBFileService.scala | 8 +- app/services/s3/S3ByteStorageService.scala | 215 ++++++++++-------- conf/application.conf | 32 ++- project/Build.scala | 10 +- 5 files changed, 165 insertions(+), 107 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dee892117..2c71a6a8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,11 +6,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## Unreleased +### Added +- Support the [DefaultAWSCredentialsProviderChain](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) + for passing in credentials to the S3ByteStorageService. + ### Fixed -- Upgraded extractor parameters jsonform to version `2.2.5`. +- Cleaning up after a failed upload should no longer decrement the file + byte counts. ### Changed - now building mongo-init and monitor docker containers with python 3.8 +- Upgraded extractor parameters jsonform to version `2.2.5`. 
### Removed - check image is now part of [ncsa/checks](https://github.com/ncsa/checks/) diff --git a/app/services/mongodb/MongoDBFileService.scala b/app/services/mongodb/MongoDBFileService.scala index ca38a846a..52daf75ed 100644 --- a/app/services/mongodb/MongoDBFileService.scala +++ b/app/services/mongodb/MongoDBFileService.scala @@ -847,20 +847,24 @@ class MongoDBFileService @Inject() ( } } + // delete the actual file - if(isLastPointingToLoader(file.loader, file.loader_id)) { + val fileSize = if(isLastPointingToLoader(file.loader, file.loader_id)) { for(preview <- previews.findByFileId(file.id)){ previews.removePreview(preview) } if(!file.thumbnail_id.isEmpty) thumbnails.remove(UUID(file.thumbnail_id.get)) ByteStorageService.delete(file.loader, file.loader_id, FileDAO.COLLECTION) + file.length + } else { + 0 } import UUIDConversions._ FileDAO.removeById(file.id) appConfig.incrementCount('files, -1) - appConfig.incrementCount('bytes, -file.length) + appConfig.incrementCount('bytes, -1 * fileSize) current.plugin[ElasticsearchPlugin].foreach { _.delete(id.stringify) } diff --git a/app/services/s3/S3ByteStorageService.scala b/app/services/s3/S3ByteStorageService.scala index 9eb173a6d..292511cf4 100644 --- a/app/services/s3/S3ByteStorageService.scala +++ b/app/services/s3/S3ByteStorageService.scala @@ -1,12 +1,11 @@ package services.s3 -import java.io.{File, FileOutputStream, IOException, InputStream} +import java.io.{IOException, InputStream} import models.UUID - -import com.amazonaws.auth.{AWSStaticCredentialsProvider, BasicAWSCredentials} +import com.amazonaws.auth.{AWSCredentialsProviderChain, AWSStaticCredentialsProvider, BasicAWSCredentials, DefaultAWSCredentialsProviderChain} import com.amazonaws.client.builder.AwsClientBuilder import com.amazonaws.regions.Regions -import com.amazonaws.services.s3.model.{HeadBucketRequest, CreateBucketRequest, GetObjectRequest, ObjectMetadata} +import com.amazonaws.services.s3.model.{CreateBucketRequest, GetObjectRequest, 
HeadBucketRequest, ObjectMetadata} import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder} import com.amazonaws.{AmazonClientException, ClientConfiguration} import com.google.inject.Inject @@ -14,9 +13,29 @@ import play.Logger import play.api.Play import services.ByteStorageService import com.amazonaws.AmazonServiceException -import com.amazonaws.services.s3.transfer.TransferManager import com.amazonaws.services.s3.transfer.TransferManagerBuilder import com.amazonaws.services.s3.transfer.Upload +import services.s3.S3ByteStorageService.{handleACE, handleASE, handleIOE, handleUnknownError} + + +/** + * A ByteStorageService for Clowder that enables use of S3-compatible + * object stores to serve as the file backing for Clowder. This allows + * you to use an S3 bucket on AWS or Minio to store your files. + * + * + * Available Configuration Options: + * clowder.s3.serviceEndpoint - Host/port of the service to use for storage + * clowder.s3.bucketName - the name of the bucket that should be used to store files + * clowder.s3.accessKey - access key with which to access the bucket + * clowder.s3.secretKey - secret key associated with the access key above + * clowder.s3.region - the region where your S3 bucket lives + * clowder.s3.depth - the number of sub-paths to insert (default: 3) + * NOTE: this will randomly distribute files into smaller subdirectories and is recommended for performance reasons + * + * @author Mike Lambert + * + */ /** Available configuration options for s3 storage */ object S3ByteStorageService { @@ -25,42 +44,120 @@ object S3ByteStorageService { val AccessKey: String = "clowder.s3.accessKey" val SecretKey: String = "clowder.s3.secretKey" val Region: String = "clowder.s3.region" + + /* Reusable handlers for various Exception types */ + def handleUnknownError(err: Exception = null) = { + if (err != null) { + Logger.error("An unknown error occurred in the S3ByteStorageService: " + err.toString) + } else { + Logger.error("An unknown error 
occurred in the S3ByteStorageService.") + } + } + + /* Reusable handlers for various Exception types */ + def handleIOE(err: IOException) = { + Logger.error("IOException occurred in the S3ByteStorageService: " + err) + } + + /* Reusable handlers for various Exception types */ + def handleACE(ace: AmazonClientException) = { + Logger.error("Caught an AmazonClientException, which " + "means the client encountered " + "an internal error while trying to " + "communicate with S3, " + "such as not being able to access the network.") + Logger.error("Error Message: " + ace.getMessage) + } + + /* Reusable handlers for various Exception types */ + def handleASE(ase: AmazonServiceException) = { + Logger.error("Caught an AmazonServiceException, which " + "means your request made it " + "to Amazon S3, but was rejected with an error response" + " for some reason.") + Logger.error("Error Message: " + ase.getMessage) + Logger.error("HTTP Status Code: " + ase.getStatusCode) + Logger.error("AWS Error Code: " + ase.getErrorCode) + Logger.error("Error Type: " + ase.getErrorType) + Logger.error("Request ID: " + ase.getRequestId) + } } /** + * * A ByteStorageService for Clowder that enables use of S3-compatible * object stores to serve as the file backing for Clowder. This allows * you to use an S3 bucket on AWS or Minio to store your files. 
- * - * - * Available Configuration Options: - * clowder.s3.serviceEndpoint - Host/port of the service to use for storage - * clowder.s3.bucketName - the name of the bucket that should be used to store files - * clowder.s3.accessKey - access key with which to access the bucket - * clowder.s3.secretKey - secret key associated with the access key above - * clowder.s3.region - the region where your S3 bucket lives (currently unused) - * - * - * @author Mike Lambert - * */ class S3ByteStorageService @Inject()() extends ByteStorageService { + val s3: AmazonS3 = this.buildS3Client() + this.ensureBucketAccessAndExists() + + def buildS3Client(): AmazonS3 = { + // NOTE: Region is ignored for MinIO case + val s3client = (Play.current.configuration.getString(S3ByteStorageService.ServiceEndpoint), Play.current.configuration.getString(S3ByteStorageService.Region)) match { + case (Some(serviceEndpoint), Some(region)) => { + Logger.debug("Creating S3 Client with custom endpoint and region: " + serviceEndpoint + " in region " + region) + AmazonS3ClientBuilder.standard().withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(serviceEndpoint, region)) + } + case (Some(serviceEndpoint), None) => { + Logger.debug("Creating S3 Client with custom endpoint: " + serviceEndpoint + " (using default region)") + AmazonS3ClientBuilder.standard() + .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(serviceEndpoint, Regions.US_EAST_1.name())) + } + case (None, Some(region)) => { + Logger.debug("Creating S3 Client with custom region: " + region) + AmazonS3ClientBuilder.standard().withRegion(region) + } + case (None, None) => { + Logger.debug("Creating S3 Client with default region.") + AmazonS3ClientBuilder.standard() + } + } + + // Search for AccessKey / SecretKey in config (envvar values will be populated here by default) + (Play.current.configuration.getString(S3ByteStorageService.AccessKey), + 
Play.current.configuration.getString(S3ByteStorageService.SecretKey)) match { + case (Some(accessKey), Some(secretKey)) => { + val credentials = new BasicAWSCredentials(accessKey, secretKey) + val clientConfiguration = new ClientConfiguration + clientConfiguration.setSignerOverride("AWSS3V4SignerType") + Logger.debug("Creating S3 Client with custom credentials.") - // Only run a single thread at a time when verifying bucket existence - synchronized { + return s3client.withClientConfiguration(clientConfiguration) + .withPathStyleAccessEnabled(true) + .withCredentials(new AWSStaticCredentialsProvider(credentials)) + .build() + } + case (None, None) => { + Logger.debug("Creating S3 Client with default credentials.") + + return s3client.withCredentials(DefaultAWSCredentialsProviderChain.getInstance) + .withPathStyleAccessEnabled(true) + .build() + } + + case _ => { + val errMsg = "Bad S3 configuration: accessKey and secretKey are both required if one is given. Falling back to default credentials..." + Logger.warn(errMsg) + + return s3client.withCredentials(DefaultAWSCredentialsProviderChain.getInstance) + .withPathStyleAccessEnabled(true) + .build() + } + } + } + + def ensureBucketAccessAndExists() = { + // Ensure that bucket exists and that we have access to it before continuing Play.current.configuration.getString(S3ByteStorageService.BucketName) match { case Some(bucketName) => { try { // Validate configuration by checking for bucket existence on startup - this.s3Bucket.headBucket(new HeadBucketRequest(bucketName)) + this.s3.headBucket(new HeadBucketRequest(bucketName)) + Logger.debug("Confirmed access to configured S3 bucket. 
S3ByteStorageService loading is complete.") } catch { - case sdke @ (_: AmazonClientException | _: AmazonServiceException) => { + case sdke@(_: AmazonClientException | _: AmazonServiceException) => { if (sdke.getMessage.contains("Status Code: 404")) { Logger.warn("Configured S3 bucket does not exist, attempting to create it now...") try { // Bucket does not exist - create the bucket - this.s3Bucket.createBucket(new CreateBucketRequest(bucketName)) + this.s3.createBucket(new CreateBucketRequest(bucketName)) + Logger.debug("Created configured S3 bucket. S3ByteStorageService loading is complete.") } catch { // Bucket could not be created - abort case _: Throwable => throw new RuntimeException("Bad S3 configuration: Bucket does not exist and could not be created.") @@ -75,50 +172,13 @@ class S3ByteStorageService @Inject()() extends ByteStorageService { throw new RuntimeException("Bad S3 configuration: an unknown error has occurred - " + errMsg) } } - case _: Throwable => handleUnknownError(_) + case _: Throwable => throw new RuntimeException("Bad S3 configuration: an unknown error has occurred.") } } case _ => throw new RuntimeException("Bad S3 configuration: verify that you have set all configuration options.") } } - /** - * Grabs config parameters from Clowder to return a - * AmazonS3 pointing at the configured service endpoint. 
- */ - def s3Bucket(): AmazonS3 = { - (Play.current.configuration.getString(S3ByteStorageService.ServiceEndpoint), - Play.current.configuration.getString(S3ByteStorageService.AccessKey), - Play.current.configuration.getString(S3ByteStorageService.SecretKey)) match { - case (Some(serviceEndpoint), Some(accessKey), Some(secretKey)) => { - val credentials = new BasicAWSCredentials(accessKey, secretKey) - val clientConfiguration = new ClientConfiguration - clientConfiguration.setSignerOverride("AWSS3V4SignerType") - - Logger.debug("Created S3 Client for " + serviceEndpoint) - - val region = Play.current.configuration.getString(S3ByteStorageService.Region) match { - case Some(region) => region - case _ => Regions.US_EAST_1.name() - } - - return AmazonS3ClientBuilder.standard() - // NOTE: Region is ignored for MinIO case? - // TODO: Allow user to set region for AWS case? - .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(serviceEndpoint, region)) - .withPathStyleAccessEnabled(true) - .withClientConfiguration(clientConfiguration) - .withCredentials(new AWSStaticCredentialsProvider(credentials)) - .build() - } - case _ => { - val errMsg = "Bad S3 configuration: verify that you have set all configuration options." - Logger.error(errMsg) - throw new RuntimeException(errMsg) - } - } - } - /** * Store bytes to the specified path within the configured S3 bucket. 
* @@ -131,8 +191,8 @@ class S3ByteStorageService @Inject()() extends ByteStorageService { Play.current.configuration.getString(S3ByteStorageService.BucketName) match { case None => Logger.error("Failed saving bytes: failed to find configured S3 bucketName.") case Some(bucketName) => { - val xferManager: TransferManager = TransferManagerBuilder.standard().withS3Client(this.s3Bucket).build try { + val xferManager = TransferManagerBuilder.standard().withS3Client(this.s3).build Logger.debug("Saving file to: /" + bucketName + "/" + prefix) val id = UUID.generate.stringify @@ -163,7 +223,7 @@ class S3ByteStorageService @Inject()() extends ByteStorageService { //XferMgrProgress.showTransferProgress(xfer) // or block with Transfer.waitForCompletion() xfer.waitForCompletion() - xferManager.shutdownNow() + xferManager.shutdownNow(false) Logger.debug("File saved to: /" + bucketName + "/" + prefix + "/" + targetPath) @@ -199,7 +259,7 @@ class S3ByteStorageService @Inject()() extends ByteStorageService { // Download object from S3 bucket // NOTE: path should already contain the prefix val rangeObjectRequest = new GetObjectRequest(bucketName, path) - val objectPortion = this.s3Bucket.getObject(rangeObjectRequest) + val objectPortion = s3.getObject(rangeObjectRequest) return Option(objectPortion.getObjectContent) } catch { @@ -231,7 +291,7 @@ class S3ByteStorageService @Inject()() extends ByteStorageService { try { // Delete object from S3 bucket // NOTE: path should already contain the prefix - this.s3Bucket.deleteObject(bucketName, path) + s3.deleteObject(bucketName, path) return true } catch { case ase: AmazonServiceException => handleASE(ase) @@ -245,31 +305,4 @@ class S3ByteStorageService @Inject()() extends ByteStorageService { // Return false (in case of failure) return false } - - /* Reusable handlers for various Exception types */ - def handleUnknownError(err: Exception = null) = { - if (err != null) { - Logger.error("An unknown error occurred in the 
S3ByteStorageService: " + err.toString) - } else { - Logger.error("An unknown error occurred in the S3ByteStorageService.") - } - } - - def handleIOE(err: IOException) = { - Logger.error("IOException occurred in the S3ByteStorageService: " + err) - } - - def handleACE(ace: AmazonClientException) = { - Logger.error("Caught an AmazonClientException, which " + "means the client encountered " + "an internal error while trying to " + "communicate with S3, " + "such as not being able to access the network.") - Logger.error("Error Message: " + ace.getMessage) - } - - def handleASE(ase: AmazonServiceException) = { - Logger.error("Caught an AmazonServiceException, which " + "means your request made it " + "to Amazon S3, but was rejected with an error response" + " for some reason.") - Logger.error("Error Message: " + ase.getMessage) - Logger.error("HTTP Status Code: " + ase.getStatusCode) - Logger.error("AWS Error Code: " + ase.getErrorCode) - Logger.error("Error Type: " + ase.getErrorType) - Logger.error("Request ID: " + ase.getRequestId) - } } diff --git a/conf/application.conf b/conf/application.conf index 898da5fe9..2da5cf5e2 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -238,15 +238,20 @@ service.files=services.mongodb.MongoDBFileService # S3 Plugin # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~# # Point at AWS S3, or start MinIO up a MinIO container: -# $ docker run --name=minio -itd -p 8000:9000 -v $(pwd)/example-bucket:/data minio/minio server /data --compat +# $ docker run -itd --name=minio -p 8000:9000 -p 8001:9001 -e "MINIO_ROOT_USER=EXAMPLEACCESSKEY" \ +# -e "MINIO_ROOT_PASSWORD=EXAMPLESECRETKEY" quay.io/minio/minio server /data --console-address ":9001" # # To view the logs / retrieve credentials: # $ docker logs -f minio # # You should see something similar to the following in the logs: -# Endpoint: http://172.17.0.6:9000 http://127.0.0.1:9000 -# AccessKey: W0ZTICFRNY2IPDK96QAA -# SecretKey: 
sgSXcoO3t94EiN3K2bjBM8MaKAo2EnL7sGCJdcsr +# API: http://172.17.0.2:9000 http://127.0.0.1:9000 +# Console: http://172.17.0.2:9001 http://127.0.0.1:9001 +# Documentation: https://docs.min.io +# +# You should now be able to access the MinIO UI by navigating +# your browser to localhost:8001 and logging in with the provided +# USER/PASSWORD. # # Point the ServiceEndpoint at your bucket's S3 service provider and # copy the AccessKey / SecretKey into your configuration, as below. @@ -259,11 +264,20 @@ service.files=services.mongodb.MongoDBFileService # # Default configuration for the example MinIO setup above: # NOTE: Clowder and MinIO both use port 9000, so we map MinIO to port 8000. -clowder.s3.serviceEndpoint="http://localhost:8000" -clowder.s3.bucketName="clowder-uploads" -clowder.s3.accessKey="W0ZTICFRNY2IPDK96QAA" -clowder.s3.secretKey="sgSXcoO3t94EiN3K2bjBM8MaKAo2EnL7sGCJdcsr" -clowder.s3.region="us-east-1" +#clowder.s3.serviceEndpoint="https://s3.amazonaws.com" +#clowder.s3.serviceEndpoint="http://localhost:8000" +#clowder.s3.bucketName="clowder-uploads" +#clowder.s3.accessKey="W0ZTICFRNY2IPDK96QAA" +#clowder.s3.secretKey="sgSXcoO3t94EiN3K2bjBM8MaKAo2EnL7sGCJdcsr" +#clowder.s3.region="us-east-1" + +# Set reasonable defaults +clowder.s3.region=${?AWS_DEFAULT_REGION} +clowder.s3.region=${?AWS_REGION} +clowder.s3.accessKey=${?AWS_ACCESS_KEY_ID} +clowder.s3.accessKey=${?AWS_ACCESS_KEY} +clowder.s3.secretKey=${?AWS_SECRET_KEY} +clowder.s3.secretKey=${?AWS_SECRET_ACCESS_KEY} clowder.s3.depth=3 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/project/Build.scala b/project/Build.scala index b365cbedc..6b3c5420b 100644 --- a/project/Build.scala +++ b/project/Build.scala @@ -107,8 +107,8 @@ object ApplicationBuild extends Build { "com.google.inject" % "guice" % "3.0", // ?? 
- "org.apache.httpcomponents" % "httpclient" % "4.2.3", - "org.apache.httpcomponents" % "httpcore" % "4.2.3", + "org.apache.httpcomponents" % "httpclient" % "4.5.2", + "org.apache.httpcomponents" % "httpcore" % "4.4.4", "org.apache.httpcomponents" % "httpmime" % "4.2.3", // JSONparser and JSONObject @@ -125,9 +125,11 @@ object ApplicationBuild extends Build { "org.julienrf" %% "play-jsonp-filter" % "1.1", // Official AWS Java SDK - "com.amazonaws" % "aws-java-sdk-bom" % "1.11.106", + "com.amazonaws" % "aws-java-sdk-bom" % "1.12.78", - "com.amazonaws" % "aws-java-sdk-s3" % "1.11.106" + "com.amazonaws" % "aws-java-sdk-s3" % "1.12.78", + + "com.amazonaws" % "aws-java-sdk-sts" % "1.12.78" ) // Only compile the bootstrap bootstrap.less file and any other *.less file in the stylesheets directory