-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Cloud Storage Extension 1.1.0, released 2021-MM-DD | ||
|
||
## Features | ||
|
||
* #134: Added support for Hadoop Distributed File System (HDFS) (PR #136). | ||
|
||
## Documentation | ||
|
||
* #131: Added access privilege to connection object (PR #132). | ||
|
||
## Dependency Updates | ||
|
||
### Runtime Dependency Updates | ||
|
||
### Test Dependency Updates | ||
|
||
* Updated `org.scalatest:scalatest:3.2.3` to `3.2.6` | ||
* Updated `org.mockito:mockito-core:3.7.7` to `3.8.0` | ||
* Updated `org.testcontainers:localstack:1.15.1` to `1.15.2` | ||
* Updated `com.exasol:test-db-builder-java:3.0.0` to `3.1.1` | ||
* Updated `com.exasol:exasol-testcontainers:3.5.0` to `3.5.1` | ||
|
||
### Plugin Updates | ||
|
||
* Updated `com.timushev.sbt:sbt-updates:0.5.1` to `0.5.2` |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -275,12 +275,12 @@ Run these statements to create export UDF scripts: | |
OPEN SCHEMA CLOUD_STORAGE_EXTENSION; | ||
|
||
CREATE OR REPLACE JAVA SET SCRIPT EXPORT_PATH(...) EMITS (...) AS | ||
%scriptclass com.exasol.cloudetl.scriptclasses.ExportPath; | ||
%scriptclass com.exasol.cloudetl.scriptclasses.TableExportQueryGenerator; | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
morazow
Author
Contributor
|
||
%jar /buckets/bfsdefault/<BUCKET>/exasol-cloud-storage-extension-<VERSION>.jar; | ||
/ | ||
|
||
CREATE OR REPLACE JAVA SET SCRIPT EXPORT_TABLE(...) EMITS (ROWS_AFFECTED INT) AS | ||
%scriptclass com.exasol.cloudetl.scriptclasses.ExportTable; | ||
%scriptclass com.exasol.cloudetl.scriptclasses.TableDataExporter; | ||
This comment has been minimized.
Sorry, something went wrong.
allipatev
Contributor
|
||
%jar /buckets/bfsdefault/<BUCKET>/exasol-cloud-storage-extension-<VERSION>.jar; | ||
/ | ||
``` | ||
|
@@ -291,6 +291,28 @@ UDF and it will call the `EXPORT_TABLE` script internally. | |
Make sure you change the `<BUCKET>` name and jar version `<VERSION>` | ||
accordingly. | ||
|
||
#### Setup Export UDF Scripts in Docker | ||
|
||
Similar to import, the export UDF scripts require a slightly different deployment for | ||
Exasol Docker installations. | ||
|
||
```sql | ||
OPEN SCHEMA CLOUD_STORAGE_EXTENSION; | ||
|
||
CREATE OR REPLACE JAVA SET SCRIPT EXPORT_PATH(...) EMITS (...) AS | ||
%scriptclass com.exasol.cloudetl.scriptclasses.DockerTableExportQueryGenerator; | ||
%jar /buckets/bfsdefault/<BUCKET>/exasol-cloud-storage-extension-<VERSION>.jar; | ||
/ | ||
|
||
CREATE OR REPLACE JAVA SET SCRIPT EXPORT_TABLE(...) EMITS (ROWS_AFFECTED INT) AS | ||
%scriptclass com.exasol.cloudetl.scriptclasses.DockerTableDataExporter; | ||
%jar /buckets/bfsdefault/<BUCKET>/exasol-cloud-storage-extension-<VERSION>.jar; | ||
/ | ||
``` | ||
|
||
Please note that the Docker scripts use different class names for the `%scriptclass` | ||
parameter. | ||
|
||
## Prepare an Exasol Table for Import | ||
|
||
To store the imported data, you need to create a table inside the Exasol | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package com.exasol.cloudetl.bucket | ||
|
||
import com.exasol.cloudetl.storage.StorageProperties | ||
|
||
import org.apache.hadoop.conf.Configuration | ||
|
||
/**
 * A [[Bucket]] implementation for the HDFS filesystem.
 *
 * @param path the bucket path, exposed as `bucketPath`
 * @param params the storage properties, exposed as `properties`
 */
final case class HDFSBucket(path: String, params: StorageProperties) extends Bucket {

  /** @inheritdoc */
  override val bucketPath: String = path

  /** @inheritdoc */
  override val properties: StorageProperties = params

  /** Returns the required property keys for HDFS; the list is empty. */
  override def getRequiredProperties(): Seq[String] = Seq.empty[String]

  /** @inheritdoc */
  override def validate(): Unit = validateRequiredProperties()

  /**
   * Validates this bucket and builds a fresh Hadoop configuration in which
   * `fs.hdfs.impl` is set to the `DistributedFileSystem` class name.
   *
   * @return the newly created configuration
   */
  override def getConfiguration(): Configuration = {
    validate()
    val hdfsImplementation = classOf[org.apache.hadoop.hdfs.DistributedFileSystem].getName()
    val hadoopConf = new Configuration()
    hadoopConf.set("fs.hdfs.impl", hdfsImplementation)
    hadoopConf
  }

}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package com.exasol.cloudetl.scriptclasses | ||
|
||
import com.exasol.ExaIterator | ||
import com.exasol.ExaMetadata | ||
|
||
/**
 * A table data exporter to be used as the script class when the UDF runs
 * inside the Exasol docker container.
 */
object DockerTableDataExporter {

  /**
   * Sets the Hadoop login user to a remote user named `exauser` and then
   * delegates to `TableDataExporter.run` with the same arguments.
   *
   * @param metadata the Exasol metadata passed through to the delegate
   * @param iterator the Exasol iterator passed through to the delegate
   */
  def run(metadata: ExaMetadata, iterator: ExaIterator): Unit = {
    import org.apache.hadoop.security.UserGroupInformation
    val dockerUser = UserGroupInformation.createRemoteUser("exauser")
    UserGroupInformation.setLoginUser(dockerUser)
    TableDataExporter.run(metadata, iterator)
  }

}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package com.exasol.cloudetl.scriptclasses | ||
|
||
import com.exasol.ExaExportSpecification | ||
import com.exasol.ExaMetadata | ||
|
||
/**
 * An export query generator to be used as the script class when the UDF runs
 * inside the Exasol docker container.
 */
object DockerTableExportQueryGenerator {

  /**
   * Sets the Hadoop login user to a remote user named `exauser` and then
   * delegates to `TableExportQueryGenerator.generateSqlForExportSpec`.
   *
   * @param metadata the Exasol metadata passed through to the delegate
   * @param exportSpecification the export specification passed through to the delegate
   * @return the SQL string produced by the delegate
   */
  def generateSqlForExportSpec(
    metadata: ExaMetadata,
    exportSpecification: ExaExportSpecification
  ): String = {
    import org.apache.hadoop.security.UserGroupInformation
    val dockerUser = UserGroupInformation.createRemoteUser("exauser")
    UserGroupInformation.setLoginUser(dockerUser)
    TableExportQueryGenerator.generateSqlForExportSpec(metadata, exportSpecification)
  }

}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package com.exasol.cloudetl.bucket | ||
|
||
/** Checks that an `hdfs://` path yields an [[HDFSBucket]] with the expected configuration. */
class HDFSBucketTest extends AbstractBucketTest {

  test("apply sets correct configuration") {
    val hdfsProperties = Map(PATH -> "hdfs://dir/path", FORMAT -> "orc")
    val connectionMetadata = mockConnectionInfo("", "")
    val hdfsBucket = getBucket(hdfsProperties, connectionMetadata)

    assert(hdfsBucket.isInstanceOf[HDFSBucket])

    // The configuration must name the Hadoop HDFS filesystem implementation class.
    val configuredFileSystem = hdfsBucket.getConfiguration().get("fs.hdfs.impl")
    assert(configuredFileSystem === classOf[org.apache.hadoop.hdfs.DistributedFileSystem].getName())
  }

}
@morazow : this new class name does not work with cloud-storage-extension 1.0.0 and raises
F-UDF-CL-LIB-1126: F-UDF-CL-SL-JAVA-1006: F-UDF-CL-SL-JAVA-1026: com.exasol.ExaCompilationException: F-UDF-CL-SL-JAVA-1066: The main script class defined via %scriptclass cannot be found: com.exasol.cloudetl.scriptclasses.TableExportQueryGenerator
Is it expected?