-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
33 changed files
with
470 additions
and
272 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
62 changes: 62 additions & 0 deletions
62
src/main/scala/com/exasol/cloudetl/scriptclasses/FilesDataImporter.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package com.exasol.cloudetl.scriptclasses | ||
|
||
import scala.collection.mutable.ListBuffer | ||
|
||
import com.exasol.ExaIterator | ||
import com.exasol.ExaMetadata | ||
import com.exasol.cloudetl.bucket.Bucket | ||
import com.exasol.cloudetl.source._ | ||
import com.exasol.cloudetl.storage.StorageProperties | ||
import com.exasol.common.data.Row | ||
|
||
import com.typesafe.scalalogging.LazyLogging | ||
import org.apache.hadoop.fs.Path | ||
|
||
/** | ||
* A importer class that reads and imports data into Exasol database. | ||
*/ | ||
object FilesDataImporter extends LazyLogging { | ||
|
||
/** | ||
* Reads files and emits their data into Exasol iterator. | ||
* | ||
* @param metadata an Exasol metadata object | ||
* @param iterator an Exasol iterator object | ||
*/ | ||
def run(metadata: ExaMetadata, iterator: ExaIterator): Unit = { | ||
val storageProperties = StorageProperties(iterator.getString(1), metadata) | ||
val fileFormat = storageProperties.getFileFormat() | ||
val bucket = Bucket(storageProperties) | ||
|
||
val files = groupFiles(iterator, 2) | ||
val nodeId = metadata.getNodeId | ||
val vmId = metadata.getVmId | ||
logger.info(s"The total number of files for node: $nodeId, vm: $vmId is '${files.size}'.") | ||
|
||
files.foreach { file => | ||
logger.debug(s"Importing from file: '$file'") | ||
val source = | ||
Source(fileFormat, new Path(file), bucket.getConfiguration(), bucket.fileSystem) | ||
readAndEmit(source.stream(), iterator) | ||
source.close() | ||
} | ||
} | ||
|
||
private[this] def groupFiles( | ||
iterator: ExaIterator, | ||
fileStartingIndex: Int | ||
): Seq[String] = { | ||
val files = ListBuffer[String]() | ||
do { | ||
files.append(iterator.getString(fileStartingIndex)) | ||
} while (iterator.next()) | ||
files.toSeq | ||
} | ||
|
||
private[this] def readAndEmit(rowIterator: Iterator[Row], ctx: ExaIterator): Unit = | ||
rowIterator.foreach { row => | ||
val columns: Seq[Object] = row.getValues().map(_.asInstanceOf[AnyRef]) | ||
ctx.emit(columns: _*) | ||
} | ||
|
||
} |
49 changes: 49 additions & 0 deletions
49
src/main/scala/com/exasol/cloudetl/scriptclasses/FilesImportQueryGenerator.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package com.exasol.cloudetl.scriptclasses | ||
|
||
import com.exasol.ExaImportSpecification | ||
import com.exasol.ExaMetadata | ||
import com.exasol.cloudetl.bucket.Bucket | ||
import com.exasol.cloudetl.storage.StorageProperties | ||
|
||
/** | ||
* A SQL query generator class that facilitates the metadata reading and | ||
* file data importing. | ||
*/ | ||
object FilesImportQueryGenerator { | ||
|
||
private[this] val DEFAULT_PARALLELISM = "nproc()" | ||
|
||
/** | ||
* Generates an Exasol SQL for the data import query. | ||
* | ||
* @param metadata an Exasol metadata object | ||
* @param importSpecification an Exasol import specification object | ||
*/ | ||
def generateSqlForImportSpec( | ||
metadata: ExaMetadata, | ||
importSpecification: ExaImportSpecification | ||
): String = { | ||
val storageProperties = StorageProperties(importSpecification.getParameters()) | ||
val bucket = Bucket(storageProperties) | ||
bucket.validate() | ||
|
||
val scriptSchema = metadata.getScriptSchema() | ||
val bucketPath = bucket.bucketPath | ||
val parallelism = storageProperties.getParallelism(DEFAULT_PARALLELISM) | ||
val storagePropertiesAsString = storageProperties.mkString() | ||
|
||
s"""|SELECT | ||
| $scriptSchema.IMPORT_FILES( | ||
| '$bucketPath', '$storagePropertiesAsString', filename | ||
|) | ||
|FROM ( | ||
| SELECT $scriptSchema.IMPORT_METADATA( | ||
| '$bucketPath', '$storagePropertiesAsString', $parallelism | ||
| ) | ||
|) | ||
|GROUP BY | ||
| partition_index; | ||
|""".stripMargin | ||
} | ||
|
||
} |
Oops, something went wrong.