forked from Acxiom/metalus
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Acxiom#345 Added DataStream trait and reader/writer traits. Implement…
…ed a CSVDataRowReader.
- Loading branch information
Showing
7 changed files
with
233 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
62 changes: 62 additions & 0 deletions
62
metalus-core/src/main/scala/com/acxiom/metalus/connectors/DataRowStream.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package com.acxiom.metalus.connectors | ||
|
||
import com.acxiom.metalus.{Constants, PipelineException} | ||
import com.acxiom.metalus.sql.{Row, Schema} | ||
|
||
/** | ||
* Represents a stream of data that can be opened for processing and closed. This is the base trait | ||
* extended by DataRowReader and DataRowWriter. | ||
*/ | ||
trait DataRowStream { | ||
/** | ||
* Closes the stream. | ||
*/ | ||
def close(): Unit | ||
|
||
/** | ||
* Opens the stream for processing. | ||
*/ | ||
def open(): Unit | ||
} | ||
|
||
/** | ||
* Provides the ability to read rows from a data stream. | ||
*/ | ||
trait DataRowReader extends DataRowStream { | ||
/** | ||
* Fetches the next set of rows from the stream. An empty list indicates the stream is open but no data was | ||
* available, while None indicates the stream is closed and no more data is available. | ||
* | ||
* @return Some list of rows (possibly empty), or None if the end of the stream has been reached. | ||
*/ | ||
def next(): Option[List[Row]] | ||
} | ||
|
||
/** | ||
* Provides the ability to write rows to a data stream. | ||
*/ | ||
trait DataRowWriter extends DataRowStream { | ||
/** | ||
* Prepares the provided row and pushes to the stream. The format of the data will be determined by the | ||
* implementation. This default implementation wraps the row in a single element list and delegates to | ||
* the list based process method. | ||
* | ||
* @param row A single row to push to the stream. | ||
* @throws PipelineException - will be thrown if this call cannot be completed. | ||
*/ | ||
@throws(classOf[PipelineException]) | ||
def process(row: Row): Unit = process(List(row)) | ||
|
||
/** | ||
* Prepares the provided rows and pushes to the stream. The format of the data will be determined by the | ||
* implementation. Implementations must provide this method. | ||
* | ||
* @param rows A list of Row objects. | ||
* @throws PipelineException - will be thrown if this call cannot be completed. | ||
*/ | ||
@throws(classOf[PipelineException]) | ||
def process(rows: List[Row]): Unit | ||
} | ||
|
||
|
||
/** | ||
* Settings used to configure a data stream. | ||
* | ||
* @param schema An optional schema. NOTE(review): presumably describes the rows of the stream - confirm. | ||
* @param options Additional settings keyed by name. Defaults to an empty map. | ||
* @param rowBufferSize Defaults to Constants.ONE_HUNDRED. NOTE(review): presumably the number of rows | ||
* handled per read - confirm against the reader implementations. | ||
*/ | ||
case class DataStreamOptions(schema: Option[Schema], | ||
options: Map[String, Any] = Map(), | ||
rowBufferSize: Int = Constants.ONE_HUNDRED) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
metalus-core/src/test/scala/com/acxiom/metalus/connectors/LocalFileConnectorTests.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package com.acxiom.metalus.connectors | ||
|
||
import com.acxiom.metalus.Constants | ||
import org.scalatest.funspec.AnyFunSpec | ||
|
||
import java.io.File | ||
import java.nio.file.{Files, StandardCopyOption} | ||
|
||
/** | ||
* Verifies that the reader obtained from a LocalFileConnector returns the rows of a CSV file in chunks of the | ||
* requested buffer size until the end of the stream is reached. | ||
*/ | ||
class LocalFileConnectorTests extends AnyFunSpec { | ||
  describe("LocalFileConnector - DataRowReader") { | ||
    it ("should read data from a file in chunks") { | ||
      // The placeholder file is only created to discover the temp directory. | ||
      val source = File.createTempFile("placeholder", ".txt") | ||
      source.deleteOnExit() | ||
      val dataFilePath = s"${source.getParentFile.getAbsolutePath}/MOCK_DATA.csv" | ||
      val dataFile = new File(dataFilePath) | ||
      // Schedule the copied fixture for removal too, not just the placeholder. | ||
      dataFile.deleteOnExit() | ||
      val input = getClass.getResourceAsStream("/MOCK_DATA.csv") | ||
      assert(Option(input).isDefined, "MOCK_DATA.csv test resource is missing") | ||
      // Files.copy does not close the source stream, so close it explicitly. | ||
      try { | ||
        Files.copy(input, dataFile.toPath, StandardCopyOption.REPLACE_EXISTING) | ||
      } finally { | ||
        input.close() | ||
      } | ||
      val localFileConnector = LocalFileConnector("my-connector", None, None) | ||
      val options = DataStreamOptions(None, | ||
        Map("filePath" -> dataFilePath, "fileDelimiter" -> ",", "useHeader" -> true), | ||
        Constants.TWELVE) | ||
      val reader = localFileConnector.getReader(Some(options)) | ||
      assert(reader.isDefined) | ||
      // The first chunk must be exactly the requested buffer size. | ||
      val firstRows = reader.get.next() | ||
      assert(firstRows.isDefined && firstRows.get.length == Constants.TWELVE) | ||
      val firstRow = firstRows.get.head | ||
      assert(firstRow.columns.length == Constants.SEVEN) | ||
      assert(firstRow.schema.isDefined) | ||
      assert(firstRow.schema.get.attributes.length == Constants.SEVEN) | ||
      val columnNames = List("id","first_name","last_name","email","gender","ein","postal_code") | ||
      firstRow.schema.get.attributes.foreach(a => assert(columnNames.contains(a.name))) | ||
      // Drain the rest of the stream; None marks the end. Every remaining chunk must contain rows. | ||
      val remainingCount = Iterator.continually(reader.get.next()) | ||
        .takeWhile(_.isDefined) | ||
        .collect { case Some(rows) => | ||
          assert(rows.nonEmpty) | ||
          rows.length | ||
        } | ||
        .sum | ||
      assert(firstRows.get.length + remainingCount == Constants.ONE_THOUSAND) | ||
    } | ||
  } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters