The purpose of the hadoop-input
module is to provide programmatic access to the following objects:
- Hadoop CombineFileInputFormat object
- Hadoop DataDrivenDBInputFormat object
- Hadoop DBInputFormat object
- Hadoop FileInputFormat object
- Hadoop KeyValueTextInputFormat object
- Hadoop NLineInputFormat object
- Hadoop OracleDataDrivenDBInputFormat object
- Hadoop SequenceFileInputFormat object
- Hadoop SequenceFileAsBinaryInputFormat object
- Hadoop SequenceFileAsTextInputFormat object
- Hadoop SequenceFileInputFilter object
- Hadoop TextInputFormat object
Below is a list of exposed module properties.
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop CombineFileInputFormat.
*/
var CombineFileInputFormat = {};
/* Same APIs as the FileInputFormat */
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop DataDrivenDBInputFormat.
*/
var DataDrivenDBInputFormat = {};
/**
* Set the user-defined bounding query to use with a user-defined query.
*
* @param {Configuration} conf - The Hadoop configuration
 * @param {string} query - The bounding query. This *must* include the substring "$CONDITIONS"
 *                         (DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) inside the WHERE clause, so that
 *                         DataDrivenDBInputFormat knows where to insert split clauses. e.g., "SELECT foo FROM mytable
 *                         WHERE $CONDITIONS" will be expanded to something like "SELECT foo FROM mytable WHERE
 *                         (id > 100) AND (id < 250)" inside each split.
*/
DataDrivenDBInputFormat.setBoundingQuery = function (conf, query) { /* ... */ };
/* Same APIs as the DBInputFormat */
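As a minimal sketch (the conf object below is a hypothetical placeholder for a Hadoop Configuration obtained elsewhere):

// The $CONDITIONS token marks where DataDrivenDBInputFormat inserts the per-split range clause.
DataDrivenDBInputFormat.setBoundingQuery(conf, 'SELECT foo FROM mytable WHERE $CONDITIONS');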
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop DBInputFormat.
*/
var DBInputFormat = {};
/**
* Initializes the map-part of the job with the appropriate input settings.
*
* @param {Job} job - The Hadoop job
 * @param {string} inputClass - The name of the class implementing DBWritable, which is the Java object holding the
 *                              tuple fields
 * @param {string} inputQueryOrTableName - The input query for the four argument version or the table name for the six
 *                                         argument version
 * @param {string} conditionsOrInputCountQuery - The input count query for the four argument version or the conditions
 *                                               for the six argument version
 * @param {string} [orderBy] - The field names in the ORDER BY clause
* @param {string[]} [fieldNames] - The field names in the table
*/
DBInputFormat.setInput = function (job, inputClass, inputQueryOrTableName, conditionsOrInputCountQuery, orderBy,
fieldNames) { /* ... */ };
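As a hedged sketch of the two arities (the job object and the com.example.EmployeeRecord class are hypothetical placeholders):

// Four argument version: a free-form input query plus a count query.
DBInputFormat.setInput(job, 'com.example.EmployeeRecord',
  'SELECT id, name FROM employees', 'SELECT COUNT(id) FROM employees');

// Six argument version: a table name, WHERE conditions, an ORDER BY field and the field names.
DBInputFormat.setInput(job, 'com.example.EmployeeRecord',
  'employees', 'salary > 50000', 'id', ['id', 'name', 'salary']);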
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop FileInputFormat.
*/
var FileInputFormat = {};
/**
* Add a path to the list of inputs for the map-reduce job.
*
* @param {Job} job - The Hadoop job
* @param {string} path - The path to be added to the list of inputs for the map-reduce job
*/
FileInputFormat.addInputPath = function (job, path) { /* ... */ };
/**
* Add the given comma separated paths to the list of inputs for the map-reduce job.
*
* @param {Job} job - The Hadoop job
* @param {string} paths - The comma separated paths to be added to the list of inputs for the map-reduce job
*/
FileInputFormat.addInputPaths = function (job, paths) { /* ... */ };
/**
 * Get the class name of the PathFilter set for the input paths.
*
* @param {Job} job - The Hadoop job
*
* @returns {string}
*/
FileInputFormat.getInputPathFilter = function (job) { /* ... */ };
/**
* Get the list of input paths for the map-reduce job.
*
* @param {Job} job - The Hadoop job
*
* @returns {string[]}
*/
FileInputFormat.getInputPaths = function (job) { /* ... */ };
/**
* Get the maximum split size.
*
* @param {Job} job - The Hadoop job
*
* @returns {number}
*/
FileInputFormat.getMaxSplitSize = function (job) { /* ... */ };
/**
* Get the minimum split size.
*
* @param {Job} job - The Hadoop job
*
* @returns {number}
*/
FileInputFormat.getMinSplitSize = function (job) { /* ... */ };
/**
* Set the PathFilter by class name to be applied to the input paths for the map-reduce job.
*
* @param {Job} job - The Hadoop job
* @param {string} filterClass - The PathFilter class name to use
*/
FileInputFormat.setInputPathFilter = function (job, filterClass) { /* ... */ };
/**
 * Set the given comma separated paths as the list of inputs for the map-reduce job.
 *
 * @param {Job} job - The Hadoop job
 * @param {string} paths - The comma separated paths to be set as the list of inputs for the map-reduce job
*/
FileInputFormat.setInputPaths = function (job, paths) { /* ... */ };
/**
* Set the maximum split size.
*
* @param {Job} job - The Hadoop job
* @param {number} splitSize - The maximum split size
*/
FileInputFormat.setMaxSplitSize = function (job, splitSize) { /* ... */ };
/**
* Set the minimum split size.
*
* @param {Job} job - The Hadoop job
* @param {number} splitSize - The minimum split size
*/
FileInputFormat.setMinSplitSize = function (job, splitSize) { /* ... */ };
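To sketch how these calls fit together (the job object and the paths are hypothetical placeholders):

// Register inputs, one path at a time and as a comma separated list.
FileInputFormat.addInputPath(job, '/data/logs/2014-01-01');
FileInputFormat.addInputPaths(job, '/data/logs/2014-01-02,/data/logs/2014-01-03');

// Constrain split sizes (in bytes) so each mapper receives between 1 MB and 64 MB of input.
FileInputFormat.setMinSplitSize(job, 1024 * 1024);
FileInputFormat.setMaxSplitSize(job, 64 * 1024 * 1024);

// Inspect what has been registered so far.
var inputPaths = FileInputFormat.getInputPaths(job);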
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop KeyValueTextInputFormat.
*/
var KeyValueTextInputFormat = {};
/* Same APIs as the FileInputFormat */
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop NLineInputFormat.
*/
var NLineInputFormat = {};
/**
* Get the number of lines per split.
*
* @param {Job} job - The Hadoop job
*
* @returns {number}
*/
NLineInputFormat.getNumLinesPerSplit = function (job) { /* ... */ };
/**
* Get the number of splits for the given file.
*
* @param {string} path - The input path
* @param {Configuration} conf - The Hadoop configuration
* @param {number} numLinesPerSplit - The number of lines per split
*
* @returns {number}
*/
NLineInputFormat.getSplitsForFile = function (path, conf, numLinesPerSplit) { /* ... */ };
/**
* Set the number of lines per split.
*
* @param {Job} job - The Hadoop job
* @param {number} numLines - The number of lines per split
*/
NLineInputFormat.setNumLinesPerSplit = function (job, numLines) { /* ... */ };
/* Same APIs as the FileInputFormat */
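A short sketch (with hypothetical job and conf placeholders):

// Hand each mapper 1000 lines of input rather than a byte-sized block.
NLineInputFormat.setNumLinesPerSplit(job, 1000);
var linesPerSplit = NLineInputFormat.getNumLinesPerSplit(job); // 1000

// Estimate how many splits a given file would produce at that setting.
var numSplits = NLineInputFormat.getSplitsForFile('/data/input.txt', conf, linesPerSplit);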
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop OracleDataDrivenDBInputFormat.
*/
var OracleDataDrivenDBInputFormat = {};
/* Same APIs as the DataDrivenDBInputFormat */
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop SequenceFileInputFormat.
*/
var SequenceFileInputFormat = {};
/* Same APIs as the FileInputFormat */
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop SequenceFileAsBinaryInputFormat.
*/
var SequenceFileAsBinaryInputFormat = {};
/* Same APIs as the SequenceFileInputFormat */
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop SequenceFileAsTextInputFormat.
*/
var SequenceFileAsTextInputFormat = {};
/* Same APIs as the SequenceFileInputFormat */
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop SequenceFileInputFilter.
*/
var SequenceFileInputFilter = {};
/**
* Set the filter class.
*
* @param {Job} job - The Hadoop job
 * @param {string} filterClass - The filter class name; the class must implement the SequenceFileInputFilter.Filter
 *                               interface
*/
SequenceFileInputFilter.setFilterClass = function (job, filterClass) { /* ... */ };
/* Same APIs as the SequenceFileInputFormat */
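For instance, wiring in one of Hadoop's built-in filters (the job object is a hypothetical placeholder; the inner class is written in Java binary-name syntax):

// RegexFilter keeps only records whose keys match a regular expression; the pattern itself
// is configured separately on the job configuration.
SequenceFileInputFilter.setFilterClass(job,
  'org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter$RegexFilter');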
For example usage of all available APIs, please see the unit tests.
/**
* Represents a Hadoop TextInputFormat.
*/
var TextInputFormat = {};
/* Same APIs as the FileInputFormat */