Hadoop Input API


The purpose of the hadoop-input module is to provide programmatic access to the Hadoop input format classes listed below. Each one is exposed as a module property.
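As a rough sketch of how these properties might be pulled into a script (the package name used in require is an assumption for this illustration, not something this document specifies):

// Assumes the module is installed and required under the name "hadoop-input";
// the exact package name is an assumption for this illustration.
var hadoopInput = require('hadoop-input');

var FileInputFormat = hadoopInput.FileInputFormat;
var TextInputFormat = hadoopInput.TextInputFormat;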

CombineFileInputFormat

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop CombineFileInputFormat.
 */
var CombineFileInputFormat = {};

/* Same APIs as the FileInputFormat */

DataDrivenDBInputFormat

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop DataDrivenDBInputFormat.
 */
var DataDrivenDBInputFormat = {};

/**
 * Set the user-defined bounding query to use with a user-defined query.
 *
 * @param {Configuration} conf - The Hadoop configuration
 * @param {string} query - The user-defined bounding query to use. This *must* include the substring "$CONDITIONS"
 *                         (DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) inside the WHERE clause, so that
 *                         DataDrivenDBInputFormat knows where to insert split clauses, e.g., "SELECT foo FROM mytable
 *                         WHERE $CONDITIONS". Inside each split this is expanded to something like "SELECT foo FROM
 *                         mytable WHERE (id > 100) AND (id < 250)".
 */
DataDrivenDBInputFormat.setBoundingQuery = function (conf, query) { /* ... */ };
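A minimal usage sketch, assuming `conf` is a Hadoop Configuration handle obtained elsewhere (how it is created is outside the scope of this document); the query follows the $CONDITIONS convention described above:

// Assumes `conf` is a Hadoop Configuration handle obtained elsewhere.
// The $CONDITIONS placeholder marks where DataDrivenDBInputFormat inserts the
// per-split range predicate, e.g. (id > 100) AND (id < 250).
DataDrivenDBInputFormat.setBoundingQuery(conf, 'SELECT foo FROM mytable WHERE $CONDITIONS');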

/* Same APIs as the DBInputFormat */

DBInputFormat

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop DBInputFormat.
 */
var DBInputFormat = {};

/**
 * Initializes the map-part of the job with the appropriate input settings.
 *
 * @param {Job} job - The Hadoop job
 * @param {string} inputClass - The name of the Java class implementing DBWritable, which holds the tuple fields
 * @param {string} inputQueryOrTableName - The input query for the four argument version or the table name for the six
 *                                         argument version
 * @param {string} conditionsOrInputCountQuery - The input count query for the four argument version or the conditions
 *                                               for the six argument version
 * @param {string} [orderBy] - The field names to use in the ORDER BY clause
 * @param {string[]} [fieldNames] - The field names in the table
 */
DBInputFormat.setInput = function (job, inputClass, inputQueryOrTableName, conditionsOrInputCountQuery, orderBy,
                                   fieldNames) { /* ... */ };
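Below is a rough sketch of both call forms. It assumes `job` is a Hadoop Job handle obtained elsewhere, and `com.example.MyRecord` is a hypothetical Java class implementing DBWritable; both are placeholders, not part of this module's documented surface:

// Assumes `job` is a Hadoop Job handle; com.example.MyRecord is a hypothetical
// Java class implementing DBWritable.

// Six argument form: table name, conditions, order-by field and field names.
DBInputFormat.setInput(job, 'com.example.MyRecord', 'employees',
                       'salary > 50000', 'id', ['id', 'name', 'salary']);

// Four argument form: an input query plus a count query used to size the splits.
DBInputFormat.setInput(job, 'com.example.MyRecord',
                       'SELECT id, name, salary FROM employees',
                       'SELECT COUNT(id) FROM employees');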

FileInputFormat

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop FileInputFormat.
 */
var FileInputFormat = {};

/**
 * Add a path to the list of inputs for the map-reduce job.
 *
 * @param {Job} job - The Hadoop job
 * @param {string} path - The path to be added to the list of inputs for the map-reduce job
 */
FileInputFormat.addInputPath = function (job, path) { /* ... */ };

/**
 * Add the given comma separated paths to the list of inputs for the map-reduce job.
 *
 * @param {Job} job - The Hadoop job
 * @param {string} paths - The comma separated paths to be added to the list of inputs for the map-reduce job
 */
FileInputFormat.addInputPaths = function (job, paths) { /* ... */ };

/**
 * Get the PathFilter class name of the filter set for the input paths.
 *
 * @param {Job} job - The Hadoop job
 *
 * @returns {string}
 */
FileInputFormat.getInputPathFilter = function (job) { /* ... */ };

/**
 * Get the list of input paths for the map-reduce job.
 *
 * @param {Job} job - The Hadoop job
 *
 * @returns {string[]}
 */
FileInputFormat.getInputPaths = function (job) { /* ... */ };

/**
 * Get the maximum split size.
 *
 * @param {Job} job - The Hadoop job
 *
 * @returns {number}
 */
FileInputFormat.getMaxSplitSize = function (job) { /* ... */ };

/**
 * Get the minimum split size.
 *
 * @param {Job} job - The Hadoop job
 *
 * @returns {number}
 */
FileInputFormat.getMinSplitSize = function (job) { /* ... */ };

/**
 * Set the PathFilter by class name to be applied to the input paths for the map-reduce job.
 *
 * @param {Job} job - The Hadoop job
 * @param {string} filterClass - The PathFilter class name to use
 */
FileInputFormat.setInputPathFilter = function (job, filterClass) { /* ... */ };

/**
 * Sets the given comma separated paths as the list of inputs for the map-reduce job.
 *
 * @param {Job} job - The Hadoop job
 * @param {string} paths - The comma separated paths to be set as the list of inputs for the map-reduce job
 *
 * @returns {string[]}
 */
FileInputFormat.setInputPaths = function (job, paths) { /* ... */ };

/**
 * Set the maximum split size.
 *
 * @param {Job} job - The Hadoop job
 * @param {number} splitSize - The maximum split size
 *
 * @returns {number}
 */
FileInputFormat.setMaxSplitSize = function (job, splitSize) { /* ... */ };

/**
 * Set the minimum split size.
 *
 * @param {Job} job - The Hadoop job
 * @param {number} splitSize - The minimum split size
 *
 * @returns {number}
 */
FileInputFormat.setMinSplitSize = function (job, splitSize) { /* ... */ };
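A minimal sketch tying the FileInputFormat helpers together. It assumes `job` is a Hadoop Job handle obtained elsewhere; the HDFS paths and the filter class name are placeholders for this illustration:

// Assumes `job` is a Hadoop Job handle; paths and the filter class are placeholders.
FileInputFormat.setInputPaths(job, '/data/2015,/data/2016'); // replace the input list
FileInputFormat.addInputPath(job, '/data/2017');             // append a single path
FileInputFormat.setInputPathFilter(job, 'com.example.HiddenFileFilter');
FileInputFormat.setMinSplitSize(job, 64 * 1024 * 1024);      // 64 MB
FileInputFormat.setMaxSplitSize(job, 256 * 1024 * 1024);     // 256 MB

console.log(FileInputFormat.getInputPaths(job));   // the three paths added above
console.log(FileInputFormat.getMaxSplitSize(job)); // 268435456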

KeyValueTextInputFormat

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop KeyValueTextInputFormat.
 */
var KeyValueTextInputFormat = {};

/* Same APIs as the FileInputFormat */

NLineInputFormat

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop NLineInputFormat.
 */
var NLineInputFormat = {};

/**
 * Get the number of lines per split.
 *
 * @param {Job} job - The Hadoop job
 *
 * @returns {number}
 */
NLineInputFormat.getNumLinesPerSplit = function (job) { /* ... */ };

/**
 * Get the number of splits for the given file.
 *
 * @param {string} path - The input path
 * @param {Configuration} conf - The Hadoop configuration
 * @param {number} numLinesPerSplit - The number of lines per split
 *
 * @returns {number}
 */
NLineInputFormat.getSplitsForFile = function (path, conf, numLinesPerSplit) { /* ... */ };

/**
 * Set the number of lines per split.
 *
 * @param {Job} job - The Hadoop job
 * @param {number} numLines - The number of lines per split
 *
 * @returns {number}
 */
NLineInputFormat.setNumLinesPerSplit = function (job, numLines) { /* ... */ };
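A minimal sketch of the NLineInputFormat helpers, assuming `job` and `conf` are Hadoop Job and Configuration handles obtained elsewhere and that the input path is a placeholder:

// Assumes `job` is a Hadoop Job handle and `conf` a Configuration handle.
NLineInputFormat.setNumLinesPerSplit(job, 1000);       // 1000 input lines per mapper
var linesPerSplit = NLineInputFormat.getNumLinesPerSplit(job);

// Number of splits a specific file would produce at that setting.
var numSplits = NLineInputFormat.getSplitsForFile('/data/urls.txt', conf, linesPerSplit);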

/* Same APIs as the FileInputFormat */

OracleDataDrivenDBInputFormat

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop OracleDataDrivenDBInputFormat.
 */
var OracleDataDrivenDBInputFormat = {};

/* Same APIs as the DataDrivenDBInputFormat */

SequenceFileInputFormat

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop SequenceFileInputFormat.
 */
var SequenceFileInputFormat = {};

/* Same APIs as the FileInputFormat */

SequenceFileAsBinaryInputFormat

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop SequenceFileAsBinaryInputFormat.
 */
var SequenceFileAsBinaryInputFormat = {};

/* Same APIs as the SequenceFileInputFormat */

SequenceFileAsTextInputFormat

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop SequenceFileAsTextInputFormat.
 */
var SequenceFileAsTextInputFormat = {};

/* Same APIs as the SequenceFileInputFormat */

SequenceFileInputFilter

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop SequenceFileInputFilter.
 */
var SequenceFileInputFilter = {};

/**
 * Set the filter class.
 *
 * @param {Job} job - The Hadoop job
 * @param {string} filterClass - The record filter class name
 */
SequenceFileInputFilter.setFilterClass = function (job, filterClass) { /* ... */ };
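A minimal sketch, assuming `job` is a Hadoop Job handle obtained elsewhere and that `com.example.MyRecordFilter` is a hypothetical Java filter class (for Hadoop's SequenceFileInputFilter this would be a class implementing its Filter interface):

// Assumes `job` is a Hadoop Job handle; com.example.MyRecordFilter is a
// hypothetical Java class implementing SequenceFileInputFilter.Filter.
SequenceFileInputFilter.setFilterClass(job, 'com.example.MyRecordFilter');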

/* Same APIs as the SequenceFileInputFormat */

TextInputFormat

For example usage of all available APIs, please see the unit tests.

/**
 * Represents a Hadoop TextInputFormat.
 */
var TextInputFormat = {};

/* Same APIs as the FileInputFormat */