Permalink
Browse files

Started writing API documentation

  • Loading branch information...
1 parent 1531ad5 commit eacab0a53aeeb8f08479fa80b36000fd5db1f21c @iconara committed Sep 13, 2012
Showing with 189 additions and 7 deletions.
  1. +3 −1 .gitignore
  2. +6 −0 .yardopts
  3. +6 −1 Gemfile
  4. +6 −4 Gemfile.lock
  5. +1 −0 lib/hadoop.rb
  6. +7 −0 lib/rubydoop.rb
  7. +160 −1 lib/rubydoop/dsl.rb
View
@@ -1,4 +1,6 @@
/lib/*.jar
/ext/build
/tmp
-/pkg
+/pkg
+/doc
+/.yardoc
View
@@ -0,0 +1,6 @@
+--title Rubydoop
+--readme README.md
+--charset utf-8
+--markup markdown
+--no-private
+'lib/**/*.rb'
View
@@ -1,6 +1,11 @@
source :rubygems
+group :development do
+ gem 'yard'
+ gem 'maruku'
+ gem 'pry'
+end
+
group :test do
gem 'rspec'
- gem 'pry'
end
View
@@ -3,11 +3,9 @@ GEM
specs:
coderay (1.0.7)
diff-lcs (1.1.3)
+ maruku (0.6.0)
+ syntax (>= 1.0.0)
method_source (0.8)
- pry (0.9.10)
- coderay (~> 1.0.5)
- method_source (~> 0.8)
- slop (~> 3.3.1)
pry (0.9.10-java)
coderay (~> 1.0.5)
method_source (~> 0.8)
@@ -23,10 +21,14 @@ GEM
rspec-mocks (2.11.2)
slop (3.3.3)
spoon (0.0.1)
+ syntax (1.0.0)
+ yard (0.8.2.1)
PLATFORMS
java
DEPENDENCIES
+ maruku
pry
rspec
+ yard
View
@@ -3,6 +3,7 @@
require 'java'
+# @private
module Hadoop
module Io
include_package 'org.apache.hadoop.io'
View
@@ -6,23 +6,30 @@
require 'hadoop'
+# See {JobDefinition} for the job configuration DSL documentation, and {Package}
+# for the packaging documentation.
module Rubydoop
+ # @private
def self.create_mapper(conf)
create_instance(conf.get(MAPPER_KEY))
end
+ # @private
def self.create_reducer(conf)
create_instance(conf.get(REDUCER_KEY))
end
+ # @private
def self.create_combiner(conf)
create_instance(conf.get(COMBINER_KEY))
end
+ # @private
def self.create_partitioner(conf)
create_instance(conf.get(PARTITIONER_KEY))
end
+ # @private
def self.create_grouping_comparator(conf)
create_instance(conf.get(GROUPING_COMPARATOR_KEY))
end
View
@@ -1,14 +1,22 @@
# encoding: utf-8
module Rubydoop
+ # Main entrypoint into the configuration DSL.
+ #
+ # The tool runner will set the global variable `$rubydoop_context`
+ # to an object that contains references to the necessary Hadoop
+ # configuration.
+ #
+ # Within a configure block you can specify one or more jobs, see
+ # the examples in the {JobDefinition} documentation for more details.
def self.configure(&block)
- # $rubydoop_context will be set by the Java host
if $rubydoop_context
configure_ctx = ConfigurationCreator.new($rubydoop_context)
configure_ctx.instance_exec(*$rubydoop_context.arguments, &block)
end
end
+ # @private
class ConfigurationCreator
def initialize(context)
@context = context
@@ -20,54 +28,171 @@ def job(name, &block)
end
end
+ # Job configuration DSL.
+ #
+ # @example Configuring a job
+ # Rubydoop.configure do |*args|
+ # job 'word_count' do
+ # input args[0]
+ # output args[1]
+ #
+ # mapper WordCount::Mapper
+ #     reducer WordCount::Reducer
+ #
+ # output_key Hadoop::Io::Text
+ # output_value Hadoop::Io::IntWritable
+ # end
+ # end
+ #
class JobDefinition
+ # @private
def initialize(context, job)
@context = context
@job = job
end
+ # Sets the input paths of the job.
+ #
+ # Calls `setInputFormatClass` on the Hadoop job and uses the static
+ # `setInputPaths` on the input format to set the job's input path.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setInputFormatClass(java.lang.Class) Hadoop's Job#setInputFormatClass
+ #
+ # @param [String|Enumerable] paths The input paths, either a comma separated
+ # string or an `Enumerable` of strings (which will be joined with a comma).
+ # @param [Hash] options
+ # @option options [JavaClass] :format The input format to use, defaults to `TextInputFormat`
def input(paths, options={})
paths = paths.join(',') if paths.is_a?(Enumerable)
format = options[:format] || Hadoop::Mapreduce::Lib::Input::TextInputFormat
format.set_input_paths(@job, paths)
@job.set_input_format_class(format)
end
+ # Sets the output path of the job.
+ #
+ # Calls `setOutputFormatClass` on the Hadoop job and uses the static
+ # `setOutputPath` on the output format to set the job's output path.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputFormatClass(java.lang.Class) Hadoop's Job#setOutputFormatClass
+ #
+ # @param [String] dir The output path
+ # @param [Hash] options
+ # @option options [JavaClass] :format The output format to use, defaults to `TextOutputFormat`
def output(dir, options={})
format = options[:format] || Hadoop::Mapreduce::Lib::Output::TextOutputFormat
format.set_output_path(@job, Hadoop::Fs::Path.new(dir))
@job.set_output_format_class(format)
end
+ # Sets a job property.
+ #
+ # Calls `set` on the Hadoop Job's configuration.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#set(java.lang.String,%20java.lang.String) Hadoop's Configuration#set
+ #
+ # @param [String] property The property name
+ # @param [String] value The property value, must be a string
def set(property, value)
@job.configuration.set(property, value)
end
+ # Sets the mapper class.
+ #
+ # The equivalent of calling `setMapperClass` on a Hadoop job, but instead
+ # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
+ # that works with Hadoop.
+ #
+ # The class only needs to implement the method `map`, which will be called
+ # exactly like a Java mapper class' `map` method would be called.
+ #
+  # You can optionally implement `setup` and `cleanup`, which mirror the
+ # methods of the same name in Java mappers.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Mapper.html Hadoop's Mapper
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setMapperClass(java.lang.Class) Hadoop's Job#setMapperClass
+ #
+ # @param [Class] cls The (Ruby) mapper class.
def mapper(cls)
@job.configuration.set(MAPPER_KEY, cls.name)
@job.set_mapper_class(@context.proxy_class(:mapper))
end
+ # Sets the reducer class.
+ #
+ # The equivalent of calling `setReducerClass` on a Hadoop job, but instead
+ # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
+ # that works with Hadoop.
+ #
+ # The class only needs to implement the method `reduce`, which will be called
+ # exactly like a Java reducer class' `reduce` method would be called.
+ #
+  # You can optionally implement `setup` and `cleanup`, which mirror the
+ # methods of the same name in Java reducers.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Reducer.html Hadoop's Reducer
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setReducerClass(java.lang.Class) Hadoop's Job#setReducerClass
+ #
+ # @param [Class] cls The (Ruby) reducer class.
def reducer(cls)
@job.configuration.set(REDUCER_KEY, cls.name)
@job.set_reducer_class(@context.proxy_class(:reducer))
end
+ # Sets the combiner class.
+ #
+ # The equivalent of calling `setCombinerClass` on a Hadoop job, but instead
+ # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
+ # that works with Hadoop.
+ #
+ # A combiner should implement `reduce`, just like reducers.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setCombinerClass(java.lang.Class) Hadoop's Job#setCombinerClass
+ #
+ # @param [Class] cls The (Ruby) combiner class.
def combiner(cls)
@job.configuration.set(COMBINER_KEY, cls.name)
@job.set_combiner_class(@context.proxy_class(:combiner))
end
+ # Sets a custom partitioner.
+ #
+ # The equivalent of calling `setPartitionerClass` on a Hadoop job, but instead
+ # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
+ # that works with Hadoop.
+ #
+ # The class must implement `partition`, which will be called exactly like
+ # a Java partitioner would.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setPartitionerClass(java.lang.Class) Hadoop's Job#setPartitionerClass
+ #
+ # @param [Class] cls The (Ruby) partitioner class.
def partitioner(cls)
@job.configuration.set(PARTITIONER_KEY, cls.name)
@job.set_partitioner_class(@context.proxy_class(:partitioner))
end
+ # Sets a custom grouping comparator.
+ #
+ # The equivalent of calling `setGroupingComparatorClass` on a Hadoop job,
+ # but instead of a Java class you pass a Ruby class and Rubydoop will wrap
+ # it in a way that works with Hadoop.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setGroupingComparatorClass(java.lang.Class) Hadoop's Job#setGroupingComparatorClass
+ #
+  # @param [Class] cls The (Ruby) grouping comparator class.
def grouping_comparator(cls)
@job.configuration.set(GROUPING_COMPARATOR_KEY, cls.name)
@job.set_grouping_comparator_class(@context.proxy_class(:grouping_comparator))
end
+  # If you need to manipulate the Hadoop job in some way that isn't covered by
+ # this DSL, this is the method for you. It yields the `Job`, letting you
+ # do whatever you want with it.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html Hadoop's Job
+ #
+ # @yield [job] The raw Hadoop Job instance
def raw(&block)
yield @job
end
@@ -80,12 +205,46 @@ def self.class_setter(dsl_name)
end
end
+ public
+
+ # @!method map_output_key(cls)
+ #
+ # Sets the mapper's output key type.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setMapOutputKeyClass(java.lang.Class) Hadoop's Job#setMapOutputKeyClass
+ #
+ # @param [Class] cls The mapper's output key type
class_setter :map_output_key
+
+ # @!method map_output_value(cls)
+ #
+ # Sets the mapper's output value type.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setMapOutputValueClass(java.lang.Class) Hadoop's Job#setMapOutputValueClass
+ #
+ # @param [Class] cls The mapper's output value type
class_setter :map_output_value
+
+ # @!method output_key(cls)
+ #
+ # Sets the reducer's output key type.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputKeyClass(java.lang.Class) Hadoop's Job#setOutputKeyClass
+ #
+ # @param [Class] cls The reducer's output key type
class_setter :output_key
+
+  # @!method output_value(cls)
+ #
+ # Sets the reducer's output value type.
+ #
+ # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputValueClass(java.lang.Class) Job#setOutputValueClass
+ #
+ # @param [Class] cls The reducer's output value type
class_setter :output_value
end
+ # @private
class Context
java_import 'java.util.LinkedList'

0 comments on commit eacab0a

Please sign in to comment.