diff --git a/VERSION b/VERSION index 7ec1d6db..3e3c2f1e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1.0 +2.1.1 diff --git a/lib/wukong.rb b/lib/wukong.rb index 5875827b..ee3b9d6e 100644 --- a/lib/wukong.rb +++ b/lib/wukong.rb @@ -3,8 +3,10 @@ require 'wukong/datatypes' require 'wukong/periodic_monitor' require 'wukong/logger' + autoload :BadRecord, 'wukong/bad_record' autoload :TypedStruct, 'wukong/typed_struct' + module Wukong autoload :Script, 'wukong/script' autoload :Streamer, 'wukong/streamer' diff --git a/lib/wukong/bad_record.rb b/lib/wukong/bad_record.rb index 9c479df5..57831bcc 100644 --- a/lib/wukong/bad_record.rb +++ b/lib/wukong/bad_record.rb @@ -8,10 +8,11 @@ # return BadRecord.new("do_stuff_to-failed", record) # end # -class BadRecord < Struct.new( - :errors, - :record - ) +class BadRecord + include Gorillib::Model + field :errors, String, position: 0 + field :record, Whatever, position: 1 + def initialize errors='', *record_fields super errors, record_fields end diff --git a/lib/wukong/encoding.rb b/lib/wukong/encoding.rb index 9dad9417..91bbf2d8 100644 --- a/lib/wukong/encoding.rb +++ b/lib/wukong/encoding.rb @@ -1,22 +1,22 @@ require 'htmlentities' require 'addressable/uri' -# Fix a bug (?) in the HTMLEntities encoder class with $KCODE='NONE' -HTMLEntities::Encoder.class_eval do -private - def extended_entity_regexp - @extended_entity_regexp ||= ( - if encoding_aware? - regexp = '[^\u{20}-\u{7E}]' - else - # regexp = '[^\x20-\x7E]' - regexp = '[\x00-\x1f]|[\xc0-\xfd][\x80-\xbf]+' - end - regexp += "|'" if @flavor == 'html4' - Regexp.new(regexp) - ) - end -end +# # Fix a bug (?) in the HTMLEntities encoder class with $KCODE='NONE' +# HTMLEntities::Encoder.class_eval do +# private +# def extended_entity_regexp +# @extended_entity_regexp ||= ( +# if encoding_aware? +# regexp = '[^\u{20}-\u{7E}]' +# else +# # regexp = '[^\x20-\x7E]' +# regexp = '[\x00-\x1f]|[\xc0-\xfd][\x80-\xbf]+' +# end +# regexp += "|'" if @flavor == 'html4' +# Regexp.new(regexp) +# ) +# end +# end module Wukong # diff --git a/lib/wukong/extensions.rb b/lib/wukong/extensions.rb index 7023497f..1ed24a67 100644 --- a/lib/wukong/extensions.rb +++ b/lib/wukong/extensions.rb @@ -2,15 +2,5 @@ # These pull in the minimal functionality of the extlib|activesupport family of # gems. # -require 'extlib/blank' -require 'extlib/class' -require 'wukong/extensions/enumerable' -require 'wukong/extensions/symbol' -require 'wukong/extensions/hash' -require 'wukong/extensions/hash_like' -require 'wukong/extensions/array' -require 'wukong/extensions/struct' -require 'wukong/extensions/module' -require 'wukong/extensions/string' -require 'wukong/extensions/date_time' -require 'wukong/extensions/emittable' +require 'gorillib' +require 'gorillib/model' diff --git a/lib/wukong/script.rb b/lib/wukong/script.rb index 2654aaa1..9c5899ea 100644 --- a/lib/wukong/script.rb +++ b/lib/wukong/script.rb @@ -1,6 +1,12 @@ -require 'pathname' -require 'wukong/extensions' require 'configliere' ; Settings.use(:commandline, :env_var, :define) +require 'gorillib' +require 'gorillib/model' +require 'gorillib/model/positional_fields' +require 'gorillib/model/serialization' +require 'gorillib/hash/deep_merge' +require 'gorillib/type/extended' +require 'gorillib/datetime/to_flat' +# require 'wukong' require 'wukong/script/hadoop_command' require 'wukong/script/local_command' diff --git a/lib/wukong/streamer.rb b/lib/wukong/streamer.rb index fb9d2456..da84dabb 100644 --- a/lib/wukong/streamer.rb +++ b/lib/wukong/streamer.rb @@ -3,6 +3,7 @@ module Streamer autoload :Base, 'wukong/streamer/base' autoload :LineStreamer, 'wukong/streamer/line_streamer' autoload :RecordStreamer, 'wukong/streamer/record_streamer' + autoload :ModelStreamer, 'wukong/streamer/model_streamer' autoload :JsonStreamer, 'wukong/streamer/json_streamer' autoload :StructStreamer, 'wukong/streamer/struct_streamer' autoload :StructRecordizer, 'wukong/streamer/struct_streamer' diff --git a/lib/wukong/streamer/base.rb b/lib/wukong/streamer/base.rb index 7f518542..e917d4f2 100644 --- a/lib/wukong/streamer/base.rb +++ b/lib/wukong/streamer/base.rb @@ -66,7 +66,8 @@ def recordize line # yours if you override this method. # def emit record - puts record.to_flat.join("\t") + return if record.nil? + puts record.to_tsv end # diff --git a/spec/wukong/encoding_spec.rb b/spec/wukong/encoding_spec.rb index 72cbeb94..06e9d56c 100644 --- a/spec/wukong/encoding_spec.rb +++ b/spec/wukong/encoding_spec.rb @@ -32,5 +32,4 @@ end end - end diff --git a/wukong.gemspec b/wukong.gemspec index e325f8ac..798189c7 100644 --- a/wukong.gemspec +++ b/wukong.gemspec @@ -5,11 +5,11 @@ Gem::Specification.new do |s| s.name = "wukong" - s.version = "2.1.0" + s.version = "2.1.1" s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= s.authors = ["Infochimps"] - s.date = "2012-08-13" + s.date = "2012-08-17" s.description = " Treat your dataset like a:\n\n * stream of lines when it's efficient to process by lines\n * stream of field arrays when it's efficient to deal directly with fields\n * stream of lightweight objects when it's efficient to deal with objects\n\n Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.\n" s.email = "coders@infochimps.org" s.executables = ["wu-lign", "hdp-parts_to_keys.rb"]