Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

AVRO-411. Add Ruby data file interop tests. Contributed by Jeff Hodges.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/avro/trunk@908170 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
commit 3e3f0fbe27d319893393f34a887b2ab427a038c0 1 parent 835e486
@cutting cutting authored
View
2  CHANGES.txt
@@ -344,6 +344,8 @@ Trunk (unreleased changes)
AVRO-383. Optimizing ResolvingDecoder for default values (thiru)
+ AVRO-411. Add Ruby data file interop tests. (Jeff Hodges via cutting)
+
BUG FIXES
AVRO-176. Safeguard against bad istreams before reading. (sbanacho)
View
8 build.sh
@@ -44,20 +44,21 @@ case "$target" in
(cd lang/py; ant test)
(cd lang/c; ./build.sh test)
(cd lang/c++; ./build.sh test)
- # (cd lang/ruby; rake test)
+ (cd lang/ruby; rake test)
# create interop test data
(cd lang/java; ant interop-data-generate)
#(cd lang/py; ant interop-data-generate)
(cd lang/c; ./build.sh interop-data-generate)
#(cd lang/c++; make interop-data-generate)
+ (cd lang/ruby; rake generate_interop)
# run interop data tests
(cd lang/java; ant interop-data-test)
#(cd lang/py; ant interop-data-test)
(cd lang/c; ./build.sh interop-data-test)
- #(cd lang/c; make interop-data-test)
#(cd lang/c++; make interop-data-test)
+ (cd lang/ruby; rake interop)
# run interop rpc tests
/bin/bash share/test/interop/bin/test_rpc_interop.sh
@@ -101,6 +102,9 @@ case "$target" in
(cd lang/c; ./build.sh clean)
(cd lang/c++; ./build.sh clean)
+
+ (cd lang/ruby; rake clean)
+
;;
*)
View
29 lang/ruby/Rakefile
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+require 'rubygems'
require 'echoe'
Echoe.new('avro') do |p|
p.author = "Jeff Hodges"
@@ -22,3 +23,31 @@ Echoe.new('avro') do |p|
p.url = "http://hadoop.apache.org/avro/"
p.runtime_dependencies = %w[rubygems yajl]
end
+
require 'rake/testtask'

# Locations used by the interop tasks, anchored at this Rakefile's directory.
# Defined ahead of the tasks that read them so the file reads top to bottom.
HERE = File.expand_path(File.dirname(__FILE__))
SHARE = HERE + '/../../share'
SCHEMAS = SHARE + '/test/schemas'
BUILD = HERE + '/../../build'

# `rake interop` runs every test file matching interop/test*.rb.
Rake::TestTask.new(:interop) do |t|
  t.pattern = 'interop/test*.rb'
end

# `rake generate_interop` writes two random datums, generated from the schema
# in SCHEMAS/interop.avsc, to BUILD/interop/data/ruby.avro.
#
# Set the SEED environment variable to an integer to make the generated data
# reproducible; a non-numeric SEED raises ArgumentError.
task :generate_interop do
  $:.unshift(HERE + '/lib')
  $:.unshift(HERE + '/test')
  require 'fileutils'
  require 'avro'
  require 'random_data'

  # ENV values are always Strings, but Kernel#srand needs an Integer, so
  # convert here. An unset or empty SEED means "do not reseed".
  seed = ENV['SEED']
  seed = (seed.nil? || seed.empty?) ? nil : Integer(seed)

  schema = Avro::Schema.parse(File.read(SCHEMAS + '/interop.avsc'))
  r = RandomData.new(schema, seed)

  path = BUILD + '/interop/data/ruby.avro'
  # Do not rely on another language's build step having created the directory.
  FileUtils.mkdir_p(File.dirname(path))

  # 'wb': the container file is binary data, so disable newline translation.
  # The block form guarantees the handle is released even if Writer.new raises.
  File.open(path, 'wb') do |f|
    writer = Avro::DataFile::Writer.new(f, Avro::IO::DatumWriter.new(schema), schema)
    begin
      2.times { writer << r.next }
    ensure
      writer.close
    end
  end
end
+
View
24 lang/ruby/lib/avro/data_file.rb
@@ -18,7 +18,7 @@
module Avro
module DataFile
- VERSION = 0
+ VERSION = 1
MAGIC = "Obj" + [VERSION].pack('c')
MAGIC_SIZE = MAGIC.size
SYNC_SIZE = 16
@@ -50,22 +50,22 @@ def initialize(writer, datum_writer, writers_schema=nil)
if writers_schema
@sync_marker = Writer.generate_sync_marker
- meta['codec'] = 'null'
- meta['schema'] = writers_schema.to_s
+ meta['avro.codec'] = 'null'
+ meta['avro.schema'] = writers_schema.to_s
datum_writer.writers_schema = writers_schema
write_header
else
# open writer for reading to collect metadata
- dfr = DataFileReader.new(writer, Avro::IO::DatumReader.new)
+ dfr = Reader.new(writer, Avro::IO::DatumReader.new)
# FIXME(jmhodges): collect arbitrary metadata
# collect metadata
@sync_marker = dfr.sync_marker
- meta['codec'] = dfr.meta['codec']
+ meta['avro.codec'] = dfr.meta['avro.codec']
# get schema used to write existing file
- schema_from_file = dfr.meta['schema']
- meta['schema'] = schema_from_file
+ schema_from_file = dfr.meta['avro.schema']
+ meta['avro.schema'] = schema_from_file
datum_writer.writers_schema = Schema.parse(schema_from_file)
# seek to the end of the file and prepare for writing
@@ -128,10 +128,10 @@ def write_block
encoder.write_long(to_write.size)
# write block contents
- if meta['codec'] == 'null'
+ if meta['avro.codec'] == 'null'
writer.write(to_write)
else
- msg = "#{meta['codec'].inspect} coded is not supported"
+ msg = "#{meta['avro.codec'].inspect} coded is not supported"
raise DataFileError, msg
end
@@ -161,14 +161,14 @@ def initialize(reader, datum_reader)
read_header
# ensure the codec is valid
- codec_from_file = meta['codec']
+ codec_from_file = meta['avro.codec']
if codec_from_file && ! VALID_CODECS.include?(codec_from_file)
raise DataFileError, "Unknown codec: #{codec_from_file}"
end
# get ready to read
@block_count = 0
- datum_reader.writers_schema = Schema.parse meta['schema']
+ datum_reader.writers_schema = Schema.parse meta['avro.schema']
end
# Iterates through each datum in this file
@@ -209,7 +209,7 @@ def read_header
msg = 'Not an Avro data file: shorter than the Avro magic block'
raise DataFileError, msg
elsif magic_in_file != MAGIC
- msg = "Not an Avro data file: #{magic_in_file} doesn't match #{MAGIC}"
+ msg = "Not an Avro data file: #{magic_in_file.inspect} doesn't match #{MAGIC.inspect}"
raise DataFileError, msg
end
View
73 lang/ruby/test/random_data.rb
@@ -0,0 +1,73 @@
# Generates pseudo-random datums shaped like a given schema.
#
# The schema is walked by duck typing. Every node must respond to +type+
# (a String such as 'int' or 'record'); depending on that type the node is
# also asked for +items+ (array), +values+ (map), +fields+ (record; each field
# responds to +name+ and +type+), +schemas+ (union), +symbols+ (enum) or
# +size+ (fixed).
#
# Randomness comes from Kernel#rand, so seeding affects (and is affected by)
# the process-wide generator.
class RandomData
  # Characters used for random 'string' values and for map keys.
  CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'.freeze
  # Characters used for random 'bytes' and 'fixed' values.
  BYTEPOOL = '12345abcd'.freeze

  # schm:: root schema node to generate data for.
  # seed:: optional seed for Kernel#srand. May be an Integer or a numeric
  #        String (for example a value read from ENV). nil, false and the
  #        empty string leave the generator unseeded.
  #
  # Raises ArgumentError if a String seed is not a valid integer.
  def initialize(schm, seed=nil)
    seed = nil if seed.respond_to?(:empty?) && seed.empty?
    # srand only accepts integers; a raw String seed would raise TypeError.
    seed = Integer(seed) if seed
    srand(seed) if seed
    @seed = seed
    @schm = schm
  end

  # Returns a fresh random datum for the root schema.
  def next
    nextdata(@schm)
  end

  # Returns a random datum for the schema node +schm+.
  #
  # d:: current nesting depth. Arrays and maps get shorter as depth grows,
  #     which keeps recursive schemas from expanding without bound.
  #
  # Returns nil for 'null', for an enum without symbols, and for any type
  # string not listed below.
  def nextdata(schm, d=0)
    case schm.type
    when 'boolean'
      rand > 0.5
    when 'string'
      randstr
    when 'int'
      rand(Avro::Schema::INT_MAX_VALUE - Avro::Schema::INT_MIN_VALUE) + Avro::Schema::INT_MIN_VALUE
    when 'long'
      rand(Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) + Avro::Schema::LONG_MIN_VALUE
    when 'float'
      # Whole numbers only, in the range -1024..1024.
      (-1024 + 2048 * rand).round.to_f
    when 'double'
      Avro::Schema::LONG_MIN_VALUE + (Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) * rand
    when 'bytes'
      randstr(BYTEPOOL)
    when 'null'
      nil
    when 'array'
      arr = []
      container_length(d).times { arr << nextdata(schm.items, d + 1) }
      arr
    when 'map'
      map = {}
      container_length(d).times do
        # Key first, then value, so the sequence of rand calls is stable.
        # Duplicate keys overwrite, so the map may end up shorter.
        key = randstr
        map[key] = nextdata(schm.values, d + 1)
      end
      map
    when 'record'
      m = {}
      schm.fields.each do |field|
        m[field.name] = nextdata(field.type, d + 1)
      end
      m
    when 'union'
      # Pick one branch; a union does not count as a nesting level.
      types = schm.schemas
      nextdata(types[rand(types.size)], d)
    when 'enum'
      symbols = schm.symbols
      len = symbols.size
      return nil if len == 0
      symbols[rand(len)]
    when 'fixed'
      random_chars(BYTEPOOL, schm.size)
    end
  end

  # Returns a random string of 0..length characters drawn from +chars+.
  def randstr(chars=CHARPOOL, length=20)
    random_chars(chars, rand(length + 1))
  end

  private

  # Number of elements for an array or map at nesting depth +d+:
  # 2..6 at the top level, one fewer per level, never negative.
  def container_length(d)
    len = rand(5) + 2 - d
    len < 0 ? 0 : len
  end

  # Builds a string of exactly +count+ characters picked from +pool+.
  def random_chars(pool, count)
    out = ''.dup # unfrozen copy; the literal itself is never mutated
    # pool[i, 1] yields a one-character String on every Ruby version
    # (pool[i] is an Integer on 1.8).
    count.times { out << pool[rand(pool.size), 1] }
    out
  end
end
View
76 lang/ruby/test/test_help.rb
@@ -16,80 +16,8 @@
require 'rubygems'
require 'test/unit'
-require 'avro'
require 'stringio'
-
require 'fileutils'
FileUtils.mkdir_p('tmp')
-
-class RandomData
- def initialize(schm, seed=nil)
- srand(seed) if seed
- @seed = seed
- @schm = schm
- end
-
- def next
- nextdata(@schm)
- end
-
- def nextdata(schm, d=0)
- case schm.type
- when 'boolean'
- rand > 0.5
- when 'string'
- randstr()
- when 'int'
- rand(Avro::Schema::INT_MAX_VALUE - Avro::Schema::INT_MIN_VALUE) + Avro::Schema::INT_MIN_VALUE
- when 'long'
- rand(Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) + Avro::Schema::LONG_MIN_VALUE
- when 'float'
- (-1024 + 2048 * rand).round.to_f
- when 'double'
- Avro::Schema::LONG_MIN_VALUE + (Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) * rand
- when 'bytes'
- randstr(BYTEPOOL)
- when 'null'
- nil
- when 'array'
- arr = []
- len = rand(5) + 2 - d
- len = 0 if len < 0
- len.times{ arr << nextdata(schm.items, d+1) }
- arr
- when 'map'
- map = {}
- len = rand(5) + 2 - d
- len = 0 if len < 0
- len.times do
- map[nextdata(Avro::Schema::PrimitiveSchema.new('string'))] = nextdata(schm.values, d+1)
- end
- map
- when 'record'
- m = {}
- schm.fields.each do |field|
- m[field.name] = nextdata(field.type, d+1)
- end
- m
- when 'union'
- types = schm.schemas
- nextdata(types[rand(types.size)], d)
- when 'enum'
- symbols = schm.symbols
- len = symbols.size
- return nil if len == 0
- symbols[rand(len)]
- when 'fixed'
- BYTEPOOL[rand(BYTEPOOL.size), 1]
- end
- end
-
- CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
- BYTEPOOL = '12345abcd'
-
- def randstr(chars=CHARPOOL, length=20)
- str = ''
- rand(length+1).times { str << chars[rand(chars.size)] }
- str
- end
-end
+require 'avro'
+require 'random_data'
Please sign in to comment.
Something went wrong with that request. Please try again.