Permalink
Browse files

refactoring for cookery

  • Loading branch information...
jmay committed Apr 23, 2012
1 parent e3fab3d commit 4255a6786be9cbcb3a19016b00a7f155d113b495
View
@@ -4,8 +4,15 @@ class Masticate::Base
attr_reader :input_count, :output_count
attr_reader :csv_options
- def initialize(filename)
- @filename = filename
+ def initialize(args)
+ case args
+ when String
+ @filename = args
+ when Hash
+ configure(args)
+ else
+ raise "invalid initialization: #{args}"
+ end
end
def with_input
@@ -40,4 +47,8 @@ def standard_options(opts)
@csv_options[:quote_char] = opts[:quote_char] || "\0"
end
end
+
+ # def crunch(row)
+ # # noop
+ # end
end
View
@@ -16,24 +16,30 @@ def cook(opts)
recipe = File.read(recipefile).lines
standard_options(opts)
- recipe.each do |step|
+ steps = recipe.map do |step|
# puts step
argv = Shellwords.split(step)
masticator = Masticate::MyOptionParser.new
command, options = masticator.parse(argv)
- puts "#{command}: #{options}"
- masticator.execute(command, options)
+ masticator.prepare(command, options)
end
- # @output_count = 0
- # with_input do |input|
- # while line = get
- # row = CSV.parse_line(line, csv_options)
- # emit(row.to_csv) if row
- # end
- # end
- # @output.close if opts[:output]
- #
+ @output_count = 0
+ headers = nil
+ with_input do |input|
+ while line = get
+ row = CSV.parse_line(line, csv_options)
+
+ steps.each do |step|
+ puts "APPLY #{step} to #{row}"
+ row = step.crunch(row)
+ end
+
+ emit(row.to_csv) if row
+ end
+ end
+ @output.close if opts[:output]
+
{
:input_count => @input_count,
:output_count => @output_count
View
@@ -2,25 +2,33 @@
require "csv"
class Masticate::Gsubber < Masticate::Base
- def gsub(opts)
+ def configure(opts)
standard_options(opts)
- field = opts[:field] or raise "missing field to gsub"
- from = Regexp.new(opts[:from]) or raise "Invalid regex '#{opts[:from]}' for conversion"
- to = opts[:to] or raise "missing 'to' string for gsub"
+ @field = opts[:field] or raise "missing field to gsub"
+ @from = Regexp.new(opts[:from]) or raise "Invalid regex '#{opts[:from]}' for conversion"
+ @to = opts[:to] or raise "missing 'to' string for gsub"
+ end
+
+ def set_headers(row)
+ @headers = row
+ @index = @headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
+ end
+ def gsub(opts)
+ configure(opts)
@output_count = 0
headers = nil
with_input do |input|
while line = get
row = CSV.parse_line(line, csv_options)
if !headers
headers = row
- index = headers.index(field) or raise "Unable to find column '#{field}' in headers"
+ index = headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
emit(line)
else
oldval = row[index]
- newval = oldval.gsub(from, to)
+ newval = oldval.gsub(@from, @to)
row[index] = newval
emit(row.to_csv)
end
@@ -33,4 +41,15 @@ def gsub(opts)
:output_count => @output_count
}
end
+
+ def crunch(row)
+ # puts "GSUB CRUNCH #{row}"
+ if !@headers
+ set_headers(row)
+ else
+ row[@index] = row[@index].gsub(@from, @to)
+ end
+ # puts "GSUB RESULT IS #{row}"
+ row
+ end
end
View
@@ -2,11 +2,15 @@
require "csv"
class Masticate::MaxRows < Masticate::Base
- def maxrows(opts)
+ def configure(opts)
standard_options(opts)
- groupby = opts[:by] or raise "missing field to group by"
- maxon = opts[:max] or raise "missing field to max on"
+ @groupby = opts[:by] or raise "missing field to group by"
+ @maxon = opts[:max] or raise "missing field to max on"
+ end
+
+ def maxrows(opts)
+ configure(opts)
@output_count = 0
headers = nil
@@ -16,8 +20,8 @@ def maxrows(opts)
row = CSV.parse_line(line, csv_options)
if !headers
headers = row
- index_by = headers.index(groupby) or raise "Unable to find column '#{groupby}'"
- index_max = headers.index(maxon) or raise "Unable to find column '#{maxon}'"
+ index_by = headers.index(@groupby) or raise "Unable to find column '#{@groupby}'"
+ index_max = headers.index(@maxon) or raise "Unable to find column '#{@maxon}'"
emit(line)
else
key = row[index_by]
@@ -45,4 +49,30 @@ def maxrows(opts)
:output_count => @output_count
}
end
+
+ def crunch(row)
+ if !@headers
+ @headers = row
+ @index_by = row.index(@groupby) or raise "Unable to find column '#{@groupby}'"
+ @index_max = row.index(@maxon) or raise "Unable to find column '#{@maxon}'"
+ @accum = {}
+ row
+ elsif row.nil?
+ # output the accumulated results
+ @accum.each do |k,row|
+ emit(row.to_csv)
+ end
+ else
+ key = row[@index_by]
+ if !@accum[key]
+ @accum[key] = row
+ else
+ oldscore = @accum[key][@index_max]
+ newscore = row[@index_max]
+ if newscore > oldscore
+ @accum[key] = row
+ end
+ end
+ end
+ end
end
@@ -79,6 +79,21 @@ def parse(argv = ARGV)
[@command, @options, filenames]
end
+ def prepare(command, options)
+ puts "PREPARING #{command}: #{options}"
+
+ klasses = {
+ 'gsub' => Masticate::Gsubber,
+ 'datify' => Masticate::Datify,
+ 'maxrows' => Masticate::MaxRows,
+ 'relabel' => Masticate::Relabel,
+ 'pluck' => Masticate::Plucker
+ }
+
+ klass = klasses[command]
+ klass.new(options)
+ end
+
def execute(command, options, filenames = nil)
filename = filenames.first
View
@@ -2,10 +2,17 @@
require "csv"
class Masticate::Plucker < Masticate::Base
- def pluck(opts)
+ def configure(opts)
standard_options(opts)
- fields = opts[:fields] or raise "missing fields to pluck"
+ @fields = opts[:fields] or raise "missing fields to pluck"
+ end
+
+ def pluck(opts)
+ configure(opts)
+ # standard_options(opts)
+ #
+ # fields = opts[:fields] or raise "missing fields to pluck"
@output_count = 0
headers = nil
@@ -14,7 +21,7 @@ def pluck(opts)
row = CSV.parse_line(line, csv_options)
if !headers
headers = row
- indexes = fields.map do |f|
+ indexes = @fields.map do |f|
case f
when String
headers.index(f) or raise "Unable to find column '#{f}'"
@@ -41,4 +48,27 @@ def pluck(opts)
:output_count => @output_count
}
end
+
+ def crunch(row)
+ if !@headers
+ @headers = row
+ @indexes = @fields.map do |f|
+ case f
+ when String
+ row.index(f) or raise "Unable to find column '#{f}'"
+ when Fixnum
+ if f > row.count
+ raise "Cannot pluck column #{f}, there are only #{row.count} fields"
+ else
+ f-1
+ end
+ else
+ raise "Invalid field descriptor '#{f}'"
+ end
+ end
+ end
+
+ # output is just the selected columns
+ @indexes.map {|i| row[i]}
+ end
end
View
@@ -3,17 +3,43 @@
# * assuming that input file is in valid CSV format (no validation)
class Masticate::Relabel < Masticate::Base
- def initialize(filename)
- @filename = filename
+ def configure(opts)
+ standard_options(opts)
+
+ @fields = opts[:fields] or raise "missing fieldnames for relabel"
end
def relabel(opts)
- fields = opts[:fields] or raise "missing fieldnames for relabel"
+ configure(opts)
+
+ @output_count = 0
+ headers = nil
+ with_input do |input|
+ while line = get
+ row = CSV.parse_line(line, csv_options)
+ if !headers
+ headers = @fields
+ emit(headers.to_csv)
+ else
+ emit(row.to_csv)
+ end
+ end
+ end
+ @output.close if opts[:output]
- File.unlink(opts[:output]) if opts[:output] && File.exists?(opts[:output])
- redirect = ">>#{opts[:output]}" if opts[:output]
+ # File.unlink(opts[:output]) if opts[:output] && File.exists?(opts[:output])
+ # redirect = ">>#{opts[:output]}" if opts[:output]
+ #
+ # system "/bin/echo -n '#{fields.to_csv}' #{redirect}"
+ # system "tail +2 #{@filename} #{redirect}"
+ end
- system "/bin/echo -n '#{fields.to_csv}' #{redirect}"
- system "tail +2 #{@filename} #{redirect}"
+ def crunch(row)
+ if !@headers
+ @headers = @fields
+ puts "output #{@headers}"
+ row = @headers
+ end
+ row
end
end
View
@@ -1,4 +1,4 @@
-gsub --field order_number --from '/,|(.00$)/'
+gsub --field order_number --from '/,|(.00$)/' --to ''
relabel --fields one,two,three,four,five,six,seven,eight,nine,ten,eleven,twelve,thirteen,fourteen
pluck --fields two,eight,fourteen
maxrows --by two --max eight
View
@@ -10,7 +10,7 @@
results = Masticate.cook(input, :output => tmp, :recipe => recipe)
output = File.read(tmp)
correct_output = File.read(File.dirname(__FILE__) + "/../data/cooking_result.csv")
-
+
output.should == correct_output
end
end

0 comments on commit 4255a67

Please sign in to comment.