Permalink
Browse files

Merge pull request #131 from tylergannon/master

	Added XLSX support, optional row validation, and SQLite support for insert_update_db
  • Loading branch information...
thbar committed Jul 2, 2013
2 parents af7ee0b + eb4f934 commit 15ace62d0c913f2db7dfc2be528fa7fdf2b87756
@@ -82,6 +82,8 @@ def flush
res.free
when "ActiveRecord::ConnectionAdapters::Mysql2Adapter"
res.each { none = false }
+ when "ActiveRecord::ConnectionAdapters::SQLite3Adapter"
+ res.each { none = false }
else raise "Unsupported adapter #{conn.class} for this destination"
end
View
@@ -309,12 +309,13 @@ def process_control(control)
control = ETL::Control::Control.resolve(control)
say_on_own_line "Processing control #{control.file}"
- ETL::Engine.job = ETL::Execution::Job.create!(
- :control_file => control.file,
- :status => 'executing',
- :batch_id => ETL::Engine.batch ? ETL::Engine.batch.id : nil
- )
-
+ ETL::Engine.job = ETL::Execution::Job.new.tap do |job|
+ job.control_file = control.file
+ job.status = 'executing'
+ job.batch_id = ETL::Engine.batch ? ETL::Engine.batch.id : nil
+ job.save!
+ end
+
execute_dependencies(control)
start_time = Time.now
@@ -10,6 +10,8 @@ def initialize(source, options={})
configure
end
+ attr_reader :validate_rows
+
def get_fields_names(file)
File.open(file) do |input|
fields = CSV.parse(input.readline, options).first
@@ -43,7 +45,7 @@ def each
end
line += 1
row = {}
- validate_row(raw_row, line, file)
+ validate_row(raw_row, line, file) if self.validate_rows
raw_row.each_with_index do |value, index|
f = fields[index]
row[f.name] = value
@@ -70,6 +72,12 @@ def validate_row(row, line, file)
end
def configure
+ @validate_rows = if source.configuration.has_key?(:validate_rows)
+ source.configuration[:validate_rows]
+ else
+ true
+ end
+
source.definition.each do |options|
case options
when Symbol
@@ -90,4 +98,4 @@ def initialize(name)
end
end
end
-end
+end
@@ -1,10 +1,10 @@
-optional_require 'spreadsheet'
+optional_require 'roo'
module ETL
module Parser
class ExcelParser < ETL::Parser::Parser
- attr_accessor :ignore_blank_line
+ attr_accessor :ignore_blank_line, :worksheet_column, :validate_rows
# Initialize the parser
# * <tt>source</tt>: The Source object
@@ -20,18 +20,27 @@ def each
ETL::Engine.logger.debug "parsing #{file}"
line = 0
lines_skipped = 0
- book = Spreadsheet.open file
+ book = Roo::Spreadsheet.open file
loopworksheets = []
if worksheets.empty?
- loopworksheets = book.worksheets
+ loopworksheets = book.sheets
else
worksheets.each do |index|
- loopworksheets << book.worksheet(index)
+ loopworksheets << book.sheet(index)
end
end
+
+ sheet_index = -1
- loopworksheets.each do |sheet|
+ book.each_with_pagename do |name, sheet|
+ sheet_index += 1
+ # puts "Sheet: #{name}"
+ # puts worksheets.inspect
+ if !worksheets.empty? && !worksheets.include?(sheet_index)
+ # puts "No!!! #{sheet_index.inspect}"
+ next
+ end
sheet.each do |raw_row|
if lines_skipped < source.skip_lines
ETL::Engine.logger.debug "skipping line"
@@ -44,11 +53,12 @@ def each
lines_skipped += 1
next
end
- validate_row(raw_row, line, file)
+ validate_row(raw_row, line, file) if self.validate_rows
raw_row.each_with_index do |value, index|
f = fields[index]
row[f.name] = value
end
+ row[worksheet_column] = name if worksheet_column
yield row
end
end
@@ -87,6 +97,12 @@ def configure
end unless source.definition[:worksheets].nil?
self.ignore_blank_line = source.definition[:ignore_blank_line]
+ self.worksheet_column = source.definition[:worksheet_column]
+ self.validate_rows = if source.configuration.has_key?(:validate_rows)
+ source.configuration[:validate_rows]
+ else
+ true
+ end
source.definition[:fields].each do |options|
case options

0 comments on commit 15ace62

Please sign in to comment.