Browse files

Cater for carriage return characters within csv data when guessing line ending
  • Loading branch information...
1 parent 8fd8422 commit 95c50629b7d343ad13521951c76b5e4e4793aea7 @chrismhilton chrismhilton committed Feb 14, 2014
View
12 lib/smarter_csv/smarter_csv.rb
@@ -21,7 +21,7 @@ def SmarterCSV.process(input, options={}, &block) # first parameter: filename
f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
if options[:row_sep] == :auto
- options[:row_sep] = SmarterCSV.guess_line_ending( f )
+ options[:row_sep] = SmarterCSV.guess_line_ending( f, options )
f.rewind
end
$/ = options[:row_sep]
@@ -201,12 +201,16 @@ def self.only_or_except_limit_execution( options, option_name, key )
end
# limitation: this currently reads the whole file in before making a decision
- def self.guess_line_ending( filehandle )
+ def self.guess_line_ending( filehandle, options )
counts = {"\n" => 0 , "\r" => 0, "\r\n" => 0}
+ quoted_char = false
+ # count how many of the pre-defined line-endings we find
+ # ignoring those contained within quote characters
filehandle.each_char do |c|
- next if c !~ /\r|\n|\r\n/
- counts[c] += 1 # count how many of the pre-defined line-endings we find
+ quoted_char = !quoted_char if c == options[:quote_char]
+ next if quoted_char || c !~ /\r|\n|\r\n/
+ counts[c] += 1
end
# find the key/value pair with the largest counter:
k,v = counts.max_by{|k,v| v}
View
3 spec/fixtures/carriage_returns_quoted.csv
@@ -0,0 +1,3 @@
+Band,Members,Albums
+New Order,"Bernard Sumner Peter Hook Stephen Morris Gillian Gilbert","Movement Power, Corruption and Lies Low-Life Brotherhood Substance"
+Led Zeppelin,"Jimmy Page Robert Plant John Bonham John Paul Jones","Led Zeppelin Led Zeppelin II Led Zeppelin III Led Zeppelin IV"
View
26 spec/smarter_csv/carriage_return_spec.rb
@@ -71,6 +71,19 @@
data[7][:city].should == "Liverpool"
end
+ it 'should process a file with more quoted text carriage return characters (\r) than line ending characters (\n)' do
+ row_sep = "\n"
+ text_sep = "\r"
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", {:row_sep => row_sep})
+ data.flatten.size.should == 2
+ data[0][:band].should == "New Order"
+ data[0][:members].should == ["Bernard Sumner", "Peter Hook", "Stephen Morris", "Gillian Gilbert"].join(text_sep)
+ data[0][:albums].should == ["Movement", "Power, Corruption and Lies", "Low-Life", "Brotherhood", "Substance"].join(text_sep)
+ data[1][:band].should == "Led Zeppelin"
+ data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
+ data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
+ end
+
end
describe 'process files with line endings in automatic mode' do
@@ -141,4 +154,17 @@
data[7][:city].should == "Liverpool"
end
+ it 'should process a file with more quoted text carriage return characters (\r) than line ending characters (\n)' do
+ row_sep = "\n"
+ text_sep = "\r"
+ data = SmarterCSV.process("#{fixture_path}/carriage_returns_quoted.csv", {:row_sep => :auto})
+ data.flatten.size.should == 2
+ data[0][:band].should == "New Order"
+ data[0][:members].should == ["Bernard Sumner", "Peter Hook", "Stephen Morris", "Gillian Gilbert"].join(text_sep)
+ data[0][:albums].should == ["Movement", "Power, Corruption and Lies", "Low-Life", "Brotherhood", "Substance"].join(text_sep)
+ data[1][:band].should == "Led Zeppelin"
+ data[1][:members].should == ["Jimmy Page", "Robert Plant", "John Bonham", "John Paul Jones"].join(text_sep)
+ data[1][:albums].should == ["Led Zeppelin", "Led Zeppelin II", "Led Zeppelin III", "Led Zeppelin IV"].join(text_sep)
+ end
+
end

0 comments on commit 95c5062

Please sign in to comment.