From f75a52d19c5a7db6e6f7c89ac4000e14e4dd437c Mon Sep 17 00:00:00 2001 From: Henrik N Date: Fri, 24 Aug 2012 11:46:38 +0200 Subject: [PATCH] Input data string can be any encoding. --- README.md | 11 +++++++---- lib/bankgiro_inbetalningar.rb | 2 +- lib/bankgiro_inbetalningar/parser.rb | 2 +- spec/bankgiro_inbetalningar/parser_spec.rb | 12 ++++++++++-- spec/bankgiro_inbetalningar_spec.rb | 2 +- spec/spec_helper.rb | 4 ++++ 6 files changed, 24 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 301c8bc..629f838 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,9 @@ Or install it yourself as: Use the convenience method `BankgiroInbetalningar.parse` to parse a file: ```ruby -res = BankgiroInbetalningar.parse('BgMaxfil4.txt') +res = BankgiroInbetalningar.parse("BgMaxfil4.txt") # Or -data = File.read("BgMaxfil4.txt") +data = File.read("BgMaxfil4.txt").force_encoding("ISO-8859-1") res = BankgiroInbetalningar.parse_data(data) raise "oops" unless res.valid? @@ -44,8 +44,11 @@ res.payments.each do |p| end ``` -See the specs for more details. Note that all text is in UTF-8, as it should be, -and not in ISO-8859-1 as Bankgirot prefers. It is the 21st century. +See the specs for more details. + +Files are expected to be ISO-8859-1 (as Bankgirot prefers), but data strings +can be in any encoding, as long as `String#encoding` is correct. The library +returns UTF-8. It *is* the 21st century. ## Todo / Missing features diff --git a/lib/bankgiro_inbetalningar.rb b/lib/bankgiro_inbetalningar.rb index 70c2e32..118126d 100644 --- a/lib/bankgiro_inbetalningar.rb +++ b/lib/bankgiro_inbetalningar.rb @@ -4,7 +4,7 @@ module BankgiroInbetalningar def self.parse(filename) - data = File.read(filename) + data = File.read(filename).force_encoding("ISO-8859-1") parse_data(data) end diff --git a/lib/bankgiro_inbetalningar/parser.rb b/lib/bankgiro_inbetalningar/parser.rb index 557351f..55cbbc5 100644 --- a/lib/bankgiro_inbetalningar/parser.rb +++ b/lib/bankgiro_inbetalningar/parser.rb @@ -5,7 +5,7 @@ class Parser attr_accessor :result def initialize(data) - @raw_data ||= data.encode('utf-8', 'iso-8859-1') + @raw_data ||= data.encode("UTF-8") end def run diff --git a/spec/bankgiro_inbetalningar/parser_spec.rb b/spec/bankgiro_inbetalningar/parser_spec.rb index 6551ff0..f2c1928 100644 --- a/spec/bankgiro_inbetalningar/parser_spec.rb +++ b/spec/bankgiro_inbetalningar/parser_spec.rb @@ -4,10 +4,18 @@ module BankgiroInbetalningar describe Parser do context "parsing sample file 4" do - let(:data) { File.read(fixture_path('BgMaxfil4.txt')) } + let(:data) { data_from_file('BgMaxfil4.txt') } let(:parser) { Parser.new(data) } let(:result) { parser.run ; parser.result } + context "with non Latin-1 data" do + let(:data) { data_from_file('BgMaxfil4.txt').encode("UTF-8") } + + it "handles that fine" do + result.payments[1].payer.name.should include "Olles färg" + end + end + it "returns valid results" do result.should be_valid end @@ -74,7 +82,7 @@ module BankgiroInbetalningar end context "parsing a broken sample file 4" do - let(:data) { File.read(fixture_path('BgMaxfil4_broken.txt')) } + let(:data) { data_from_file('BgMaxfil4_broken.txt') } let(:parser) { Parser.new(data) } let(:result) { parser.run ; parser.result } diff --git a/spec/bankgiro_inbetalningar_spec.rb b/spec/bankgiro_inbetalningar_spec.rb index 09392fb..9d9c5e2 100644 --- a/spec/bankgiro_inbetalningar_spec.rb +++ b/spec/bankgiro_inbetalningar_spec.rb @@ -18,7 +18,7 @@ describe BankgiroInbetalningar, ".parse_data" do context "parsing a minimal file" do - let(:data) { File.read(fixture_path('minimal.txt')) } + let(:data) { data_from_file('minimal.txt') } subject { BankgiroInbetalningar.parse_data(data) } it "finds the timestamp" do diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 053906a..a7efd06 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,6 +1,10 @@ $: << File.expand_path('../../lib', __FILE__) require 'bankgiro_inbetalningar' +def data_from_file(name) + File.read(fixture_path(name)).force_encoding("ISO-8859-1") +end + def fixture_path(name) File.expand_path("../fixtures/#{name}", __FILE__) end