From 3f93860e1359a49f8063e7c02e193b08f5dd5046 Mon Sep 17 00:00:00 2001
From: Ben Prew <ben@throwingbones.com>
Date: Fri, 31 Jan 2020 15:10:11 -0800
Subject: [PATCH 1/8] Pin rubies to OS default versions

---
 .ruby-version |  2 +-
 .travis.yml   | 11 ++++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/.ruby-version b/.ruby-version
index 76521af..633c00d 100644
--- a/.ruby-version
+++ b/.ruby-version
@@ -1 +1 @@
-ruby-2.2.0
+2.0.0-p648
diff --git a/.travis.yml b/.travis.yml
index 86cd0b0..a86d447 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,11 @@
 language: ruby
 rvm:
-  - 2.0.0
-  - 2.3.3
-  - 2.4.0
+  # Mac High Sierra
+  - 2.0.0-p648
+  # Mac Mojave
+  - 2.3.7
+  # Ubuntu 19.10
+  - 2.5
+  # Mac Catalina
+  - 2.6
 script: "bundle exec rake"

From 7e15a61006ace84f722f348c06bbc75b9f6ea138 Mon Sep 17 00:00:00 2001
From: Ben Prew <ben@throwingbones.com>
Date: Fri, 31 Jan 2020 15:12:00 -0800
Subject: [PATCH 2/8] Update gems to highest version supported by Ruby 2.0.  Add
 pry to devel gems

---
 Gemfile        |  1 -
 Gemfile.lock   | 42 ++++++++++++++++++++++++++++--------------
 reckon.gemspec |  4 +---
 3 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/Gemfile b/Gemfile
index 5378d16..22e17d6 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,5 +1,4 @@
 source "http://rubygems.org"
-
 gemspec
 
 gem 'rake'
diff --git a/Gemfile.lock b/Gemfile.lock
index e69c33a..ba2d0a5 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -3,7 +3,6 @@ PATH
   specs:
     reckon (0.4.4)
       chronic (>= 0.3.0)
-      fastercsv (>= 1.5.1)
       highline (>= 1.5.2)
       terminal-table (>= 1.4.2)
 
@@ -11,24 +10,39 @@ GEM
   remote: http://rubygems.org/
   specs:
     chronic (0.10.2)
-    diff-lcs (1.1.3)
-    fastercsv (1.5.5)
-    highline (1.6.21)
-    rake (10.0.4)
-    rspec (2.11.0)
-      rspec-core (~> 2.11.0)
-      rspec-expectations (~> 2.11.0)
-      rspec-mocks (~> 2.11.0)
-    rspec-core (2.11.1)
-    rspec-expectations (2.11.2)
-      diff-lcs (~> 1.1.3)
-    rspec-mocks (2.11.1)
-    terminal-table (1.4.5)
+    coderay (1.1.2)
+    diff-lcs (1.3)
+    highline (2.0.3)
+    method_source (0.9.2)
+    pry (0.12.2)
+      coderay (~> 1.1.0)
+      method_source (~> 0.9.0)
+    rake (12.3.3)
+    rspec (3.9.0)
+      rspec-core (~> 3.9.0)
+      rspec-expectations (~> 3.9.0)
+      rspec-mocks (~> 3.9.0)
+    rspec-core (3.9.1)
+      rspec-support (~> 3.9.1)
+    rspec-expectations (3.9.0)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.9.0)
+    rspec-mocks (3.9.1)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.9.0)
+    rspec-support (3.9.2)
+    terminal-table (1.8.0)
+      unicode-display_width (~> 1.1, >= 1.1.1)
+    unicode-display_width (1.6.1)
 
 PLATFORMS
   ruby
 
 DEPENDENCIES
+  pry (>= 0.12.2)
   rake
   reckon!
   rspec (>= 1.2.9)
+
+BUNDLED WITH
+   1.17.3
diff --git a/reckon.gemspec b/reckon.gemspec
index ddc6af4..9b44164 100644
--- a/reckon.gemspec
+++ b/reckon.gemspec
@@ -1,4 +1,3 @@
-# -*- encoding: utf-8 -*-
 $:.push File.expand_path("../lib", __FILE__)
 
 Gem::Specification.new do |s|
@@ -16,9 +15,8 @@ Gem::Specification.new do |s|
   s.require_paths = ["lib"]
 
   s.add_development_dependency "rspec", ">= 1.2.9"
-  s.add_runtime_dependency "fastercsv", ">= 1.5.1"
+  s.add_development_dependency "pry", ">= 0.12.2"
   s.add_runtime_dependency "chronic", ">= 0.3.0"
   s.add_runtime_dependency "highline", ">= 1.5.2"
   s.add_runtime_dependency "terminal-table", ">= 1.4.2"
 end
-

From 5ce43ae0b36ee25cc313bd63a85129a7b7c49783 Mon Sep 17 00:00:00 2001
From: Ben Prew <ben@throwingbones.com>
Date: Fri, 31 Jan 2020 15:14:06 -0800
Subject: [PATCH 3/8] bug: fix order-dependent test

Sort isn't stable, so sorting by date in each_row_backwards meant that the "Book Store"
transaction wasn't always row 7. Find the row by its description string instead of by index.
---
 spec/reckon/app_spec.rb | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/spec/reckon/app_spec.rb b/spec/reckon/app_spec.rb
index acb05dc..f39b674 100644
--- a/spec/reckon/app_spec.rb
+++ b/spec/reckon/app_spec.rb
@@ -1,5 +1,4 @@
 #!/usr/bin/env ruby
-# encoding: utf-8
 
 require "spec_helper"
 require 'rubygems'
@@ -8,10 +7,10 @@
 describe Reckon::App do
   context 'with chase csv input' do
     before do
-      @chase = Reckon::App.new(:string => BANK_CSV)
-      @chase.learn_from( BANK_LEDGER )
+      @chase = Reckon::App.new(string: BANK_CSV)
+      @chase.learn_from(BANK_LEDGER)
       @rows = []
-      @chase.each_row_backwards { |row| @rows.push( row ) }
+      @chase.each_row_backwards { |row| @rows.push(row) }
     end
 
     describe "each_row_backwards" do
@@ -27,7 +26,11 @@
 
     describe "weighted_account_match" do
       it "should guess the correct account" do
-        @chase.weighted_account_match( @rows[7] ).first[:account].should == "Expenses:Books"
+        row = @rows.find { |n| n[:description] =~ /Book Store/ }
+
+        result = @chase.weighted_account_match(row).first
+        result[:account].should == "Expenses:Books"
+        result[:cosine].should > 0.0
       end
     end
   end
@@ -95,6 +98,5 @@
 2004/05/27 Book Store
   Expenses:Books                 $20.00
   Liabilities:MasterCard
-  LEDGER
-
+LEDGER
 end

From 718909de827f0e757cda16be93bcd8ba6321113a Mon Sep 17 00:00:00 2001
From: Ben Prew <ben@throwingbones.com>
Date: Fri, 31 Jan 2020 15:16:12 -0800
Subject: [PATCH 4/8] bug: fix order-dependent test by choosing the lowest
 index date column first

Sorting by date_score isn't stable, so either date field for Broker Canada data could've
been returned. Added index to the sort key to use the column that came first.  This
behavior matches the 3-4 csv files I process from my financial institutions.
---
 lib/reckon/csv_parser.rb | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/lib/reckon/csv_parser.rb b/lib/reckon/csv_parser.rb
index d00dc17..d820119 100644
--- a/lib/reckon/csv_parser.rb
+++ b/lib/reckon/csv_parser.rb
@@ -192,20 +192,17 @@ def detect_columns
         end
       end
 
-      results.reject! {|i| money_column_indices.include?(i[:index]) }
-      self.date_column_index = results.sort { |a, b| b[:date_score] <=> a[:date_score] }.first[:index]
-      results.reject! {|i| i[:index] == date_column_index }
-      @date_column = DateColumn.new( columns[ self.date_column_index ], @options )
-
-      if ( money_column_indices.length == 1 )
-        @money_column = MoneyColumn.new( columns[money_column_indices[0]],
-                                        @options )
+      results.reject! { |i| money_column_indices.include?(i[:index]) }
+      # sort by highest score followed by lowest index
+      @date_column_index = results.max_by { |n| [n[:date_score], -n[:index]] }[:index]
+      results.reject! { |i| i[:index] == date_column_index }
+      @date_column = DateColumn.new(columns[date_column_index], @options)
+
+      @money_column = MoneyColumn.new(columns[money_column_indices[0]], @options)
+      if money_column_indices.length == 1
         detect_sign_column if @money_column.positive?
       else
-        @money_column = MoneyColumn.new( columns[money_column_indices[0]],
-                                        @options )
-        @money_column.merge!(
-          MoneyColumn.new( columns[money_column_indices[1]], @options ) )
+        @money_column.merge! MoneyColumn.new(columns[money_column_indices[1]], @options)
       end
 
       self.description_column_indices = results.map { |i| i[:index] }

From 9c953645d1b63145900b528dcc1cec1be6de90ce Mon Sep 17 00:00:00 2001
From: Ben Prew <ben@throwingbones.com>
Date: Fri, 31 Jan 2020 15:24:49 -0800
Subject: [PATCH 5/8] Remove fastercsv, ruby 2.0 is our minimum version.

High Sierra installs 2.0, so it's unlikely that someone would have a ruby < 2.0
installed. High Sierra is 2 versions behind the current macOS version (Catalina).
---
 lib/reckon.rb            | 11 +++--------
 lib/reckon/csv_parser.rb | 13 +++----------
 2 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/lib/reckon.rb b/lib/reckon.rb
index 9d33b3c..6b0d17b 100755
--- a/lib/reckon.rb
+++ b/lib/reckon.rb
@@ -1,19 +1,14 @@
 #!/usr/bin/env ruby
 
 require 'rubygems'
-if RUBY_VERSION =~ /^1\.9/ || RUBY_VERSION =~ /^2/
-  require 'csv'
-else
-  require 'fastercsv'
-end
+require 'chronic'
+require 'csv'
 require 'highline/import'
 require 'optparse'
-require 'chronic'
-require 'time'
 require 'terminal-table'
+require 'time'
 
 require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "app"))
 require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "ledger_parser"))
 require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "csv_parser"))
 require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "money"))
-
diff --git a/lib/reckon/csv_parser.rb b/lib/reckon/csv_parser.rb
index d820119..b7341a1 100644
--- a/lib/reckon/csv_parser.rb
+++ b/lib/reckon/csv_parser.rb
@@ -1,5 +1,4 @@
 #coding: utf-8
-require 'pp'
 
 module Reckon
   class CSVParser
@@ -227,21 +226,15 @@ def columns
 
     def parse
       data = options[:string] || File.read(options[:file])
-
-      if RUBY_VERSION =~ /^1\.9/ || RUBY_VERSION =~ /^2/
-        data = data.force_encoding(options[:encoding] || 'BINARY').encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?')
-        csv_engine = CSV
-      else
-        csv_engine = FasterCSV
-      end
-
-      @csv_data = csv_engine.parse data.strip, :col_sep => options[:csv_separator] || ','
+      data = data.force_encoding(options[:encoding] || 'BINARY').encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?')
+      @csv_data = CSV.parse data.strip, :col_sep => options[:csv_separator] || ','
       if options[:contains_header]
         options[:contains_header].times { csv_data.shift }
       end
       csv_data
     end
 
+
     @settings = { :testing => false }
 
     def self.settings

From e7753929b924dccfa1ffff093158ee4dcaf1dfb3 Mon Sep 17 00:00:00 2001
From: Ben Prew <ben@throwingbones.com>
Date: Fri, 31 Jan 2020 15:53:20 -0800
Subject: [PATCH 6/8] bug: don't try to parse rows that the user considers
 header rows

Since we throw them away anyway, we should just skip them
---
 lib/reckon/csv_parser.rb       | 11 ++++++-----
 spec/reckon/csv_parser_spec.rb | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/lib/reckon/csv_parser.rb b/lib/reckon/csv_parser.rb
index b7341a1..f91e766 100644
--- a/lib/reckon/csv_parser.rb
+++ b/lib/reckon/csv_parser.rb
@@ -225,15 +225,16 @@ def columns
     end
 
     def parse
+      rows = []
       data = options[:string] || File.read(options[:file])
       data = data.force_encoding(options[:encoding] || 'BINARY').encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?')
-      @csv_data = CSV.parse data.strip, :col_sep => options[:csv_separator] || ','
-      if options[:contains_header]
-        options[:contains_header].times { csv_data.shift }
+      data.each_line.with_index do |line, i|
+        next if i < (options[:contains_header] || 0)
+        rows << CSV.parse_line(line, col_sep: options[:csv_separator] || ',')
       end
-      csv_data
-    end
 
+      @csv_data = rows
+    end
 
     @settings = { :testing => false }
 
diff --git a/spec/reckon/csv_parser_spec.rb b/spec/reckon/csv_parser_spec.rb
index c623e81..af1f8e5 100755
--- a/spec/reckon/csv_parser_spec.rb
+++ b/spec/reckon/csv_parser_spec.rb
@@ -42,6 +42,24 @@
     it "should work with other separators" do
       Reckon::CSVParser.new(:string => "one;two\nthree;four", :csv_separator => ';').columns.should == [['one', 'three'], ['two', 'four']]
     end
+
+    describe 'file with invalid csv in header' do
+      file = %q(
+
+="0234500012345678";21/11/2015;19/02/2016;36;19/02/2016;1234,37 EUR
+
+Date de l'opération;Libellé;Détail de l'écriture;Montant de l'opération;Devise
+19/02/2016;VIR RECU 508160;VIR RECU 1234567834S DE: Francois REF: 123457891234567894561231 PROVENANCE: DE Allemagne ;50,00;EUR
+18/02/2016;COTISATION JAZZ;COTISATION JAZZ ;-8,10;EUR
+)
+      it 'should ignore invalid header lines' do
+        Reckon::CSVParser.new(string: file, contains_header: 4)
+      end
+
+      it 'should fail' do
+        expect { Reckon::CSVParser.new(string: file, contains_header: 1) }.to raise_error(CSV::MalformedCSVError)
+      end
+    end
   end
 
   describe "columns" do

From deba42b8065565f014e1e299d6efeacc35d52584 Mon Sep 17 00:00:00 2001
From: Ben Prew <ben@throwingbones.com>
Date: Fri, 31 Jan 2020 15:56:36 -0800
Subject: [PATCH 7/8] Use CharDet to detect char encoding, strip BOM from file

If the user doesn't pass an encoding option, we try to determine the encoding of the
file using CharDet, then convert it to UTF-8 before parsing it as CSV.  Also, strip the
BOM, if it exists.  Fall back to BINARY as a last resort.
---
 Gemfile.lock                         |  2 ++
 lib/reckon.rb                        |  1 +
 lib/reckon/csv_parser.rb             | 19 ++++++++++++++-----
 reckon.gemspec                       |  1 +
 spec/data_fixtures/bom_utf8_file.csv |  1 +
 spec/reckon/csv_parser_spec.rb       | 17 ++++++++++++++++-
 6 files changed, 35 insertions(+), 6 deletions(-)
 create mode 100644 spec/data_fixtures/bom_utf8_file.csv

diff --git a/Gemfile.lock b/Gemfile.lock
index ba2d0a5..42cf42f 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -4,6 +4,7 @@ PATH
     reckon (0.4.4)
       chronic (>= 0.3.0)
       highline (>= 1.5.2)
+      rchardet (>= 1.8.0)
       terminal-table (>= 1.4.2)
 
 GEM
@@ -18,6 +19,7 @@ GEM
       coderay (~> 1.1.0)
       method_source (~> 0.9.0)
     rake (12.3.3)
+    rchardet (1.8.0)
     rspec (3.9.0)
       rspec-core (~> 3.9.0)
       rspec-expectations (~> 3.9.0)
diff --git a/lib/reckon.rb b/lib/reckon.rb
index 6b0d17b..b0a0097 100755
--- a/lib/reckon.rb
+++ b/lib/reckon.rb
@@ -1,6 +1,7 @@
 #!/usr/bin/env ruby
 
 require 'rubygems'
+require 'rchardet'
 require 'chronic'
 require 'csv'
 require 'highline/import'
diff --git a/lib/reckon/csv_parser.rb b/lib/reckon/csv_parser.rb
index f91e766..a561d6e 100644
--- a/lib/reckon/csv_parser.rb
+++ b/lib/reckon/csv_parser.rb
@@ -7,7 +7,7 @@ class CSVParser
     def initialize(options = {})
       self.options = options
       self.options[:currency] ||= '$'
-      parse
+      @csv_data = parse(options[:string] || File.read(options[:file]))
       filter_csv
       detect_columns
     end
@@ -224,16 +224,25 @@ def columns
       end
     end
 
-    def parse
+    def parse(data)
+      # Use force_encoding to convert the string to utf-8 with as few invalid characters
+      # as possible.
+      data.force_encoding(try_encoding(data))
+      data = data.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
+      data.sub!("\xEF\xBB\xBF", '') # strip byte order marker, if it exists
+
       rows = []
-      data = options[:string] || File.read(options[:file])
-      data = data.force_encoding(options[:encoding] || 'BINARY').encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?')
       data.each_line.with_index do |line, i|
         next if i < (options[:contains_header] || 0)
         rows << CSV.parse_line(line, col_sep: options[:csv_separator] || ',')
       end
 
-      @csv_data = rows
+      rows
+    end
+
+    def try_encoding(data)
+      cd = CharDet.detect(data)
+      options[:encoding] || cd['encoding'] || 'BINARY'
     end
 
     @settings = { :testing => false }
diff --git a/reckon.gemspec b/reckon.gemspec
index 9b44164..74b8bf5 100644
--- a/reckon.gemspec
+++ b/reckon.gemspec
@@ -19,4 +19,5 @@ Gem::Specification.new do |s|
   s.add_runtime_dependency "chronic", ">= 0.3.0"
   s.add_runtime_dependency "highline", ">= 1.5.2"
   s.add_runtime_dependency "terminal-table", ">= 1.4.2"
+  s.add_runtime_dependency "rchardet", ">= 1.8.0"
 end
diff --git a/spec/data_fixtures/bom_utf8_file.csv b/spec/data_fixtures/bom_utf8_file.csv
new file mode 100644
index 0000000..5ad16fc
--- /dev/null
+++ b/spec/data_fixtures/bom_utf8_file.csv
@@ -0,0 +1 @@
+﻿"Date","Time","TimeZone","Name","Type","Status","Currency","Gross","Fee","Net","From Email Address","To Email Address","Transaction ID","Shipping Address","Address Status","Item Title","Item ID","Shipping and Handling Amount","Insurance Amount","Sales Tax","Option 1 Name","Option 1 Value","Option 2 Name","Option 2 Value","Reference Txn ID","Invoice Number","Custom Number","Quantity","Receipt ID","Balance","Address Line 1","Address Line 2/District/Neighborhood","Town/City","State/Province/Region/County/Territory/Prefecture/Republic","Zip/Postal Code","Country","Contact Phone Number","Subject","Note","Country Code","Balance Impact"
diff --git a/spec/reckon/csv_parser_spec.rb b/spec/reckon/csv_parser_spec.rb
index af1f8e5..6c29ea2 100755
--- a/spec/reckon/csv_parser_spec.rb
+++ b/spec/reckon/csv_parser_spec.rb
@@ -33,16 +33,31 @@
   end
 
   describe "parse" do
+    it "should use binary encoding if none specified and chardet fails" do
+      allow(CharDet).to receive(:detect).and_return({'encoding' => nil})
+      app = Reckon::CSVParser.new(:file => File.expand_path(File.join(File.dirname(__FILE__), "..", "data_fixtures", "extratofake.csv")))
+      expect(app.try_encoding("foobarbaz")).to eq("BINARY")
+    end
     it "should work with foreign character encodings" do
       app = Reckon::CSVParser.new(:file => File.expand_path(File.join(File.dirname(__FILE__), "..", "data_fixtures", "extratofake.csv")))
       app.columns[0][0..2].should == ["Data", "10/31/2012", "11/01/2012"]
-      app.columns[2].first.should == "Hist?rico"
+      app.columns[2].first.should == "Histórico"
     end
 
     it "should work with other separators" do
       Reckon::CSVParser.new(:string => "one;two\nthree;four", :csv_separator => ';').columns.should == [['one', 'three'], ['two', 'four']]
     end
 
+    it 'should parse quoted lines' do
+      file = %q("30.03.2015";"29.03.2015";"09.04.2015";"BARAUSZAHLUNGSENTGELT";"5266 xxxx xxxx 9454";"";"0";"EUR";"0,00";"EUR";"-3,50";"0")
+      Reckon::CSVParser.new(string: file, csv_separator: ';', comma_separates_cents: true).columns.length.should == 12
+    end
+
+    it 'should parse csv with BOM' do
+      file = File.expand_path(File.join(File.dirname(__FILE__), "..", "data_fixtures", "bom_utf8_file.csv"))
+      Reckon::CSVParser.new(file: file).columns.length.should == 41
+    end
+
     describe 'file with invalid csv in header' do
       file = %q(
 

From 0e9e9771e0b4fc2519664fcd95e8d2dcba9dbd2b Mon Sep 17 00:00:00 2001
From: Ben Prew <ben@throwingbones.com>
Date: Fri, 31 Jan 2020 15:58:30 -0800
Subject: [PATCH 8/8] Minor cleanup, use require_relative where appropriate

---
 lib/reckon/csv_parser.rb       | 2 +-
 spec/reckon/csv_parser_spec.rb | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/reckon/csv_parser.rb b/lib/reckon/csv_parser.rb
index a561d6e..a9407a2 100644
--- a/lib/reckon/csv_parser.rb
+++ b/lib/reckon/csv_parser.rb
@@ -159,7 +159,7 @@ def detect_sign_column
 
     def detect_columns
       results, found_likely_money_column = evaluate_columns(columns)
-      self.money_column_indices = [ results.sort { |a, b| b[:money_score] <=> a[:money_score] }.first[:index] ]
+      self.money_column_indices = [ results.max_by { |n| n[:money_score] }[:index] ]
 
       if !found_likely_money_column
         found_likely_double_money_columns = false
diff --git a/spec/reckon/csv_parser_spec.rb b/spec/reckon/csv_parser_spec.rb
index 6c29ea2..319a541 100755
--- a/spec/reckon/csv_parser_spec.rb
+++ b/spec/reckon/csv_parser_spec.rb
@@ -1,9 +1,9 @@
 #!/usr/bin/env ruby
-# encoding: utf-8
+# coding: utf-8
 
-require "spec_helper"
+require_relative "../spec_helper"
 require 'rubygems'
-require 'reckon'
+require_relative '../../lib/reckon'
 
 Reckon::CSVParser.settings[:testing] = true