cantino · benprew · Feb 1, 2020 · Jan 31, 2020 · Jan 31, 2020 · Jan 31, 2020
diff --git a/.ruby-version b/.ruby-version
@@ -1 +1 @@
-ruby-2.2.0
+2.0.0-p648
diff --git a/.travis.yml b/.travis.yml
@@ -1,6 +1,11 @@
 language: ruby
 rvm:
-  - 2.0.0
-  - 2.3.3
-  - 2.4.0
+  # Mac High Sierra
+  - 2.0.0-p648
+  # Mac Mojave
+  - 2.3.7
+  # Ubuntu 19.10
+  - 2.5
+  # Mac Catalina
+  - 2.6
 script: "bundle exec rake"
diff --git a/Gemfile b/Gemfile
@@ -1,5 +1,4 @@
 source "http://rubygems.org"
-
 gemspec
 
 gem 'rake'
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -3,32 +3,48 @@ PATH
   specs:
     reckon (0.4.4)
       chronic (>= 0.3.0)
-      fastercsv (>= 1.5.1)
       highline (>= 1.5.2)
+      rchardet (>= 1.8.0)
       terminal-table (>= 1.4.2)
 
 GEM
   remote: http://rubygems.org/
   specs:
     chronic (0.10.2)
-    diff-lcs (1.1.3)
-    fastercsv (1.5.5)
-    highline (1.6.21)
-    rake (10.0.4)
-    rspec (2.11.0)
-      rspec-core (~> 2.11.0)
-      rspec-expectations (~> 2.11.0)
-      rspec-mocks (~> 2.11.0)
-    rspec-core (2.11.1)
-    rspec-expectations (2.11.2)
-      diff-lcs (~> 1.1.3)
-    rspec-mocks (2.11.1)
-    terminal-table (1.4.5)
+    coderay (1.1.2)
+    diff-lcs (1.3)
+    highline (2.0.3)
+    method_source (0.9.2)
+    pry (0.12.2)
+      coderay (~> 1.1.0)
+      method_source (~> 0.9.0)
+    rake (12.3.3)
+    rchardet (1.8.0)
+    rspec (3.9.0)
+      rspec-core (~> 3.9.0)
+      rspec-expectations (~> 3.9.0)
+      rspec-mocks (~> 3.9.0)
+    rspec-core (3.9.1)
+      rspec-support (~> 3.9.1)
+    rspec-expectations (3.9.0)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.9.0)
+    rspec-mocks (3.9.1)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.9.0)
+    rspec-support (3.9.2)
+    terminal-table (1.8.0)
+      unicode-display_width (~> 1.1, >= 1.1.1)
+    unicode-display_width (1.6.1)
 
 PLATFORMS
   ruby
 
 DEPENDENCIES
+  pry (>= 0.12.2)
   rake
   reckon!
   rspec (>= 1.2.9)
+
+BUNDLED WITH
+   1.17.3
diff --git a/lib/reckon.rb b/lib/reckon.rb
@@ -1,19 +1,15 @@
 #!/usr/bin/env ruby
 
 require 'rubygems'
-if RUBY_VERSION =~ /^1\.9/ || RUBY_VERSION =~ /^2/
-  require 'csv'
-else
-  require 'fastercsv'
-end
+require 'rchardet'
+require 'chronic'
+require 'csv'
 require 'highline/import'
 require 'optparse'
-require 'chronic'
-require 'time'
 require 'terminal-table'
+require 'time'
 
 require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "app"))
 require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "ledger_parser"))
 require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "csv_parser"))
 require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "money"))
-
diff --git a/lib/reckon/csv_parser.rb b/lib/reckon/csv_parser.rb
@@ -1,5 +1,4 @@
 #coding: utf-8
-require 'pp'
 
 module Reckon
   class CSVParser
@@ -8,7 +7,7 @@ class CSVParser
     def initialize(options = {})
       self.options = options
       self.options[:currency] ||= '$'
-      parse
+      @csv_data = parse(options[:string] || File.read(options[:file]))
       filter_csv
       detect_columns
     end
@@ -160,7 +159,7 @@ def detect_sign_column
 
     def detect_columns
       results, found_likely_money_column = evaluate_columns(columns)
-      self.money_column_indices = [ results.sort { |a, b| b[:money_score] <=> a[:money_score] }.first[:index] ]
+      self.money_column_indices = [ results.max_by { |n| n[:money_score] }[:index] ]
 
       if !found_likely_money_column
         found_likely_double_money_columns = false
@@ -192,20 +191,17 @@ def detect_columns
         end
       end
 
-      results.reject! {|i| money_column_indices.include?(i[:index]) }
-      self.date_column_index = results.sort { |a, b| b[:date_score] <=> a[:date_score] }.first[:index]
-      results.reject! {|i| i[:index] == date_column_index }
-      @date_column = DateColumn.new( columns[ self.date_column_index ], @options )
+      results.reject! { |i| money_column_indices.include?(i[:index]) }
+      # pick the column with the highest date score; ties go to the lowest index
+      @date_column_index = results.max_by { |n| [n[:date_score], -n[:index]] }[:index]
+      results.reject! { |i| i[:index] == date_column_index }
+      @date_column = DateColumn.new(columns[date_column_index], @options)
 
-      if ( money_column_indices.length == 1 )
-        @money_column = MoneyColumn.new( columns[money_column_indices[0]],
-                                        @options )
+      @money_column = MoneyColumn.new(columns[money_column_indices[0]], @options)
+      if money_column_indices.length == 1
         detect_sign_column if @money_column.positive?
       else
-        @money_column = MoneyColumn.new( columns[money_column_indices[0]],
-                                        @options )
-        @money_column.merge!(
-          MoneyColumn.new( columns[money_column_indices[1]], @options ) )
+        @money_column.merge! MoneyColumn.new(columns[money_column_indices[1]], @options)
       end
 
       self.description_column_indices = results.map { |i| i[:index] }
@@ -228,21 +224,25 @@ def columns
       end
     end
 
-    def parse
-      data = options[:string] || File.read(options[:file])
-
-      if RUBY_VERSION =~ /^1\.9/ || RUBY_VERSION =~ /^2/
-        data = data.force_encoding(options[:encoding] || 'BINARY').encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?')
-        csv_engine = CSV
-      else
-        csv_engine = FasterCSV
+    def parse(data)
+      # Tag a copy of the input (the caller's string is left untouched) with the chosen
+      # encoding, then transcode to UTF-8, replacing invalid/undefined characters with '?'.
+      data = data.dup.force_encoding(try_encoding(data))
+      data = data.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
+      data.sub!("\xEF\xBB\xBF", '') # strip the UTF-8 byte order mark, if present
+
+      rows = []
+      data.each_line.with_index do |line, i|
+        next if i < (options[:contains_header] || 0)
+        rows << CSV.parse_line(line, col_sep: options[:csv_separator] || ',')
       end
 
-      @csv_data = csv_engine.parse data.strip, :col_sep => options[:csv_separator] || ','
-      if options[:contains_header]
-        options[:contains_header].times { csv_data.shift }
-      end
-      csv_data
+      rows
+    end
+
+    def try_encoding(data)
+      # An explicit :encoding option wins (no detection run); else CharDet's guess, else 'BINARY'.
+      options[:encoding] || CharDet.detect(data)['encoding'] || 'BINARY'
     end
 
     @settings = { :testing => false }

diff --git a/reckon.gemspec b/reckon.gemspec
@@ -1,4 +1,3 @@
-# -*- encoding: utf-8 -*-
 $:.push File.expand_path("../lib", __FILE__)
 
 Gem::Specification.new do |s|
@@ -16,9 +15,9 @@ Gem::Specification.new do |s|
   s.require_paths = ["lib"]
 
   s.add_development_dependency "rspec", ">= 1.2.9"
-  s.add_runtime_dependency "fastercsv", ">= 1.5.1"
+  s.add_development_dependency "pry", ">= 0.12.2"
   s.add_runtime_dependency "chronic", ">= 0.3.0"
   s.add_runtime_dependency "highline", ">= 1.5.2"
   s.add_runtime_dependency "terminal-table", ">= 1.4.2"
+  s.add_runtime_dependency "rchardet", ">= 1.8.0"
 end
-
diff --git a/spec/data_fixtures/bom_utf8_file.csv b/spec/data_fixtures/bom_utf8_file.csv
@@ -0,0 +1 @@
+"Date","Time","TimeZone","Name","Type","Status","Currency","Gross","Fee","Net","From Email Address","To Email Address","Transaction ID","Shipping Address","Address Status","Item Title","Item ID","Shipping and Handling Amount","Insurance Amount","Sales Tax","Option 1 Name","Option 1 Value","Option 2 Name","Option 2 Value","Reference Txn ID","Invoice Number","Custom Number","Quantity","Receipt ID","Balance","Address Line 1","Address Line 2/District/Neighborhood","Town/City","State/Province/Region/County/Territory/Prefecture/Republic","Zip/Postal Code","Country","Contact Phone Number","Subject","Note","Country Code","Balance Impact"
diff --git a/spec/reckon/app_spec.rb b/spec/reckon/app_spec.rb
@@ -1,5 +1,4 @@
 #!/usr/bin/env ruby
-# encoding: utf-8
 
 require "spec_helper"
 require 'rubygems'
@@ -8,10 +7,10 @@
 describe Reckon::App do
   context 'with chase csv input' do
     before do
-      @chase = Reckon::App.new(:string => BANK_CSV)
-      @chase.learn_from( BANK_LEDGER )
+      @chase = Reckon::App.new(string: BANK_CSV)
+      @chase.learn_from(BANK_LEDGER)
       @rows = []
-      @chase.each_row_backwards { |row| @rows.push( row ) }
+      @chase.each_row_backwards { |row| @rows.push(row) }
     end
 
     describe "each_row_backwards" do
@@ -27,7 +26,11 @@
 
     describe "weighted_account_match" do
       it "should guess the correct account" do
-        @chase.weighted_account_match( @rows[7] ).first[:account].should == "Expenses:Books"
+        row = @rows.find { |n| n[:description] =~ /Book Store/ }
+
+        result = @chase.weighted_account_match(row).first
+        result[:account].should == "Expenses:Books"
+        result[:cosine].should > 0.0
       end
     end
   end
@@ -95,6 +98,5 @@
 2004/05/27 Book Store
   Expenses:Books                 $20.00
   Liabilities:MasterCard
-  LEDGER
-
+LEDGER
 end
diff --git a/spec/reckon/csv_parser_spec.rb b/spec/reckon/csv_parser_spec.rb
@@ -1,9 +1,9 @@
 #!/usr/bin/env ruby
-# encoding: utf-8
+# coding: utf-8
 
-require "spec_helper"
+require_relative "../spec_helper"
 require 'rubygems'
-require 'reckon'
+require_relative '../../lib/reckon'
 
 Reckon::CSVParser.settings[:testing] = true
 
@@ -33,15 +33,48 @@
   end
 
   describe "parse" do
+    it "should use binary encoding if none specified and chardet fails" do
+      allow(CharDet).to receive(:detect).and_return({'encoding' => nil})
+      app = Reckon::CSVParser.new(:file => File.expand_path(File.join(File.dirname(__FILE__), "..", "data_fixtures", "extratofake.csv")))
+      expect(app.try_encoding("foobarbaz")).to eq("BINARY")
+    end
     it "should work with foreign character encodings" do
       app = Reckon::CSVParser.new(:file => File.expand_path(File.join(File.dirname(__FILE__), "..", "data_fixtures", "extratofake.csv")))
       app.columns[0][0..2].should == ["Data", "10/31/2012", "11/01/2012"]
-      app.columns[2].first.should == "Hist?rico"
+      app.columns[2].first.should == "Histórico"
     end
 
     it "should work with other separators" do
       Reckon::CSVParser.new(:string => "one;two\nthree;four", :csv_separator => ';').columns.should == [['one', 'three'], ['two', 'four']]
     end
+
+    it 'should parse quoted lines' do
+      file = %q("30.03.2015";"29.03.2015";"09.04.2015";"BARAUSZAHLUNGSENTGELT";"5266 xxxx xxxx 9454";"";"0";"EUR";"0,00";"EUR";"-3,50";"0")
+      Reckon::CSVParser.new(string: file, csv_separator: ';', comma_separates_cents: true).columns.length.should == 12
+    end
+
+    it 'should parse csv with BOM' do
+      file = File.expand_path(File.join(File.dirname(__FILE__), "..", "data_fixtures", "bom_utf8_file.csv"))
+      Reckon::CSVParser.new(file: file).columns.length.should == 41
+    end
+
+    describe 'file with invalid csv in header' do
+      file = %q(
+
+="0234500012345678";21/11/2015;19/02/2016;36;19/02/2016;1234,37 EUR
+
+Date de l'opération;Libellé;Détail de l'écriture;Montant de l'opération;Devise
+19/02/2016;VIR RECU 508160;VIR RECU 1234567834S DE: Francois REF: 123457891234567894561231 PROVENANCE: DE Allemagne ;50,00;EUR
+18/02/2016;COTISATION JAZZ;COTISATION JAZZ ;-8,10;EUR
+)
+      it 'should ignore invalid header lines' do
+        Reckon::CSVParser.new(string: file, contains_header: 4)
+      end
+
+      it 'should fail' do
+        expect { Reckon::CSVParser.new(string: file, contains_header: 1) }.to raise_error(CSV::MalformedCSVError)
+      end
+    end
   end
 
   describe "columns" do