diff --git a/.ruby-version b/.ruby-version index 76521af..633c00d 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -ruby-2.2.0 +2.0.0-p648 diff --git a/.travis.yml b/.travis.yml index 86cd0b0..a86d447 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,11 @@ language: ruby rvm: - - 2.0.0 - - 2.3.3 - - 2.4.0 + # Mac High Sierra + - 2.0.0-p648 + # Mac Mojave + - 2.3.7 + # Ubuntu 19.10 + - 2.5 + # Mac Catalina + - 2.6 script: "bundle exec rake" diff --git a/Gemfile b/Gemfile index 5378d16..22e17d6 100644 --- a/Gemfile +++ b/Gemfile @@ -1,5 +1,4 @@ source "http://rubygems.org" - gemspec gem 'rake' diff --git a/Gemfile.lock b/Gemfile.lock index e69c33a..42cf42f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -3,32 +3,48 @@ PATH specs: reckon (0.4.4) chronic (>= 0.3.0) - fastercsv (>= 1.5.1) highline (>= 1.5.2) + rchardet (>= 1.8.0) terminal-table (>= 1.4.2) GEM remote: http://rubygems.org/ specs: chronic (0.10.2) - diff-lcs (1.1.3) - fastercsv (1.5.5) - highline (1.6.21) - rake (10.0.4) - rspec (2.11.0) - rspec-core (~> 2.11.0) - rspec-expectations (~> 2.11.0) - rspec-mocks (~> 2.11.0) - rspec-core (2.11.1) - rspec-expectations (2.11.2) - diff-lcs (~> 1.1.3) - rspec-mocks (2.11.1) - terminal-table (1.4.5) + coderay (1.1.2) + diff-lcs (1.3) + highline (2.0.3) + method_source (0.9.2) + pry (0.12.2) + coderay (~> 1.1.0) + method_source (~> 0.9.0) + rake (12.3.3) + rchardet (1.8.0) + rspec (3.9.0) + rspec-core (~> 3.9.0) + rspec-expectations (~> 3.9.0) + rspec-mocks (~> 3.9.0) + rspec-core (3.9.1) + rspec-support (~> 3.9.1) + rspec-expectations (3.9.0) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.9.0) + rspec-mocks (3.9.1) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.9.0) + rspec-support (3.9.2) + terminal-table (1.8.0) + unicode-display_width (~> 1.1, >= 1.1.1) + unicode-display_width (1.6.1) PLATFORMS ruby DEPENDENCIES + pry (>= 0.12.2) rake reckon! rspec (>= 1.2.9) + +BUNDLED WITH + 1.17.3 diff --git a/lib/reckon.rb b/lib/reckon.rb index 9d33b3c..b0a0097 100755 --- a/lib/reckon.rb +++ b/lib/reckon.rb @@ -1,19 +1,15 @@ #!/usr/bin/env ruby require 'rubygems' -if RUBY_VERSION =~ /^1\.9/ || RUBY_VERSION =~ /^2/ - require 'csv' -else - require 'fastercsv' -end +require 'rchardet' +require 'chronic' +require 'csv' require 'highline/import' require 'optparse' -require 'chronic' -require 'time' require 'terminal-table' +require 'time' require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "app")) require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "ledger_parser")) require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "csv_parser")) require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "money")) - diff --git a/lib/reckon/csv_parser.rb b/lib/reckon/csv_parser.rb index d00dc17..a9407a2 100644 --- a/lib/reckon/csv_parser.rb +++ b/lib/reckon/csv_parser.rb @@ -1,5 +1,4 @@ #coding: utf-8 -require 'pp' module Reckon class CSVParser @@ -8,7 +7,7 @@ class CSVParser def initialize(options = {}) self.options = options self.options[:currency] ||= '$' - parse + @csv_data = parse(options[:string] || File.read(options[:file])) filter_csv detect_columns end @@ -160,7 +159,7 @@ def detect_sign_column def detect_columns results, found_likely_money_column = evaluate_columns(columns) - self.money_column_indices = [ results.sort { |a, b| b[:money_score] <=> a[:money_score] }.first[:index] ] + self.money_column_indices = [ results.max_by { |n| n[:money_score] }[:index] ] if !found_likely_money_column found_likely_double_money_columns = false @@ -192,20 +191,17 @@ def detect_columns end end - results.reject! {|i| money_column_indices.include?(i[:index]) } - self.date_column_index = results.sort { |a, b| b[:date_score] <=> a[:date_score] }.first[:index] - results.reject! {|i| i[:index] == date_column_index } - @date_column = DateColumn.new( columns[ self.date_column_index ], @options ) + results.reject! { |i| money_column_indices.include?(i[:index]) } + # sort by highest score followed by lowest index + @date_column_index = results.max_by { |n| [n[:date_score], -n[:index]] }[:index] + results.reject! { |i| i[:index] == date_column_index } + @date_column = DateColumn.new(columns[date_column_index], @options) - if ( money_column_indices.length == 1 ) - @money_column = MoneyColumn.new( columns[money_column_indices[0]], - @options ) + @money_column = MoneyColumn.new(columns[money_column_indices[0]], @options) + if money_column_indices.length == 1 detect_sign_column if @money_column.positive? else - @money_column = MoneyColumn.new( columns[money_column_indices[0]], - @options ) - @money_column.merge!( - MoneyColumn.new( columns[money_column_indices[1]], @options ) ) + @money_column.merge! MoneyColumn.new(columns[money_column_indices[1]], @options) end self.description_column_indices = results.map { |i| i[:index] } @@ -228,21 +224,25 @@ def columns end end - def parse - data = options[:string] || File.read(options[:file]) - - if RUBY_VERSION =~ /^1\.9/ || RUBY_VERSION =~ /^2/ - data = data.force_encoding(options[:encoding] || 'BINARY').encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?') - csv_engine = CSV - else - csv_engine = FasterCSV + def parse(data) + # Use force_encoding to convert the string to utf-8 with as few invalid characters + # as possible. + data.force_encoding(try_encoding(data)) + data = data.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?') + data.sub!("\xEF\xBB\xBF", '') # strip byte order marker, if it exists + + rows = [] + data.each_line.with_index do |line, i| + next if i < (options[:contains_header] || 0) + rows << CSV.parse_line(line, col_sep: options[:csv_separator] || ',') end - @csv_data = csv_engine.parse data.strip, :col_sep => options[:csv_separator] || ',' - if options[:contains_header] - options[:contains_header].times { csv_data.shift } - end - csv_data + rows + end + + def try_encoding(data) + cd = CharDet.detect(data) + options[:encoding] || cd['encoding'] || 'BINARY' end @settings = { :testing => false } diff --git a/reckon.gemspec b/reckon.gemspec index ddc6af4..74b8bf5 100644 --- a/reckon.gemspec +++ b/reckon.gemspec @@ -1,4 +1,3 @@ -# -*- encoding: utf-8 -*- $:.push File.expand_path("../lib", __FILE__) Gem::Specification.new do |s| @@ -16,9 +15,9 @@ Gem::Specification.new do |s| s.require_paths = ["lib"] s.add_development_dependency "rspec", ">= 1.2.9" - s.add_runtime_dependency "fastercsv", ">= 1.5.1" + s.add_development_dependency "pry", ">= 0.12.2" s.add_runtime_dependency "chronic", ">= 0.3.0" s.add_runtime_dependency "highline", ">= 1.5.2" s.add_runtime_dependency "terminal-table", ">= 1.4.2" + s.add_runtime_dependency "rchardet", ">= 1.8.0" end - diff --git a/spec/data_fixtures/bom_utf8_file.csv b/spec/data_fixtures/bom_utf8_file.csv new file mode 100644 index 0000000..5ad16fc --- /dev/null +++ b/spec/data_fixtures/bom_utf8_file.csv @@ -0,0 +1 @@ +"Date","Time","TimeZone","Name","Type","Status","Currency","Gross","Fee","Net","From Email Address","To Email Address","Transaction ID","Shipping Address","Address Status","Item Title","Item ID","Shipping and Handling Amount","Insurance Amount","Sales Tax","Option 1 Name","Option 1 Value","Option 2 Name","Option 2 Value","Reference Txn ID","Invoice Number","Custom Number","Quantity","Receipt ID","Balance","Address Line 1","Address Line 2/District/Neighborhood","Town/City","State/Province/Region/County/Territory/Prefecture/Republic","Zip/Postal Code","Country","Contact Phone Number","Subject","Note","Country Code","Balance Impact" diff --git a/spec/reckon/app_spec.rb b/spec/reckon/app_spec.rb index acb05dc..f39b674 100644 --- a/spec/reckon/app_spec.rb +++ b/spec/reckon/app_spec.rb @@ -1,5 +1,4 @@ #!/usr/bin/env ruby -# encoding: utf-8 require "spec_helper" require 'rubygems' @@ -8,10 +7,10 @@ describe Reckon::App do context 'with chase csv input' do before do - @chase = Reckon::App.new(:string => BANK_CSV) - @chase.learn_from( BANK_LEDGER ) + @chase = Reckon::App.new(string: BANK_CSV) + @chase.learn_from(BANK_LEDGER) @rows = [] - @chase.each_row_backwards { |row| @rows.push( row ) } + @chase.each_row_backwards { |row| @rows.push(row) } end describe "each_row_backwards" do @@ -27,7 +26,11 @@ describe "weighted_account_match" do it "should guess the correct account" do - @chase.weighted_account_match( @rows[7] ).first[:account].should == "Expenses:Books" + row = @rows.find { |n| n[:description] =~ /Book Store/ } + + result = @chase.weighted_account_match(row).first + result[:account].should == "Expenses:Books" + result[:cosine].should > 0.0 end end end @@ -95,6 +98,5 @@ 2004/05/27 Book Store Expenses:Books $20.00 Liabilities:MasterCard - LEDGER - +LEDGER end diff --git a/spec/reckon/csv_parser_spec.rb b/spec/reckon/csv_parser_spec.rb index c623e81..319a541 100755 --- a/spec/reckon/csv_parser_spec.rb +++ b/spec/reckon/csv_parser_spec.rb @@ -1,9 +1,9 @@ #!/usr/bin/env ruby -# encoding: utf-8 +# coding: utf-8 -require "spec_helper" +require_relative "../spec_helper" require 'rubygems' -require 'reckon' +require_relative '../../lib/reckon' Reckon::CSVParser.settings[:testing] = true @@ -33,15 +33,48 @@ end describe "parse" do + it "should use binary encoding if none specified and chardet fails" do + allow(CharDet).to receive(:detect).and_return({'encoding' => nil}) + app = Reckon::CSVParser.new(:file => File.expand_path(File.join(File.dirname(__FILE__), "..", "data_fixtures", "extratofake.csv"))) + expect(app.try_encoding("foobarbaz")).to eq("BINARY") + end it "should work with foreign character encodings" do app = Reckon::CSVParser.new(:file => File.expand_path(File.join(File.dirname(__FILE__), "..", "data_fixtures", "extratofake.csv"))) app.columns[0][0..2].should == ["Data", "10/31/2012", "11/01/2012"] - app.columns[2].first.should == "Hist?rico" + app.columns[2].first.should == "Histórico" end it "should work with other separators" do Reckon::CSVParser.new(:string => "one;two\nthree;four", :csv_separator => ';').columns.should == [['one', 'three'], ['two', 'four']] end + + it 'should parse quoted lines' do + file = %q("30.03.2015";"29.03.2015";"09.04.2015";"BARAUSZAHLUNGSENTGELT";"5266 xxxx xxxx 9454";"";"0";"EUR";"0,00";"EUR";"-3,50";"0") + Reckon::CSVParser.new(string: file, csv_separator: ';', comma_separates_cents: true).columns.length.should == 12 + end + + it 'should parse csv with BOM' do + file = File.expand_path(File.join(File.dirname(__FILE__), "..", "data_fixtures", "bom_utf8_file.csv")) + Reckon::CSVParser.new(file: file).columns.length.should == 41 + end + + describe 'file with invalid csv in header' do + file = %q( + +="0234500012345678";21/11/2015;19/02/2016;36;19/02/2016;1234,37 EUR + +Date de l'opération;Libellé;Détail de l'écriture;Montant de l'opération;Devise +19/02/2016;VIR RECU 508160;VIR RECU 1234567834S DE: Francois REF: 123457891234567894561231 PROVENANCE: DE Allemagne ;50,00;EUR +18/02/2016;COTISATION JAZZ;COTISATION JAZZ ;-8,10;EUR +) + it 'should ignore invalid header lines' do + Reckon::CSVParser.new(string: file, contains_header: 4) + end + + it 'should fail' do + expect { Reckon::CSVParser.new(string: file, contains_header: 1) }.to raise_error(CSV::MalformedCSVError) + end + end end describe "columns" do