From 7730bfdc56e7daf80644ddfd0db731ad77d5ff55 Mon Sep 17 00:00:00 2001 From: Andy Meneely Date: Fri, 28 Oct 2016 11:57:36 -0400 Subject: [PATCH] Initial implementation of data frame Contributes to #156 --- lib/squib/api/data.rb | 11 ++- lib/squib/import/data_frame.rb | 78 +++++++++++++++ spec/api/api_data_spec.rb | 36 +++---- spec/import/data_frame_spec.rb | 167 +++++++++++++++++++++++++++++++++ 4 files changed, 269 insertions(+), 23 deletions(-) create mode 100644 lib/squib/import/data_frame.rb create mode 100644 spec/import/data_frame_spec.rb diff --git a/lib/squib/api/data.rb b/lib/squib/api/data.rb index fe00ff3b..c26b744a 100644 --- a/lib/squib/api/data.rb +++ b/lib/squib/api/data.rb @@ -3,6 +3,7 @@ require_relative '../args/input_file' require_relative '../args/import' require_relative '../args/csv_opts' +require_relative '../import/data_frame' module Squib @@ -12,7 +13,7 @@ def xlsx(opts = {}) import = Args::Import.new.load!(opts) s = Roo::Excelx.new(input.file[0]) s.default_sheet = s.sheets[input.sheet[0]] - data = {} + data = Squib::DataFrame.new s.first_column.upto(s.last_column) do |col| header = s.cell(s.first_row, col).to_s header.strip! if import.strip? @@ -39,14 +40,14 @@ def csv(opts = {}) csv_opts = Args::CSV_Opts.new(opts) table = CSV.parse(data, csv_opts.to_hash) check_duplicate_csv_headers(table) - hash = Hash.new + hash = Squib::DataFrame.new table.headers.each do |header| new_header = header.to_s new_header = new_header.strip if import.strip? hash[new_header] ||= table[header] end if import.strip? - new_hash = Hash.new + new_hash = Squib::DataFrame.new hash.each do |header, col| new_hash[header] = col.map do |str| str = str.strip if str.respond_to?(:strip) @@ -78,9 +79,9 @@ def check_duplicate_csv_headers(table) # @api private def explode_quantities(data, qty) - return data unless data.key? qty.to_s.strip + return data unless data.col? qty.to_s.strip qtys = data[qty] - new_data = {} + new_data = Squib::DataFrame.new data.each do |col, arr| new_data[col] = [] qtys.each_with_index do |qty, index| diff --git a/lib/squib/import/data_frame.rb b/lib/squib/import/data_frame.rb new file mode 100644 index 00000000..19056c16 --- /dev/null +++ b/lib/squib/import/data_frame.rb @@ -0,0 +1,78 @@ +require 'json' +require 'forwardable' + +module Squib + class DataFrame + include Enumerable + + def initialize(hash = {}, def_columns = true) + @hash = hash + columns.each { |col| def_column(col) } if def_columns + end + + def def_column(col) + raise "Column #{col} - does not exist" unless @hash.key? col + method_name = snake_case(col) + return if self.class.method_defined?(method_name) #warn people? or skip? + define_singleton_method method_name do + @hash[col] + end + end + + def each(&block) + @hash.each(&block) + end + + def [](i) + @hash[i] + end + + def []=(i, v) + @hash[i] = v + end + + def columns + @hash.keys + end + + def ncolumns + @hash.keys.size + end + + def col?(col) + @hash.key? col + end + + def row(i) + @hash.inject(Hash.new) { |ret, (name, arr)| ret[name] = arr[i]; ret } + end + + def nrows + @hash.inject(0) { |max, (_n, col)| col.size > max ? col.size : max } + end + + def to_json + @hash.to_json + end + + def to_pretty_json + JSON.pretty_generate(@hash) + end + + def to_h + @hash + end + + private + + def snake_case(str) + str.strip. + gsub(/\s+/,'_'). + gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2'). + gsub(/([a-z]+)([A-Z])/,'\1_\2'). + downcase. + to_sym + end + + end +end diff --git a/spec/api/api_data_spec.rb b/spec/api/api_data_spec.rb index 15ab0d96..4993eb95 100644 --- a/spec/api/api_data_spec.rb +++ b/spec/api/api_data_spec.rb @@ -3,7 +3,7 @@ describe Squib::Deck do context '#csv' do it 'loads basic csv data' do - expect(Squib.csv(file: csv_file('basic.csv'))).to eq({ + expect(Squib.csv(file: csv_file('basic.csv')).to_h.to_h).to eq({ 'h1' => [1, 3], 'h2' => [2, 4] }) @@ -12,7 +12,7 @@ it 'collapses duplicate columns and warns' do expect(Squib.logger).to receive(:warn) .with('CSV duplicated the following column keys: h1,h1') - expect(Squib.csv(file: csv_file('dup_cols.csv'))).to eq({ + expect(Squib.csv(file: csv_file('dup_cols.csv')).to_h.to_h).to eq({ 'h1' => [1, 3], 'h2' => [5, 7], 'H2' => [6, 8], @@ -21,7 +21,7 @@ end it 'strips spaces by default' do - expect(Squib.csv(file: csv_file('with_spaces.csv'))).to eq({ + expect(Squib.csv(file: csv_file('with_spaces.csv')).to_h).to eq({ 'With Spaces' => ['a b c', 3], 'h2' => [2, 4], 'h3' => [3, nil] @@ -29,7 +29,7 @@ end it 'skips space stripping if told to' do - expect(Squib.csv(strip: false, file: csv_file('with_spaces.csv'))).to eq({ + expect(Squib.csv(strip: false, file: csv_file('with_spaces.csv')).to_h).to eq({ ' With Spaces ' => ['a b c ', 3], 'h2' => [2, 4], 'h3' => [3, nil] @@ -37,14 +37,14 @@ end it 'explodes quantities' do - expect(Squib.csv(file: csv_file('qty.csv'))).to eq({ + expect(Squib.csv(file: csv_file('qty.csv')).to_h).to eq({ 'Name' => %w(Ha Ha Ha Ho), 'Qty' => [3, 3, 3, 1], }) end it 'explodes quantities on specified header' do - expect(Squib.csv(explode: 'Quantity', file: csv_file('qty_named.csv'))).to eq({ + expect(Squib.csv(explode: 'Quantity', file: csv_file('qty_named.csv')).to_h).to eq({ 'Name' => %w(Ha Ha Ha Ho), 'Quantity' => [3, 3, 3, 1], }) @@ -52,7 +52,7 @@ it 'loads inline data' do hash = Squib.csv(data: "h1,h2\n1,2\n3,4") - expect(hash).to eq({ + expect(hash.to_h).to eq({ 'h1' => [1, 3], 'h2' => [2, 4] }) @@ -60,7 +60,7 @@ it 'loads csv with newlines' do hash = Squib.csv(file: csv_file('newline.csv')) - expect(hash).to eq({ + expect(hash.to_h).to eq({ 'title' => ['Foo'], 'level' => [1], 'notes' => ["a\nb"] @@ -70,7 +70,7 @@ it 'loads custom CSV options' do hash = Squib.csv(file: csv_file('custom_opts.csv'), col_sep: '-', quote_char: '|') - expect(hash).to eq({ + expect(hash.to_h).to eq({ 'x' => ['p'], 'y' => ['q-r'] }) @@ -85,7 +85,7 @@ 'ha' end end - expect(data).to eq({ + expect(data.to_h).to eq({ 'h1' => [2, 6], 'h2' => %w(ha ha), }) @@ -99,7 +99,7 @@ value end end - expect(data).to eq({ + expect(data.to_h).to eq({ 'a' => ["foo\nbar", 1], 'b' => [1, "blah\n"], }) @@ -109,7 +109,7 @@ context '#xlsx' do it 'loads basic xlsx data' do - expect(Squib.xlsx(file: xlsx_file('basic.xlsx'))).to eq({ + expect(Squib.xlsx(file: xlsx_file('basic.xlsx')).to_h).to eq({ 'Name' => %w(Larry Curly Mo), 'General Number' => %w(1 2 3), # general types always get loaded as strings with no conversion 'Actual Number' => [4.0, 5.0, 6.0], # numbers get auto-converted to integers @@ -117,7 +117,7 @@ end it 'loads xlsx with formulas' do - expect(Squib.xlsx(file: xlsx_file('formulas.xlsx'))).to eq({ + expect(Squib.xlsx(file: xlsx_file('formulas.xlsx')).to_h).to eq({ 'A' => %w(1 2), 'B' => %w(3 4), 'Sum' => %w(4 6), @@ -125,20 +125,20 @@ end it 'loads xlsm files with macros' do - expect(Squib.xlsx(file: xlsx_file('with_macros.xlsm'))).to eq({ + expect(Squib.xlsx(file: xlsx_file('with_macros.xlsm')).to_h).to eq({ 'foo' => %w(8 10), 'bar' => %w(9 11), }) end it 'strips whitespace by default' do - expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx'))).to eq({ + expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx')).to_h).to eq({ 'With Whitespace' => ['foo', 'bar', 'baz'], }) end it 'does not strip whitespace when specified' do - expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx'), strip: false)).to eq({ + expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx'), strip: false).to_h).to eq({ ' With Whitespace ' => ['foo ', ' bar', ' baz '], }) end @@ -154,7 +154,7 @@ 'ha' end end - expect(data).to eq({ + expect(data.to_h).to eq({ 'Name' => %w(he he he), 'General Number' => %w(ha ha ha), 'Actual Number' => [8.0, 10.0, 12.0], @@ -162,7 +162,7 @@ end it 'explodes quantities' do - expect(Squib.xlsx(explode: 'Qty', file: xlsx_file('explode_quantities.xlsx'))).to eq({ + expect(Squib.xlsx(explode: 'Qty', file: xlsx_file('explode_quantities.xlsx')).to_h).to eq({ 'Name' => ['Zergling', 'Zergling', 'Zergling', 'High Templar'], 'Qty' => %w(3 3 3 1), }) diff --git a/spec/import/data_frame_spec.rb b/spec/import/data_frame_spec.rb new file mode 100644 index 00000000..bb14f896 --- /dev/null +++ b/spec/import/data_frame_spec.rb @@ -0,0 +1,167 @@ +require 'spec_helper' +require 'squib/import/data_frame' + +describe Squib::DataFrame do + let(:basic) do + { + 'Name' => ['Mage', 'Rogue', 'Warrior'], + 'Cost' => [1, 2, 3], + } + end + + let(:uneven) do + { + 'Name' => ['Mage', 'Rogue', 'Warrior'], + 'Cost' => [1, 2], + } + end + + let(:whitespace) do + { + 'Name \n\r\t' => [' Mage \t\r\n'], + } + end + + let(:defined) do + { + 'nrows' => [1,2,3], + } + end + + context 'is Enumerable and like a hash, so it' do + it 'responds to each' do + expect(subject).to respond_to(:each) + end + + it 'responds to any?' do + expect(subject.any?).to be false + end + + it 'responds to []' do + data = Squib::DataFrame.new basic + expect(data['Cost']).to eq([1, 2, 3]) + end + + it 'responds to []=' do + data = Squib::DataFrame.new basic + data[:a] = 2 + expect(data[:a]).to eq(2) + end + end + + context :columns do + it 'provides a list of columns' do + data = Squib::DataFrame.new basic + expect(data.columns).to eq %w(Name Cost) + end + end + + context :ncolumns do + it 'provides column count' do + data = Squib::DataFrame.new basic + expect(data.ncolumns).to eq 2 + end + end + + context :row do + it 'returns a hash of each row' do + data = Squib::DataFrame.new basic + expect(data.row(0)).to eq ({'Name' => 'Mage', 'Cost' => 1}) + expect(data.row(1)).to eq ({'Name' => 'Rogue', 'Cost' => 2}) + end + + it 'returns nil for uneven data' do + data = Squib::DataFrame.new uneven + expect(data.row(2)).to eq ({'Name' => 'Warrior', 'Cost' => nil}) + end + end + + context :nrows do + it 'returns a row count on even data' do + data = Squib::DataFrame.new basic + expect(data.nrows).to eq 3 + end + + it 'returns largest row count on uneven data' do + data = Squib::DataFrame.new basic + expect(data.nrows).to eq 3 + end + end + + context :json do + it 'returns quoty json' do + data = Squib::DataFrame.new basic + expect(data.to_json).to eq "{\"Name\":[\"Mage\",\"Rogue\",\"Warrior\"],\"Cost\":[1,2,3]}" + end + + it 'returns pretty json' do + data = Squib::DataFrame.new basic + str = <<-EOS +{ + "Name": [ + "Mage", + "Rogue", + "Warrior" + ], + "Cost": [ + 1, + 2, + 3 + ] +} +EOS + expect(data.to_pretty_json).to eq str.chomp + end + end + + context :def_column do + it 'creates name for Name column' do + data = Squib::DataFrame.new basic + expect(data).to respond_to(:name) + expect(data.name).to eq %w(Mage Rogue Warrior) + expect(data.cost).to eq [1, 2, 3] + end + + it 'does not redefine methods' do + data = Squib::DataFrame.new defined + expect(data).to respond_to(:nrows) + expect(data.nrows).to eq 3 + end + end + + context :snake_case do + subject { Squib::DataFrame.new } + it 'strips leading & trailing whitespace' do + expect(subject.send(:snake_case, ' A ')).to eq :a + end + + it 'converts space to _' do + expect(subject.send(:snake_case, 'A b')).to eq :a_b + end + + it 'handles multiwhitespace' do + expect(subject.send(:snake_case, 'A b')).to eq :a_b + end + + it 'handles camelcase' do + expect(subject.send(:snake_case, 'fooBar')).to eq :foo_bar + end + + it 'handles camelcase with multiple capitals' do + expect(subject.send(:snake_case, 'FOOBar')).to eq :foo_bar + end + end + + context :col? do + it 'returns true if a column exists' do + data = Squib::DataFrame.new basic + expect(data.col? 'Name').to be true + end + + it 'returns false if a column does not exist' do + data = Squib::DataFrame.new basic + expect(data.col? 'ROUS').to be false + end + end + +end