Skip to content

Commit

Permalink
Initial implementation of data frame
Browse files Browse the repository at this point in the history
Contributes to #156
  • Loading branch information
andymeneely committed Oct 28, 2016
1 parent f94f710 commit 7730bfd
Show file tree
Hide file tree
Showing 4 changed files with 269 additions and 23 deletions.
11 changes: 6 additions & 5 deletions lib/squib/api/data.rb
Expand Up @@ -3,6 +3,7 @@
require_relative '../args/input_file'
require_relative '../args/import'
require_relative '../args/csv_opts'
require_relative '../import/data_frame'

module Squib

Expand All @@ -12,7 +13,7 @@ def xlsx(opts = {})
import = Args::Import.new.load!(opts)
s = Roo::Excelx.new(input.file[0])
s.default_sheet = s.sheets[input.sheet[0]]
data = {}
data = Squib::DataFrame.new
s.first_column.upto(s.last_column) do |col|
header = s.cell(s.first_row, col).to_s
header.strip! if import.strip?
Expand All @@ -39,14 +40,14 @@ def csv(opts = {})
csv_opts = Args::CSV_Opts.new(opts)
table = CSV.parse(data, csv_opts.to_hash)
check_duplicate_csv_headers(table)
hash = Hash.new
hash = Squib::DataFrame.new
table.headers.each do |header|
new_header = header.to_s
new_header = new_header.strip if import.strip?
hash[new_header] ||= table[header]
end
if import.strip?
new_hash = Hash.new
new_hash = Squib::DataFrame.new
hash.each do |header, col|
new_hash[header] = col.map do |str|
str = str.strip if str.respond_to?(:strip)
Expand Down Expand Up @@ -78,9 +79,9 @@ def check_duplicate_csv_headers(table)

# @api private
def explode_quantities(data, qty)
return data unless data.key? qty.to_s.strip
return data unless data.col? qty.to_s.strip
qtys = data[qty]
new_data = {}
new_data = Squib::DataFrame.new
data.each do |col, arr|
new_data[col] = []
qtys.each_with_index do |qty, index|
Expand Down
78 changes: 78 additions & 0 deletions lib/squib/import/data_frame.rb
@@ -0,0 +1,78 @@
require 'json'
require 'forwardable'

module Squib
  # A thin, column-oriented table wrapper around a Hash that maps a column
  # name to an Array of cell values (one entry per row).
  #
  # For every column a snake_case reader is defined on the instance, so a
  # column called "Card Name" can be read as +frame.card_name+. A reader is
  # skipped when its name collides with a method the class already has.
  class DataFrame
    include Enumerable

    # @param hash [Hash] backing store of column name => Array of values.
    #   The hash is kept by reference, not copied.
    # @param def_columns [Boolean] when true, define a snake_case reader for
    #   each column, both for the initial columns and for columns added
    #   later through {#[]=}.
    def initialize(hash = {}, def_columns = true)
      @hash = hash
      @def_columns = def_columns
      columns.each { |col| def_column(col) } if def_columns
    end

    # Defines a snake_case reader on this instance for an existing column.
    #
    # @param col [Object] the column key as stored in the frame
    # @raise [RuntimeError] if the column is not present
    # @return [Symbol, nil] nil when the reader was skipped due to a collision
    def def_column(col)
      raise "Column #{col} - does not exist" unless @hash.key? col
      method_name = snake_case(col)
      # Skip on collision with any existing method. Private methods count
      # too: overriding e.g. #snake_case or Kernel#raise on the instance
      # would break this very method on the next call.
      return if self.class.method_defined?(method_name) ||
                self.class.private_method_defined?(method_name)
      define_singleton_method(method_name) { @hash[col] }
    end

    # Iterates over (column name, values) pairs; this drives Enumerable.
    def each(&block)
      @hash.each(&block)
    end

    # @return [Array, nil] the values of column +i+, or nil if absent
    def [](i)
      @hash[i]
    end

    # Stores column +i+ and, unless readers were disabled at construction,
    # defines its snake_case reader so columns added after construction
    # behave like the initial ones.
    #
    # @return [Object] the assigned value
    def []=(i, v)
      @hash[i] = v
      def_column(i) if @def_columns
      v
    end

    # @return [Array] the column names, in insertion order
    def columns
      @hash.keys
    end

    # @return [Integer] the number of columns
    def ncolumns
      @hash.size
    end

    # @return [Boolean] whether a column with exactly this key exists
    def col?(col)
      @hash.key? col
    end

    # @param i [Integer] zero-based row index
    # @return [Hash] column name => cell at row +i+ (nil where a column is
    #   shorter than +i+ + 1)
    def row(i)
      @hash.each_with_object({}) { |(name, arr), ret| ret[name] = arr[i] }
    end

    # @return [Integer] the length of the longest column, 0 for an empty frame
    def nrows
      @hash.each_value.map(&:size).max || 0
    end

    # Serializes the backing hash as JSON. Generator arguments are accepted
    # and forwarded because JSON.generate calls +to_json(state)+ on nested
    # objects; a zero-arity version raises ArgumentError there.
    #
    # @return [String]
    def to_json(*args)
      @hash.to_json(*args)
    end

    # @return [String] the backing hash as indented, human-readable JSON
    def to_pretty_json
      JSON.pretty_generate(@hash)
    end

    # @return [Hash] the backing hash itself (not a copy)
    def to_h
      @hash
    end

    private

    # Converts a column key into a method-name symbol: trims it, turns
    # whitespace runs into underscores, splits camelCase/ACRONYMWord
    # boundaries and downcases. +to_s+ lets Symbol and numeric keys through.
    def snake_case(str)
      str.to_s
         .strip
         .gsub(/\s+/, '_')
         .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
         .gsub(/([a-z]+)([A-Z])/, '\1_\2')
         .downcase
         .to_sym
    end
  end
end
36 changes: 18 additions & 18 deletions spec/api/api_data_spec.rb
Expand Up @@ -3,7 +3,7 @@
describe Squib::Deck do
context '#csv' do
it 'loads basic csv data' do
expect(Squib.csv(file: csv_file('basic.csv'))).to eq({
expect(Squib.csv(file: csv_file('basic.csv')).to_h.to_h).to eq({
'h1' => [1, 3],
'h2' => [2, 4]
})
Expand All @@ -12,7 +12,7 @@
it 'collapses duplicate columns and warns' do
expect(Squib.logger).to receive(:warn)
.with('CSV duplicated the following column keys: h1,h1')
expect(Squib.csv(file: csv_file('dup_cols.csv'))).to eq({
expect(Squib.csv(file: csv_file('dup_cols.csv')).to_h.to_h).to eq({
'h1' => [1, 3],
'h2' => [5, 7],
'H2' => [6, 8],
Expand All @@ -21,46 +21,46 @@
end

it 'strips spaces by default' do
expect(Squib.csv(file: csv_file('with_spaces.csv'))).to eq({
expect(Squib.csv(file: csv_file('with_spaces.csv')).to_h).to eq({
'With Spaces' => ['a b c', 3],
'h2' => [2, 4],
'h3' => [3, nil]
})
end

it 'skips space stripping if told to' do
expect(Squib.csv(strip: false, file: csv_file('with_spaces.csv'))).to eq({
expect(Squib.csv(strip: false, file: csv_file('with_spaces.csv')).to_h).to eq({
' With Spaces ' => ['a b c ', 3],
'h2' => [2, 4],
'h3' => [3, nil]
})
end

it 'explodes quantities' do
expect(Squib.csv(file: csv_file('qty.csv'))).to eq({
expect(Squib.csv(file: csv_file('qty.csv')).to_h).to eq({
'Name' => %w(Ha Ha Ha Ho),
'Qty' => [3, 3, 3, 1],
})
end

it 'explodes quantities on specified header' do
expect(Squib.csv(explode: 'Quantity', file: csv_file('qty_named.csv'))).to eq({
expect(Squib.csv(explode: 'Quantity', file: csv_file('qty_named.csv')).to_h).to eq({
'Name' => %w(Ha Ha Ha Ho),
'Quantity' => [3, 3, 3, 1],
})
end

it 'loads inline data' do
hash = Squib.csv(data: "h1,h2\n1,2\n3,4")
expect(hash).to eq({
expect(hash.to_h).to eq({
'h1' => [1, 3],
'h2' => [2, 4]
})
end

it 'loads csv with newlines' do
hash = Squib.csv(file: csv_file('newline.csv'))
expect(hash).to eq({
expect(hash.to_h).to eq({
'title' => ['Foo'],
'level' => [1],
'notes' => ["a\nb"]
Expand All @@ -70,7 +70,7 @@
it 'loads custom CSV options' do
hash = Squib.csv(file: csv_file('custom_opts.csv'),
col_sep: '-', quote_char: '|')
expect(hash).to eq({
expect(hash.to_h).to eq({
'x' => ['p'],
'y' => ['q-r']
})
Expand All @@ -85,7 +85,7 @@
'ha'
end
end
expect(data).to eq({
expect(data.to_h).to eq({
'h1' => [2, 6],
'h2' => %w(ha ha),
})
Expand All @@ -99,7 +99,7 @@
value
end
end
expect(data).to eq({
expect(data.to_h).to eq({
'a' => ["foo\nbar", 1],
'b' => [1, "blah\n"],
})
Expand All @@ -109,36 +109,36 @@

context '#xlsx' do
it 'loads basic xlsx data' do
expect(Squib.xlsx(file: xlsx_file('basic.xlsx'))).to eq({
expect(Squib.xlsx(file: xlsx_file('basic.xlsx')).to_h).to eq({
'Name' => %w(Larry Curly Mo),
'General Number' => %w(1 2 3), # general types always get loaded as strings with no conversion
'Actual Number' => [4.0, 5.0, 6.0], # numbers get auto-converted to integers
})
end

it 'loads xlsx with formulas' do
expect(Squib.xlsx(file: xlsx_file('formulas.xlsx'))).to eq({
expect(Squib.xlsx(file: xlsx_file('formulas.xlsx')).to_h).to eq({
'A' => %w(1 2),
'B' => %w(3 4),
'Sum' => %w(4 6),
})
end

it 'loads xlsm files with macros' do
expect(Squib.xlsx(file: xlsx_file('with_macros.xlsm'))).to eq({
expect(Squib.xlsx(file: xlsx_file('with_macros.xlsm')).to_h).to eq({
'foo' => %w(8 10),
'bar' => %w(9 11),
})
end

it 'strips whitespace by default' do
expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx'))).to eq({
expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx')).to_h).to eq({
'With Whitespace' => ['foo', 'bar', 'baz'],
})
end

it 'does not strip whitespace when specified' do
expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx'), strip: false)).to eq({
expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx'), strip: false).to_h).to eq({
' With Whitespace ' => ['foo ', ' bar', ' baz '],
})
end
Expand All @@ -154,15 +154,15 @@
'ha'
end
end
expect(data).to eq({
expect(data.to_h).to eq({
'Name' => %w(he he he),
'General Number' => %w(ha ha ha),
'Actual Number' => [8.0, 10.0, 12.0],
})
end

it 'explodes quantities' do
expect(Squib.xlsx(explode: 'Qty', file: xlsx_file('explode_quantities.xlsx'))).to eq({
expect(Squib.xlsx(explode: 'Qty', file: xlsx_file('explode_quantities.xlsx')).to_h).to eq({
'Name' => ['Zergling', 'Zergling', 'Zergling', 'High Templar'],
'Qty' => %w(3 3 3 1),
})
Expand Down

0 comments on commit 7730bfd

Please sign in to comment.